litellm/litellm/types/rag.py

"""
Type definitions for RAG (Retrieval Augmented Generation) Ingest API.
"""
from typing import Any, Dict, List, Literal, Optional, Union
from pydantic import BaseModel
from typing_extensions import TypedDict


class RAGChunkingStrategy(TypedDict, total=False):
    """
    Chunking strategy config for RAG ingest using RecursiveCharacterTextSplitter.

    See: https://docs.langchain.com/oss/python/langchain/rag
    """

    chunk_size: int  # Maximum size of chunks (default: 1000)
    chunk_overlap: int  # Overlap between chunks (default: 200)
    separators: Optional[List[str]]  # Custom separators for splitting
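

# Usage sketch: a chunking config matching the documented defaults. The
# separator list is an assumption (LangChain's usual recursive-splitter
# defaults), not something this type enforces.
_example_chunking: RAGChunkingStrategy = {
    "chunk_size": 1000,
    "chunk_overlap": 200,
    "separators": ["\n\n", "\n", " ", ""],
}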


class RAGIngestOCROptions(TypedDict, total=False):
    """OCR configuration for RAG ingest pipeline."""

    model: str  # e.g., "mistral/mistral-ocr-latest"


class RAGIngestEmbeddingOptions(TypedDict, total=False):
    """Embedding configuration for RAG ingest pipeline."""

    model: str  # e.g., "text-embedding-3-small"
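

# Usage sketch: optional OCR and embedding steps for the ingest pipeline,
# using the model names given as examples above (illustrative, not required).
_example_ocr: RAGIngestOCROptions = {"model": "mistral/mistral-ocr-latest"}
_example_embedding: RAGIngestEmbeddingOptions = {"model": "text-embedding-3-small"}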


class OpenAIVectorStoreOptions(TypedDict, total=False):
    """
    OpenAI vector store configuration.

    Example (auto-create):
        {"custom_llm_provider": "openai"}

    Example (use existing):
        {"custom_llm_provider": "openai", "vector_store_id": "vs_xxx"}

    Example (with credentials):
        {"custom_llm_provider": "openai", "litellm_credential_name": "my-openai-creds"}
    """

    custom_llm_provider: Literal["openai"]
    vector_store_id: Optional[str]  # Existing vector store ID (auto-creates if not provided)
    ttl_days: Optional[int]  # Time-to-live in days for indexed content
    # Credentials (loaded from litellm.credential_list if litellm_credential_name is provided)
    litellm_credential_name: Optional[str]  # Credential name to load from litellm.credential_list
    api_key: Optional[str]  # Direct API key (alternative to litellm_credential_name)
    api_base: Optional[str]  # Direct API base (alternative to litellm_credential_name)
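

# Usage sketch: target an existing OpenAI vector store via a named credential.
# "vs_xxx" and "my-openai-creds" are placeholders; the credential must exist
# in litellm.credential_list.
_example_openai_vs: OpenAIVectorStoreOptions = {
    "custom_llm_provider": "openai",
    "vector_store_id": "vs_xxx",
    "litellm_credential_name": "my-openai-creds",
    "ttl_days": 30,
}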


class BedrockVectorStoreOptions(TypedDict, total=False):
    """
    Bedrock Knowledge Base configuration.

    Example (auto-create KB and all resources):
        {"custom_llm_provider": "bedrock"}

    Example (use existing KB):
        {"custom_llm_provider": "bedrock", "vector_store_id": "KB_ID"}

    Example (with credentials):
        {"custom_llm_provider": "bedrock", "litellm_credential_name": "my-aws-creds"}

    Auto-creation creates: S3 bucket, OpenSearch Serverless collection,
    IAM role, Knowledge Base, and Data Source.
    """

    custom_llm_provider: Literal["bedrock"]
    vector_store_id: Optional[str]  # Existing KB ID (auto-creates if not provided)
    # Bedrock-specific options
    s3_bucket: Optional[str]  # S3 bucket (auto-created if not provided)
    s3_prefix: Optional[str]  # S3 key prefix (default: "data/")
    embedding_model: Optional[str]  # Embedding model (default: amazon.titan-embed-text-v2:0)
    data_source_id: Optional[str]  # For existing KB: override the auto-detected Data Source
    wait_for_ingestion: Optional[bool]  # Wait for completion (default: False - returns immediately)
    ingestion_timeout: Optional[int]  # Timeout in seconds if wait_for_ingestion=True (default: 300)
    # Credentials (loaded from litellm.credential_list if litellm_credential_name is provided)
    litellm_credential_name: Optional[str]  # Credential name to load from litellm.credential_list
    # AWS auth (uses BaseAWSLLM)
    aws_access_key_id: Optional[str]
    aws_secret_access_key: Optional[str]
    aws_session_token: Optional[str]
    aws_region_name: Optional[str]  # default: us-west-2
    aws_role_name: Optional[str]
    aws_session_name: Optional[str]
    aws_profile_name: Optional[str]
    aws_web_identity_token: Optional[str]
    aws_sts_endpoint: Optional[str]
    aws_external_id: Optional[str]
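

# Usage sketch: let Bedrock auto-create the Knowledge Base (no
# vector_store_id) and block until ingestion completes. The bucket name is a
# hypothetical placeholder; the region matches the documented default.
_example_bedrock_vs: BedrockVectorStoreOptions = {
    "custom_llm_provider": "bedrock",
    "s3_bucket": "my-rag-ingest-bucket",
    "aws_region_name": "us-west-2",
    "wait_for_ingestion": True,
    "ingestion_timeout": 300,
}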


class VertexAIVectorStoreOptions(TypedDict, total=False):
    """
    Vertex AI RAG Engine configuration.

    Example (use existing corpus):
        {"custom_llm_provider": "vertex_ai", "vector_store_id": "CORPUS_ID", "gcs_bucket": "my-bucket"}

    Requires:
        - gcloud auth application-default login (for ADC authentication)
        - A GCS bucket, provided via gcs_bucket or the GCS_BUCKET_NAME env var

    Files are uploaded to GCS via litellm.files.create_file, then imported
    into the RAG corpus.
    """

    custom_llm_provider: Literal["vertex_ai"]
    vector_store_id: str  # RAG corpus ID (required for Vertex AI)
    # GCP config
    vertex_project: Optional[str]  # GCP project ID (uses env VERTEXAI_PROJECT if not set)
    vertex_location: Optional[str]  # GCP region (default: us-central1)
    vertex_credentials: Optional[str]  # Path to credentials JSON (uses ADC if not set)
    gcs_bucket: Optional[str]  # GCS bucket for file uploads (uses env GCS_BUCKET_NAME if not set)
    # Import settings
    wait_for_import: Optional[bool]  # Wait for import to complete (default: True)
    import_timeout: Optional[int]  # Timeout in seconds (default: 600)
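

# Usage sketch: import into an existing Vertex AI RAG corpus. The corpus ID,
# project, and bucket are hypothetical placeholders; vector_store_id is
# required here, unlike the other providers.
_example_vertex_vs: VertexAIVectorStoreOptions = {
    "custom_llm_provider": "vertex_ai",
    "vector_store_id": "1234567890",
    "vertex_project": "my-gcp-project",
    "gcs_bucket": "my-rag-bucket",
    "wait_for_import": True,
}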


# Union type for vector store options
RAGIngestVectorStoreOptions = Union[
    OpenAIVectorStoreOptions, BedrockVectorStoreOptions, VertexAIVectorStoreOptions
]
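

# Sketch: every union member tags itself with a `custom_llm_provider`
# literal, so callers can dispatch on that key. This helper is illustrative,
# not part of the module's API; total=False means the key may be absent at
# runtime, hence .get().
def _vector_store_provider(options: RAGIngestVectorStoreOptions) -> str:
    return options.get("custom_llm_provider", "<unset>")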


class RAGIngestOptions(TypedDict, total=False):
    """
    Combined options for RAG ingest pipeline.

    Unified interface - just specify custom_llm_provider:

    Example (OpenAI):
        from litellm.types.rag import RAGIngestOptions, OpenAIVectorStoreOptions

        options: RAGIngestOptions = {
            "vector_store": OpenAIVectorStoreOptions(
                custom_llm_provider="openai",
                vector_store_id="vs_xxx",  # optional
            )
        }

    Example (Bedrock):
        from litellm.types.rag import RAGIngestOptions, BedrockVectorStoreOptions

        options: RAGIngestOptions = {
            "vector_store": BedrockVectorStoreOptions(
                custom_llm_provider="bedrock",
                vector_store_id="KB_ID",  # optional - auto-creates if not provided
                wait_for_ingestion=True,
            )
        }
    """

    name: Optional[str]  # Optional pipeline name for logging
    ocr: Optional[RAGIngestOCROptions]  # Optional OCR step
    chunking_strategy: Optional[RAGChunkingStrategy]  # RecursiveCharacterTextSplitter args
    embedding: Optional[RAGIngestEmbeddingOptions]  # Embedding model config
    vector_store: RAGIngestVectorStoreOptions  # OpenAI, Bedrock, or Vertex AI config
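

# Usage sketch: a full pipeline config combining the pieces above (OCR,
# chunking, embedding, vector store). All values are illustrative.
_example_ingest_options: RAGIngestOptions = {
    "name": "docs-ingest",
    "ocr": {"model": "mistral/mistral-ocr-latest"},
    "chunking_strategy": {"chunk_size": 1000, "chunk_overlap": 200},
    "embedding": {"model": "text-embedding-3-small"},
    "vector_store": {"custom_llm_provider": "openai"},
}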


class RAGIngestResponse(TypedDict, total=False):
    """Response from RAG ingest API."""

    id: str  # Unique ingest job ID
    status: Literal["completed", "in_progress", "failed"]
    vector_store_id: str  # The vector store ID (created or existing)
    file_id: Optional[str]  # The file ID in the vector store
    error: Optional[str]  # Error message if status is "failed"
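

# Sketch: minimal handling of an ingest response, raising on the documented
# "failed" status. This helper is illustrative, not part of the module.
def _require_vector_store_id(response: RAGIngestResponse) -> str:
    if response.get("status") == "failed":
        raise RuntimeError(response.get("error") or "RAG ingest failed")
    return response.get("vector_store_id", "")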


class RAGIngestRequest(BaseModel):
    """Request body for RAG ingest API (for validation)."""

    file_url: Optional[str] = None  # URL to fetch file from
    file_id: Optional[str] = None  # Existing file ID
    ingest_options: Dict[str, Any]  # RAGIngestOptions as dict for flexibility

    class Config:
        extra = "allow"  # Allow additional fields