"""
|
|
Type definitions for RAG (Retrieval Augmented Generation) Ingest API.
|
|
"""
|
|
|
|
from typing import Any, Dict, List, Literal, Optional, Union
|
|
|
|
from pydantic import BaseModel
|
|
from typing_extensions import TypedDict


class RAGChunkingStrategy(TypedDict, total=False):
    """
    Chunking strategy config for RAG ingest using RecursiveCharacterTextSplitter.

    See: https://docs.langchain.com/oss/python/langchain/rag
    """

    chunk_size: int  # Maximum size of chunks (default: 1000)
    chunk_overlap: int  # Overlap between chunks (default: 200)
    separators: Optional[List[str]]  # Custom separators for splitting
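

# Illustrative only (not part of the module's public surface): one way a
# caller might populate RAGChunkingStrategy. The separator list follows a
# common RecursiveCharacterTextSplitter default and is an assumption here.
_EXAMPLE_CHUNKING_STRATEGY: RAGChunkingStrategy = {
    "chunk_size": 1000,
    "chunk_overlap": 200,
    "separators": ["\n\n", "\n", " ", ""],
}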


class RAGIngestOCROptions(TypedDict, total=False):
    """OCR configuration for RAG ingest pipeline."""

    model: str  # e.g., "mistral/mistral-ocr-latest"


class RAGIngestEmbeddingOptions(TypedDict, total=False):
    """Embedding configuration for RAG ingest pipeline."""

    model: str  # e.g., "text-embedding-3-small"


class OpenAIVectorStoreOptions(TypedDict, total=False):
    """
    OpenAI vector store configuration.

    Example (auto-create):
        {"custom_llm_provider": "openai"}

    Example (use existing):
        {"custom_llm_provider": "openai", "vector_store_id": "vs_xxx"}

    Example (with credentials):
        {"custom_llm_provider": "openai", "litellm_credential_name": "my-openai-creds"}
    """

    custom_llm_provider: Literal["openai"]
    vector_store_id: Optional[str]  # Existing VS ID (auto-creates if not provided)
    ttl_days: Optional[int]  # Time-to-live in days for indexed content

    # Credentials (loaded from litellm.credential_list if litellm_credential_name is provided)
    litellm_credential_name: Optional[str]  # Credential name to load from litellm.credential_list
    api_key: Optional[str]  # Direct API key (alternative to litellm_credential_name)
    api_base: Optional[str]  # Direct API base (alternative to litellm_credential_name)
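

# Illustrative only: an OpenAI config that reuses an existing vector store.
# The store ID is a placeholder; omit it to have the pipeline auto-create one.
_EXAMPLE_OPENAI_VECTOR_STORE: OpenAIVectorStoreOptions = {
    "custom_llm_provider": "openai",
    "vector_store_id": "vs_xxx",
}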


class BedrockVectorStoreOptions(TypedDict, total=False):
    """
    Bedrock Knowledge Base configuration.

    Example (auto-create KB and all resources):
        {"custom_llm_provider": "bedrock"}

    Example (use existing KB):
        {"custom_llm_provider": "bedrock", "vector_store_id": "KB_ID"}

    Example (with credentials):
        {"custom_llm_provider": "bedrock", "litellm_credential_name": "my-aws-creds"}

    Auto-creation creates: S3 bucket, OpenSearch Serverless collection,
    IAM role, Knowledge Base, and Data Source.
    """

    custom_llm_provider: Literal["bedrock"]
    vector_store_id: Optional[str]  # Existing KB ID (auto-creates if not provided)

    # Bedrock-specific options
    s3_bucket: Optional[str]  # S3 bucket (auto-created if not provided)
    s3_prefix: Optional[str]  # S3 key prefix (default: "data/")
    embedding_model: Optional[str]  # Embedding model (default: amazon.titan-embed-text-v2:0)
    data_source_id: Optional[str]  # For existing KB: override auto-detected DS
    wait_for_ingestion: Optional[bool]  # Wait for completion (default: False - returns immediately)
    ingestion_timeout: Optional[int]  # Timeout in seconds if wait_for_ingestion=True (default: 300)

    # Credentials (loaded from litellm.credential_list if litellm_credential_name is provided)
    litellm_credential_name: Optional[str]  # Credential name to load from litellm.credential_list

    # AWS auth (uses BaseAWSLLM)
    aws_access_key_id: Optional[str]
    aws_secret_access_key: Optional[str]
    aws_session_token: Optional[str]
    aws_region_name: Optional[str]  # default: us-west-2
    aws_role_name: Optional[str]
    aws_session_name: Optional[str]
    aws_profile_name: Optional[str]
    aws_web_identity_token: Optional[str]
    aws_sts_endpoint: Optional[str]
    aws_external_id: Optional[str]
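

# Illustrative only: a Bedrock config that auto-creates the Knowledge Base and
# blocks until ingestion finishes. The region value is a placeholder.
_EXAMPLE_BEDROCK_VECTOR_STORE: BedrockVectorStoreOptions = {
    "custom_llm_provider": "bedrock",
    "aws_region_name": "us-west-2",
    "wait_for_ingestion": True,
}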


class VertexAIVectorStoreOptions(TypedDict, total=False):
    """
    Vertex AI RAG Engine configuration.

    Example (use existing corpus):
        {"custom_llm_provider": "vertex_ai", "vector_store_id": "CORPUS_ID", "gcs_bucket": "my-bucket"}

    Requires:
    - gcloud auth application-default login (for ADC authentication)
    - Files are uploaded to GCS via litellm.files.create_file, then imported into the RAG corpus
    - GCS bucket must be provided via gcs_bucket or the GCS_BUCKET_NAME env var
    """

    custom_llm_provider: Literal["vertex_ai"]
    vector_store_id: str  # RAG corpus ID (required for Vertex AI)

    # GCP config
    vertex_project: Optional[str]  # GCP project ID (uses env VERTEXAI_PROJECT if not set)
    vertex_location: Optional[str]  # GCP region (default: us-central1)
    vertex_credentials: Optional[str]  # Path to credentials JSON (uses ADC if not set)
    gcs_bucket: Optional[str]  # GCS bucket for file uploads (uses env GCS_BUCKET_NAME if not set)

    # Import settings
    wait_for_import: Optional[bool]  # Wait for import to complete (default: True)
    import_timeout: Optional[int]  # Timeout in seconds (default: 600)
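

# Illustrative only: a Vertex AI config pointing at an existing RAG corpus.
# The corpus ID and bucket name are placeholders.
_EXAMPLE_VERTEX_VECTOR_STORE: VertexAIVectorStoreOptions = {
    "custom_llm_provider": "vertex_ai",
    "vector_store_id": "CORPUS_ID",
    "gcs_bucket": "my-bucket",
}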


# Union type for vector store options
RAGIngestVectorStoreOptions = Union[
    OpenAIVectorStoreOptions, BedrockVectorStoreOptions, VertexAIVectorStoreOptions
]


class RAGIngestOptions(TypedDict, total=False):
    """
    Combined options for the RAG ingest pipeline.

    Unified interface - just specify custom_llm_provider:

    Example (OpenAI):
        from litellm.types.rag import RAGIngestOptions, OpenAIVectorStoreOptions

        options: RAGIngestOptions = {
            "vector_store": OpenAIVectorStoreOptions(
                custom_llm_provider="openai",
                vector_store_id="vs_xxx",  # optional
            )
        }

    Example (Bedrock):
        from litellm.types.rag import RAGIngestOptions, BedrockVectorStoreOptions

        options: RAGIngestOptions = {
            "vector_store": BedrockVectorStoreOptions(
                custom_llm_provider="bedrock",
                vector_store_id="KB_ID",  # optional - auto-creates if not provided
                wait_for_ingestion=True,
            )
        }
    """

    name: Optional[str]  # Optional pipeline name for logging
    ocr: Optional[RAGIngestOCROptions]  # Optional OCR step
    chunking_strategy: Optional[RAGChunkingStrategy]  # RecursiveCharacterTextSplitter args
    embedding: Optional[RAGIngestEmbeddingOptions]  # Embedding model config
    vector_store: RAGIngestVectorStoreOptions  # OpenAI, Bedrock, or Vertex AI config
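

# Illustrative only: a full pipeline config combining the examples above. The
# OCR and embedding model names mirror the field comments and are assumptions,
# not defaults enforced by this module.
_EXAMPLE_INGEST_OPTIONS: RAGIngestOptions = {
    "name": "docs-ingest",
    "ocr": {"model": "mistral/mistral-ocr-latest"},
    "chunking_strategy": _EXAMPLE_CHUNKING_STRATEGY,
    "embedding": {"model": "text-embedding-3-small"},
    "vector_store": _EXAMPLE_OPENAI_VECTOR_STORE,
}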


class RAGIngestResponse(TypedDict, total=False):
    """Response from RAG ingest API."""

    id: str  # Unique ingest job ID
    status: Literal["completed", "in_progress", "failed"]
    vector_store_id: str  # The vector store ID (created or existing)
    file_id: Optional[str]  # The file ID in the vector store
    error: Optional[str]  # Error message if status is "failed"
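

# Illustrative only: the shape of a successful response, per the fields above.
# All values are placeholders.
_EXAMPLE_INGEST_RESPONSE: RAGIngestResponse = {
    "id": "ingest_123",
    "status": "completed",
    "vector_store_id": "vs_xxx",
    "file_id": "file_abc",
}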


class RAGIngestRequest(BaseModel):
    """Request body for RAG ingest API (for validation)."""

    file_url: Optional[str] = None  # URL to fetch file from
    file_id: Optional[str] = None  # Existing file ID
    ingest_options: Dict[str, Any]  # RAGIngestOptions as dict for flexibility

    class Config:
        extra = "allow"  # Allow additional fields
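

# Illustrative only: validating an inbound payload with the Pydantic model.
# The URL is a placeholder, and the options dict reuses the example above.
_example_request = RAGIngestRequest(
    file_url="https://example.com/doc.pdf",
    ingest_options=dict(_EXAMPLE_INGEST_OPTIONS),
)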