Guardrails API - new structured_messages param (#17518)

* fix(generic_guardrail_api.py): add 'structured_messages' support

allows the guardrail provider to know whether a text came from a system or a user message

* fix(generic_guardrail_api.md): document 'structured_messages' parameter

gives the guardrail API provider a way to distinguish between user and system messages

* feat(anthropic/): return OpenAI chat completion format structured messages when calls are made via `/v1/messages` on Anthropic

* feat(responses/guardrail_translation): support 'structured_messages' param for guardrails

passes structured OpenAI chat completion spec messages to guardrail checks when using the /v1/responses API

allows guardrail checks to work consistently across APIs
Author: Krish Dholakia (committed by GitHub)
Date: 2025-12-04 22:08:00 -08:00
Parent: 51cc102c30 · Commit: b3a3081e8e
24 changed files with 82 additions and 13 deletions


@@ -398,6 +398,7 @@ class LitellmBasicGuardrailRequest(BaseModel):
input_type: Literal["request", "response"]
litellm_call_id: Optional[str] = None
litellm_trace_id: Optional[str] = None
structured_messages: Optional[List[Dict[str, Any]]] = None
class LitellmBasicGuardrailResponse(BaseModel):


@@ -69,6 +69,10 @@ Implement `POST /beta/litellm_basic_guardrail_api`
}
}
],
"structured_messages": [ // optional, full messages in OpenAI format (for chat endpoints)
{"role": "system", "content": "You are a helpful assistant"},
{"role": "user", "content": "Hello"}
],
"request_data": {
"user_api_key_hash": "hash of the litellm virtual key used",
"user_api_key_alias": "alias of the litellm virtual key used",
@@ -147,6 +151,29 @@ The `tools` parameter provides information about available function/tool definitions
- Log tool usage for audit purposes
- Block sensitive tools based on user context
### `structured_messages` Parameter

The `structured_messages` parameter provides the full input in OpenAI chat completion spec format, useful for distinguishing between system and user messages.

**Format:** Array of OpenAI chat completion messages (see [OpenAI API reference](https://platform.openai.com/docs/api-reference/chat/create#chat-create-messages))

**Example:**

```json
[
  {"role": "system", "content": "You are a helpful assistant"},
  {"role": "user", "content": "Hello"}
]
```

**Availability:**

- **Supported endpoints:** `/v1/chat/completions`, `/v1/messages`, `/v1/responses`
- **Input only:** Only passed for `input_type="request"` (pre-call guardrails)

**Use cases:**

- Apply different policies for system vs user messages
- Enforce role-based content restrictions
- Log structured conversation context
## LiteLLM Configuration

Add to `config.yaml`:
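The diff cuts away the config block that follows; here is a minimal sketch consistent with the test config further down in this commit (the guardrail name and `api_base` are placeholder values):

```yaml
guardrails:
  - guardrail_name: generic-guardrail      # placeholder name
    litellm_params:
      guardrail: generic_guardrail_api
      mode: ["pre_call"]                   # structured_messages is only sent on pre-call (input) checks
      api_base: http://localhost:8080      # your guardrail server
```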
@@ -211,6 +238,7 @@ class GuardrailRequest(BaseModel):
texts: List[str]
images: Optional[List[str]] = None
tools: Optional[List[Dict[str, Any]]] = None # OpenAI ChatCompletionToolParam format
structured_messages: Optional[List[Dict[str, Any]]] = None # OpenAI messages format (for chat endpoints)
request_data: Dict[str, Any]
input_type: str # "request" or "response"
litellm_call_id: Optional[str] = None
@@ -247,6 +275,17 @@ async def apply_guardrail(request: GuardrailRequest):
blocked_reason=f"Tool '{function_name}' is not allowed"
)
    # Example: Check structured messages (if present in request)
    if request.structured_messages:
        for message in request.structured_messages:
            if message.get("role") == "system":
                # Apply stricter policies to system messages; note that OpenAI
                # message content can be a string or a list of content parts
                content = message.get("content", "")
                if isinstance(content, str) and "admin" in content.lower():
                    return GuardrailResponse(
                        action="BLOCKED",
                        blocked_reason="System message contains restricted terms"
                    )

    return GuardrailResponse(action="NONE")
```


@@ -22,6 +22,11 @@ from litellm.llms.anthropic.experimental_pass_through.adapters.transformation import (
)
from litellm.llms.base_llm.guardrail_translation.base_translation import BaseTranslation
from litellm.types.guardrails import GenericGuardrailAPIInputs
+from litellm.types.llms.anthropic import (
+    AllAnthropicToolsValues,
+    AnthropicMessagesRequest,
+)
+from litellm.types.llms.openai import ChatCompletionToolParam
-from litellm.types.llms.anthropic import AllAnthropicToolsValues
from litellm.types.llms.openai import (
ChatCompletionToolCallChunk,
@@ -65,9 +70,19 @@ class AnthropicMessagesHandler(BaseTranslation):
if messages is None:
return data
+        chat_completion_compatible_request = (
+            LiteLLMAnthropicMessagesAdapter().translate_anthropic_to_openai(
+                anthropic_message_request=cast(AnthropicMessagesRequest, data)
+            )
+        )
+        structured_messages = chat_completion_compatible_request.get("messages", [])
texts_to_check: List[str] = []
images_to_check: List[str] = []
-        tools_to_check: List[ChatCompletionToolParam] = []
+        tools_to_check: List[ChatCompletionToolParam] = (
+            chat_completion_compatible_request.get("tools", [])
+        )
task_mappings: List[Tuple[int, Optional[int]]] = []
# Track (message_index, content_index) for each text
# content_index is None for string content, int for list content
@@ -82,12 +97,6 @@ class AnthropicMessagesHandler(BaseTranslation):
task_mappings=task_mappings,
)
-        if tools is not None:
-            self._extract_input_tools(
-                tools=tools,
-                tools_to_check=tools_to_check,
-            )
# Step 2: Apply guardrail to all texts in batch
if texts_to_check:
inputs = GenericGuardrailAPIInputs(texts=texts_to_check)
@@ -95,6 +104,8 @@ class AnthropicMessagesHandler(BaseTranslation):
inputs["images"] = images_to_check
if tools_to_check:
inputs["tools"] = tools_to_check
if structured_messages:
inputs["structured_messages"] = structured_messages
guardrailed_inputs = await guardrail_to_apply.apply_guardrail(
inputs=inputs,
request_data=data,
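For reference, a minimal sketch of the translation step the handler now performs; the adapter, method, and types are the ones imported above, while the request payload and model name are illustrative:

```python
from typing import cast

from litellm.llms.anthropic.experimental_pass_through.adapters.transformation import (
    LiteLLMAnthropicMessagesAdapter,
)
from litellm.types.llms.anthropic import AnthropicMessagesRequest

# Illustrative /v1/messages payload: in the Anthropic spec, "system" is a
# top-level field rather than a message role.
anthropic_request = {
    "model": "claude-3-5-sonnet-20241022",
    "max_tokens": 256,
    "system": "You are a helpful assistant",
    "messages": [{"role": "user", "content": "Hello"}],
}

openai_request = LiteLLMAnthropicMessagesAdapter().translate_anthropic_to_openai(
    anthropic_message_request=cast(AnthropicMessagesRequest, anthropic_request)
)

# The translated request carries OpenAI-format messages, which the handler
# forwards to the guardrail as `structured_messages`: the top-level system
# prompt becomes a {"role": "system"} message ahead of the user turn.
structured_messages = openai_request.get("messages", [])
```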


@@ -80,6 +80,10 @@ class OpenAIChatCompletionsHandler(BaseTranslation):
inputs["images"] = images_to_check
if tool_calls_to_check:
inputs["tool_calls"] = tool_calls_to_check # type: ignore
if messages:
inputs["structured_messages"] = (
messages # pass the openai /chat/completions messages to the guardrail, as-is
)
guardrailed_inputs = await guardrail_to_apply.apply_guardrail(
inputs=inputs,
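No translation is needed on `/v1/chat/completions`, since the request messages are already in OpenAI format. A minimal sketch of the resulting guardrail inputs, assuming `GenericGuardrailAPIInputs` behaves as the TypedDict-style mapping the handlers above treat it as (the request body is hypothetical):

```python
from litellm.types.guardrails import GenericGuardrailAPIInputs

# Hypothetical /v1/chat/completions request messages.
messages = [
    {"role": "system", "content": "You are a helpful assistant"},
    {"role": "user", "content": "Hello"},
]

# Texts are extracted for the plain-text checks; the messages themselves are
# forwarded verbatim so the provider can tell roles apart.
inputs = GenericGuardrailAPIInputs(texts=["Hello"])
inputs["structured_messages"] = messages
```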


@@ -81,6 +81,13 @@ class OpenAIResponsesHandler(BaseTranslation):
if input_data is None:
return data
structured_messages = (
LiteLLMCompletionResponsesConfig.transform_responses_api_input_to_messages(
input=input_data,
responses_api_request=data,
)
)
# Handle simple string input
if isinstance(input_data, str):
inputs = GenericGuardrailAPIInputs(texts=[input_data])
@@ -91,6 +98,8 @@ class OpenAIResponsesHandler(BaseTranslation):
self._extract_and_transform_tools(data["tools"], tools_to_check)
if tools_to_check:
inputs["tools"] = tools_to_check
if structured_messages:
inputs["structured_messages"] = structured_messages # type: ignore
guardrailed_inputs = await guardrail_to_apply.apply_guardrail(
inputs=inputs,
@@ -134,6 +143,8 @@ class OpenAIResponsesHandler(BaseTranslation):
inputs["images"] = images_to_check
if tools_to_check:
inputs["tools"] = tools_to_check
if structured_messages:
inputs["structured_messages"] = structured_messages # type: ignore
guardrailed_inputs = await guardrail_to_apply.apply_guardrail(
inputs=inputs,
request_data=data,
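On `/v1/responses` the input may be a plain string or a list of input items, so it is first converted to chat-completion messages. A sketch of that conversion step; the method name and keyword arguments are taken from the diff above, while the import path and request body are assumptions:

```python
# Assumed import path for the config class used in the diff above.
from litellm.responses.litellm_completion_transformation.transformation import (
    LiteLLMCompletionResponsesConfig,
)

# Hypothetical /v1/responses request body with simple string input.
responses_request = {
    "model": "gpt-4.1",
    "input": "Hello",
}

structured_messages = (
    LiteLLMCompletionResponsesConfig.transform_responses_api_input_to_messages(
        input=responses_request["input"],
        responses_api_request=responses_request,
    )
)
# Expected shape: OpenAI chat-completion messages, e.g.
# [{"role": "user", "content": "Hello"}]
```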

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long


@@ -15,7 +15,7 @@ guardrails:
- guardrail_name: generic-guardrail
litellm_params:
guardrail: generic_guardrail_api
mode: ["post_call"]
mode: ["pre_call"]
headers:
Authorization: Bearer mock-bedrock-token-12345
api_base: http://localhost:8080


@@ -175,6 +175,7 @@ class GenericGuardrailAPI(CustomGuardrail):
texts = inputs.get("texts", [])
images = inputs.get("images")
tools = inputs.get("tools")
structured_messages = inputs.get("structured_messages")
tool_calls = inputs.get("tool_calls")
# Use provided request_data or create an empty dict
@@ -202,6 +203,7 @@ class GenericGuardrailAPI(CustomGuardrail):
request_data=user_metadata,
images=images,
tools=tools,
structured_messages=structured_messages,
tool_calls=tool_calls,
additional_provider_specific_params=additional_params,
input_type=input_type,
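With the field threaded through the hook, the guardrail provider ends up receiving a request payload along these lines (illustrative values; the other documented fields are elided):

```json
{
  "texts": ["Hello"],
  "structured_messages": [
    {"role": "system", "content": "You are a helpful assistant"},
    {"role": "user", "content": "Hello"}
  ],
  "input_type": "request",
  "request_data": {"user_api_key_alias": "example-key"}
}
```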


@@ -5,6 +5,7 @@ from typing import Any, Dict, List, Literal, Optional, Union
from pydantic import BaseModel, ConfigDict, Field
from typing_extensions import Required, TypedDict
-from litellm.types.llms.openai import AllMessageValues, ChatCompletionToolParam
+from litellm.types.llms.openai import (
+    AllMessageValues,
+    ChatCompletionToolCallChunk,


@@ -3,6 +3,7 @@ from typing import Any, Dict, List, Literal, Optional
from pydantic import BaseModel, Field
from typing_extensions import TypedDict
-from litellm.types.llms.openai import AllMessageValues, ChatCompletionToolParam
+from litellm.types.llms.openai import (
+    ChatCompletionToolCallChunk,
+    ChatCompletionToolParam,
@@ -53,11 +54,12 @@ class GenericGuardrailAPIRequest(BaseModel):
litellm_trace_id: Optional[
str
] # the trace id of the LLM call - useful if there are multiple LLM calls for the same conversation
-    texts: List[str]
-    request_data: GenericGuardrailAPIMetadata
-    additional_provider_specific_params: Optional[Dict[str, Any]]
+    structured_messages: Optional[List[AllMessageValues]]
    images: Optional[List[str]]
    tools: Optional[List[ChatCompletionToolParam]]
+    texts: Optional[List[str]]
+    request_data: GenericGuardrailAPIMetadata
+    additional_provider_specific_params: Optional[Dict[str, Any]]
    tool_calls: Optional[List[ChatCompletionToolCallChunk]]