Guardrails API - new structured_messages param (#17518)

* fix(generic_guardrail_api.py): add 'structured_messages' support

allows the guardrail provider to know whether a text came from a system or a user message

* fix(generic_guardrail_api.md): document 'structured_messages' parameter

gives the guardrail API provider a way to distinguish between user and system messages

* feat(anthropic/): return OpenAI chat completion format structured messages when calls are made via `/v1/messages` on Anthropic

* feat(responses/guardrail_translation): support 'structured_messages' param for guardrails

passes structured OpenAI chat completion spec messages to guardrail checks when using the /v1/responses API

allows guardrail checks to work consistently across APIs
Author: Krish Dholakia (committed by GitHub)
Date: 2025-12-04 22:08:00 -08:00
Parent: 51cc102c30 · Commit: b3a3081e8e
24 changed files with 82 additions and 13 deletions


@@ -398,6 +398,7 @@ class LitellmBasicGuardrailRequest(BaseModel):
input_type: Literal["request", "response"]
litellm_call_id: Optional[str] = None
litellm_trace_id: Optional[str] = None
structured_messages: Optional[List[Dict[str, Any]]] = None
class LitellmBasicGuardrailResponse(BaseModel):


@@ -69,6 +69,10 @@ Implement `POST /beta/litellm_basic_guardrail_api`
}
}
],
"structured_messages": [ // optional, full messages in OpenAI format (for chat endpoints)
{"role": "system", "content": "You are a helpful assistant"},
{"role": "user", "content": "Hello"}
],
"request_data": {
"user_api_key_hash": "hash of the litellm virtual key used",
"user_api_key_alias": "alias of the litellm virtual key used",
@@ -147,6 +151,29 @@ The `tools` parameter provides information about available function/tool definitions
- Log tool usage for audit purposes
- Block sensitive tools based on user context
### `structured_messages` Parameter

The `structured_messages` parameter provides the full input in OpenAI chat completion spec format, useful for distinguishing between system and user messages.

**Format:** Array of OpenAI chat completion messages (see [OpenAI API reference](https://platform.openai.com/docs/api-reference/chat/create#chat-create-messages))

**Example:**

```json
[
  {"role": "system", "content": "You are a helpful assistant"},
  {"role": "user", "content": "Hello"}
]
```

**Availability:**

- **Supported endpoints:** `/v1/chat/completions`, `/v1/messages`, `/v1/responses`
- **Input only:** Only passed for `input_type="request"` (pre-call guardrails)

**Use cases:**

- Apply different policies for system vs user messages
- Enforce role-based content restrictions
- Log structured conversation context
## LiteLLM Configuration

Add to `config.yaml`:
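The diff cuts away the config block that follows; here is a minimal sketch consistent with the test config further down in this commit (the guardrail name and `api_base` are placeholder values):

```yaml
guardrails:
  - guardrail_name: generic-guardrail      # placeholder name
    litellm_params:
      guardrail: generic_guardrail_api
      mode: ["pre_call"]                   # structured_messages is only sent on pre-call (input) checks
      api_base: http://localhost:8080      # your guardrail server
```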
@@ -211,6 +238,7 @@ class GuardrailRequest(BaseModel):
texts: List[str]
images: Optional[List[str]] = None
tools: Optional[List[Dict[str, Any]]] = None # OpenAI ChatCompletionToolParam format
structured_messages: Optional[List[Dict[str, Any]]] = None # OpenAI messages format (for chat endpoints)
request_data: Dict[str, Any]
input_type: str # "request" or "response"
litellm_call_id: Optional[str] = None
@@ -247,6 +275,17 @@ async def apply_guardrail(request: GuardrailRequest):
blocked_reason=f"Tool '{function_name}' is not allowed"
)
    # Example: Check structured messages (if present in request)
    if request.structured_messages:
        for message in request.structured_messages:
            if message.get("role") == "system":
                # Apply stricter policies to system messages; note that OpenAI
                # message content can be a string or a list of content parts
                content = message.get("content", "")
                if isinstance(content, str) and "admin" in content.lower():
                    return GuardrailResponse(
                        action="BLOCKED",
                        blocked_reason="System message contains restricted terms"
                    )

    return GuardrailResponse(action="NONE")
```


@@ -22,6 +22,11 @@ from litellm.llms.anthropic.experimental_pass_through.adapters.transformation import (
)
from litellm.llms.base_llm.guardrail_translation.base_translation import BaseTranslation
from litellm.types.guardrails import GenericGuardrailAPIInputs
+from litellm.types.llms.anthropic import (
+    AllAnthropicToolsValues,
+    AnthropicMessagesRequest,
+)
+from litellm.types.llms.openai import ChatCompletionToolParam
-from litellm.types.llms.anthropic import AllAnthropicToolsValues
from litellm.types.llms.openai import (
ChatCompletionToolCallChunk,
@@ -65,9 +70,19 @@ class AnthropicMessagesHandler(BaseTranslation):
if messages is None:
return data
+        chat_completion_compatible_request = (
+            LiteLLMAnthropicMessagesAdapter().translate_anthropic_to_openai(
+                anthropic_message_request=cast(AnthropicMessagesRequest, data)
+            )
+        )
+        structured_messages = chat_completion_compatible_request.get("messages", [])
texts_to_check: List[str] = []
images_to_check: List[str] = []
-        tools_to_check: List[ChatCompletionToolParam] = []
+        tools_to_check: List[ChatCompletionToolParam] = (
+            chat_completion_compatible_request.get("tools", [])
+        )
task_mappings: List[Tuple[int, Optional[int]]] = []
# Track (message_index, content_index) for each text
# content_index is None for string content, int for list content
@@ -82,12 +97,6 @@ class AnthropicMessagesHandler(BaseTranslation):
task_mappings=task_mappings,
)
-        if tools is not None:
-            self._extract_input_tools(
-                tools=tools,
-                tools_to_check=tools_to_check,
-            )
# Step 2: Apply guardrail to all texts in batch
if texts_to_check:
inputs = GenericGuardrailAPIInputs(texts=texts_to_check)
@@ -95,6 +104,8 @@ class AnthropicMessagesHandler(BaseTranslation):
inputs["images"] = images_to_check
if tools_to_check:
inputs["tools"] = tools_to_check
if structured_messages:
inputs["structured_messages"] = structured_messages
guardrailed_inputs = await guardrail_to_apply.apply_guardrail(
inputs=inputs,
request_data=data,
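For reference, a minimal sketch of the translation step the handler now performs; the adapter, method, and types are the ones imported above, while the request payload and model name are illustrative:

```python
from typing import cast

from litellm.llms.anthropic.experimental_pass_through.adapters.transformation import (
    LiteLLMAnthropicMessagesAdapter,
)
from litellm.types.llms.anthropic import AnthropicMessagesRequest

# Illustrative /v1/messages payload: in the Anthropic spec, "system" is a
# top-level field rather than a message role.
anthropic_request = {
    "model": "claude-3-5-sonnet-20241022",
    "max_tokens": 256,
    "system": "You are a helpful assistant",
    "messages": [{"role": "user", "content": "Hello"}],
}

openai_request = LiteLLMAnthropicMessagesAdapter().translate_anthropic_to_openai(
    anthropic_message_request=cast(AnthropicMessagesRequest, anthropic_request)
)

# The translated request carries OpenAI-format messages, which the handler
# forwards to the guardrail as `structured_messages`: the top-level system
# prompt becomes a {"role": "system"} message ahead of the user turn.
structured_messages = openai_request.get("messages", [])
```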


@@ -80,6 +80,10 @@ class OpenAIChatCompletionsHandler(BaseTranslation):
inputs["images"] = images_to_check
if tool_calls_to_check:
inputs["tool_calls"] = tool_calls_to_check # type: ignore
if messages:
inputs["structured_messages"] = (
messages # pass the openai /chat/completions messages to the guardrail, as-is
)
guardrailed_inputs = await guardrail_to_apply.apply_guardrail(
inputs=inputs,
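No translation is needed on `/v1/chat/completions`, since the request messages are already in OpenAI format. A minimal sketch of the resulting guardrail inputs, assuming `GenericGuardrailAPIInputs` behaves as the TypedDict-style mapping the handlers above treat it as (the request body is hypothetical):

```python
from litellm.types.guardrails import GenericGuardrailAPIInputs

# Hypothetical /v1/chat/completions request messages.
messages = [
    {"role": "system", "content": "You are a helpful assistant"},
    {"role": "user", "content": "Hello"},
]

# Texts are extracted for the plain-text checks; the messages themselves are
# forwarded verbatim so the provider can tell roles apart.
inputs = GenericGuardrailAPIInputs(texts=["Hello"])
inputs["structured_messages"] = messages
```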


@@ -81,6 +81,13 @@ class OpenAIResponsesHandler(BaseTranslation):
if input_data is None:
return data
structured_messages = (
LiteLLMCompletionResponsesConfig.transform_responses_api_input_to_messages(
input=input_data,
responses_api_request=data,
)
)
# Handle simple string input
if isinstance(input_data, str):
inputs = GenericGuardrailAPIInputs(texts=[input_data])
@@ -91,6 +98,8 @@ class OpenAIResponsesHandler(BaseTranslation):
self._extract_and_transform_tools(data["tools"], tools_to_check)
if tools_to_check:
inputs["tools"] = tools_to_check
if structured_messages:
inputs["structured_messages"] = structured_messages # type: ignore
guardrailed_inputs = await guardrail_to_apply.apply_guardrail(
inputs=inputs,
@@ -134,6 +143,8 @@ class OpenAIResponsesHandler(BaseTranslation):
inputs["images"] = images_to_check
if tools_to_check:
inputs["tools"] = tools_to_check
if structured_messages:
inputs["structured_messages"] = structured_messages # type: ignore
guardrailed_inputs = await guardrail_to_apply.apply_guardrail(
inputs=inputs,
request_data=data,
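On `/v1/responses` the input may be a plain string or a list of input items, so it is first converted to chat-completion messages. A sketch of that conversion step; the method name and keyword arguments are taken from the diff above, while the import path and request body are assumptions:

```python
# Assumed import path for the config class used in the diff above.
from litellm.responses.litellm_completion_transformation.transformation import (
    LiteLLMCompletionResponsesConfig,
)

# Hypothetical /v1/responses request body with simple string input.
responses_request = {
    "model": "gpt-4.1",
    "input": "Hello",
}

structured_messages = (
    LiteLLMCompletionResponsesConfig.transform_responses_api_input_to_messages(
        input=responses_request["input"],
        responses_api_request=responses_request,
    )
)
# Expected shape: OpenAI chat-completion messages, e.g.
# [{"role": "user", "content": "Hello"}]
```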

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long


@@ -15,7 +15,7 @@ guardrails:
- guardrail_name: generic-guardrail
litellm_params:
guardrail: generic_guardrail_api
mode: ["post_call"]
mode: ["pre_call"]
headers:
Authorization: Bearer mock-bedrock-token-12345
api_base: http://localhost:8080


@@ -175,6 +175,7 @@ class GenericGuardrailAPI(CustomGuardrail):
texts = inputs.get("texts", [])
images = inputs.get("images")
tools = inputs.get("tools")
structured_messages = inputs.get("structured_messages")
tool_calls = inputs.get("tool_calls")
# Use provided request_data or create an empty dict
@@ -202,6 +203,7 @@ class GenericGuardrailAPI(CustomGuardrail):
request_data=user_metadata,
images=images,
tools=tools,
structured_messages=structured_messages,
tool_calls=tool_calls,
additional_provider_specific_params=additional_params,
input_type=input_type,
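With the field threaded through the hook, the guardrail provider ends up receiving a request payload along these lines (illustrative values; the other documented fields are elided):

```json
{
  "texts": ["Hello"],
  "structured_messages": [
    {"role": "system", "content": "You are a helpful assistant"},
    {"role": "user", "content": "Hello"}
  ],
  "input_type": "request",
  "request_data": {"user_api_key_alias": "example-key"}
}
```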


@@ -5,6 +5,7 @@ from typing import Any, Dict, List, Literal, Optional, Union
from pydantic import BaseModel, ConfigDict, Field
from typing_extensions import Required, TypedDict
-from litellm.types.llms.openai import AllMessageValues, ChatCompletionToolParam
+from litellm.types.llms.openai import (
+    AllMessageValues,
+    ChatCompletionToolCallChunk,


@@ -3,6 +3,7 @@ from typing import Any, Dict, List, Literal, Optional
from pydantic import BaseModel, Field
from typing_extensions import TypedDict
-from litellm.types.llms.openai import AllMessageValues, ChatCompletionToolParam
+from litellm.types.llms.openai import (
+    ChatCompletionToolCallChunk,
+    ChatCompletionToolParam,
@@ -53,11 +54,12 @@ class GenericGuardrailAPIRequest(BaseModel):
litellm_trace_id: Optional[
str
] # the trace id of the LLM call - useful if there are multiple LLM calls for the same conversation
-    texts: List[str]
-    request_data: GenericGuardrailAPIMetadata
-    additional_provider_specific_params: Optional[Dict[str, Any]]
+    structured_messages: Optional[List[AllMessageValues]]
    images: Optional[List[str]]
    tools: Optional[List[ChatCompletionToolParam]]
+    texts: Optional[List[str]]
+    request_data: GenericGuardrailAPIMetadata
+    additional_provider_specific_params: Optional[Dict[str, Any]]
    tool_calls: Optional[List[ChatCompletionToolCallChunk]]