Mirror of https://github.com/BerriAI/litellm.git (synced 2025-12-06 11:33:26 +08:00)
Fix: apply_guardrail method and improve test isolation (#17555)
* Fix Bedrock guardrail apply_guardrail method and test mocks
Fixed 4 failing tests in the guardrail test suite:
1. BedrockGuardrail.apply_guardrail now returns the original texts when the guardrail
allows content but provides no output/outputs fields. Previously it returned an
empty list, causing test_bedrock_apply_guardrail_success to fail.
2. Updated test mocks to use correct Bedrock API response format:
- Changed from 'content' field to 'output' field
- Fixed nested structure from {'text': {'text': '...'}} to {'text': '...'}
- Added missing 'output' field in filter test
3. Fixed endpoint test mocks to return GenericGuardrailAPIInputs format:
- Changed from tuple (List[str], Optional[List[str]]) to dict {'texts': [...]}
- Updated method call assertions to use 'inputs' parameter correctly
All 12 guardrail tests now pass successfully.
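For reference, a minimal sketch of the two mock-shape changes described in points 2 and 3 (illustrative only; the field names are taken from the test diffs below, not from the full Bedrock ApplyGuardrail schema):

# Old (incorrect) Bedrock mock: guardrailed text nested under "content".
old_bedrock_mock = {
    "action": "ALLOWED",
    "content": [{"text": {"text": "This is a test message with some content"}}],
}
# Corrected Bedrock mock: guardrailed text returned under "output".
new_bedrock_mock = {
    "action": "ALLOWED",
    "output": [{"text": "This is a test message with some content"}],
}

# Endpoint-level mocks likewise moved from a (List[str], Optional[List[str]])
# tuple to a GenericGuardrailAPIInputs-style dict keyed by "texts".
old_return_value = (["Redacted text: [REDACTED] and [REDACTED]"], None)
new_return_value = {"texts": ["Redacted text: [REDACTED] and [REDACTED]"]}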
* fix: remove python3-dev from Dockerfile.build_from_pip to avoid Python version conflict
The base image cgr.dev/chainguard/python:latest-dev already includes Python 3.14
and its development tools. Installing python3-dev pulls in Python 3.13 packages,
which conflict with the existing Python 3.14 installation and cause file
ownership errors during apk install.
* fix: disable callbacks in vertex fine-tuning tests to prevent Datadog logging interference
The test was failing because Datadog logging was making an HTTP POST request
that was being caught by the mock, causing assert_called_once() to fail.
By disabling callbacks during the test, we prevent Datadog from making any
HTTP calls, allowing the mock to only see the Vertex AI API call.
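To illustrate the failure mode (a hypothetical minimal sketch, not code from this commit): the tests patch AsyncHTTPHandler.post, so every POST routed through that handler, including a Datadog log flush, lands on the same mock, and assert_called_once() then sees more than one call:

from unittest.mock import AsyncMock, patch

with patch(
    "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post",
    new_callable=AsyncMock,
) as mock_post:
    # ... run the fine-tuning request under test ...
    # If a Datadog callback also POSTs through AsyncHTTPHandler, the mock
    # records two calls and this assertion fails:
    mock_post.assert_called_once()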
* fix: ensure test isolation in test_logging_non_streaming_request
Add proper cleanup to restore the original litellm.callbacks after test execution.
This prevents test interference when running as part of a larger test suite,
where global state pollution was causing async_log_success_event to be
called multiple times instead of once.
Fixes a test failure where the test expected async_log_success_event to be
called once but it was called twice, due to callbacks from previous tests
not being cleaned up.
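The commit uses inline try/finally blocks for this (see the diffs below); the same save/restore pattern could be factored into a reusable pytest fixture, sketched here as a hypothetical alternative rather than as part of this change:

import pytest

import litellm


@pytest.fixture
def isolated_callbacks():
    # Snapshot global callback state so one test cannot leak into the next.
    original = getattr(litellm, "callbacks", [])
    litellm.callbacks = []
    try:
        yield
    finally:
        # Restore the snapshot even if the test body raises.
        litellm.callbacks = original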
@@ -7,8 +7,11 @@ ENV HOME=/home/litellm
 ENV PATH="${HOME}/venv/bin:$PATH"
 
 # Install runtime dependencies
+# Note: The base image has Python 3.14, but python3-dev installs Python 3.13 which conflicts.
+# The -dev variant should include Python headers, but if compilation fails, we may need
+# to install python-3.14-dev specifically (if available in the repo)
 RUN apk update && \
-    apk add --no-cache gcc python3-dev openssl openssl-dev
+    apk add --no-cache gcc openssl openssl-dev
 
 RUN python -m venv ${HOME}/venv
 RUN ${HOME}/venv/bin/pip install --no-cache-dir --upgrade pip
@@ -1318,6 +1318,11 @@ class BedrockGuardrail(CustomGuardrail, BaseAWSLLM):
                     masked_text = str(text_content)
                 masked_texts.append(masked_text)
 
+        # If no output/outputs were provided, use the original texts
+        # This happens when the guardrail allows content without modification
+        if not masked_texts:
+            masked_texts = texts
+
         verbose_proxy_logger.debug(
             "Bedrock Guardrail: Successfully applied guardrail"
         )
@@ -208,6 +208,12 @@ async def test_create_vertex_fine_tune_jobs_mocked():
         }
     )
 
+    # Save original callbacks to restore later
+    original_callbacks = litellm.callbacks
+    # Disable callbacks to avoid Datadog logging interfering with the mock
+    litellm.callbacks = []
+
+    try:
         with patch(
             "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post",
             return_value=mock_response,
@@ -250,6 +256,9 @@ async def test_create_vertex_fine_tune_jobs_mocked():
         assert response_json["trained_tokens"] is None
         assert response_json["estimated_finish"] is None
         assert response_json["integrations"] == []
+    finally:
+        # Restore original callbacks
+        litellm.callbacks = original_callbacks
 
 
 @pytest.mark.asyncio()
@@ -280,6 +289,12 @@ async def test_create_vertex_fine_tune_jobs_mocked_with_hyperparameters():
         }
     )
 
+    # Save original callbacks to restore later
+    original_callbacks = litellm.callbacks
+    # Disable callbacks to avoid Datadog logging interfering with the mock
+    litellm.callbacks = []
+
+    try:
         with patch(
             "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post",
             return_value=mock_response,
@@ -334,6 +349,9 @@ async def test_create_vertex_fine_tune_jobs_mocked_with_hyperparameters():
         assert response_json["trained_tokens"] is None
         assert response_json["estimated_finish"] is None
         assert response_json["integrations"] == []
+    finally:
+        # Restore original callbacks
+        litellm.callbacks = original_callbacks
 
 
 # Testing OpenAI -> Vertex AI param mapping
@@ -28,9 +28,9 @@ async def test_apply_guardrail_endpoint_returns_correct_response():
     ) as mock_registry:
         # Create a mock guardrail
         mock_guardrail = Mock(spec=CustomGuardrail)
-        # Apply guardrail now returns a tuple (List[str], Optional[List[str]])
+        # Apply guardrail returns GenericGuardrailAPIInputs (dict with texts key)
         mock_guardrail.apply_guardrail = AsyncMock(
-            return_value=(["Redacted text: [REDACTED] and [REDACTED]"], None)
+            return_value={"texts": ["Redacted text: [REDACTED] and [REDACTED]"]}
         )
 
         # Configure the registry to return our mock guardrail
@@ -56,12 +56,11 @@ async def test_apply_guardrail_endpoint_returns_correct_response():
         assert isinstance(response, ApplyGuardrailResponse)
         assert response.response_text == "Redacted text: [REDACTED] and [REDACTED]"
 
-        # Verify the guardrail was called with correct parameters (new signature)
+        # Verify the guardrail was called with correct parameters
         mock_guardrail.apply_guardrail.assert_called_once_with(
-            texts=["Test text with PII"],
+            inputs={"texts": ["Test text with PII"]},
             request_data={},
             input_type="request",
-            images=None,
         )
 
 
@@ -104,9 +103,9 @@ async def test_apply_guardrail_endpoint_with_presidio_guardrail():
     ) as mock_registry:
         # Create a mock guardrail that simulates Presidio behavior
         mock_guardrail = Mock(spec=CustomGuardrail)
-        # Simulate masking PII entities - returns tuple (List[str], Optional[List[str]])
+        # Simulate masking PII entities - returns GenericGuardrailAPIInputs (dict with texts key)
         mock_guardrail.apply_guardrail = AsyncMock(
-            return_value=(["My name is [PERSON] and my email is [EMAIL_ADDRESS]"], None)
+            return_value={"texts": ["My name is [PERSON] and my email is [EMAIL_ADDRESS]"]}
         )
 
         # Configure the registry to return our mock guardrail
@@ -149,9 +148,9 @@ async def test_apply_guardrail_endpoint_without_optional_params():
     ) as mock_registry:
         # Create a mock guardrail
         mock_guardrail = Mock(spec=CustomGuardrail)
-        # Returns tuple (List[str], Optional[List[str]])
+        # Returns GenericGuardrailAPIInputs (dict with texts key)
         mock_guardrail.apply_guardrail = AsyncMock(
-            return_value=(["Processed text"], None)
+            return_value={"texts": ["Processed text"]}
         )
 
         # Configure the registry to return our mock guardrail
@@ -174,7 +173,7 @@ async def test_apply_guardrail_endpoint_without_optional_params():
         assert isinstance(response, ApplyGuardrailResponse)
         assert response.response_text == "Processed text"
 
-        # Verify the guardrail was called with new signature
+        # Verify the guardrail was called with correct parameters
        mock_guardrail.apply_guardrail.assert_called_once_with(
-            texts=["Test text"], request_data={}, input_type="request", images=None
+            inputs={"texts": ["Test text"]}, request_data={}, input_type="request"
         )
 
@@ -34,7 +34,7 @@ async def test_bedrock_apply_guardrail_success():
         # Mock a successful response from Bedrock
         mock_response = {
             "action": "ALLOWED",
-            "content": [{"text": {"text": "This is a test message with some content"}}],
+            "output": [{"text": "This is a test message with some content"}],
         }
         mock_api_request.return_value = mock_response
 
@@ -219,7 +219,7 @@ async def test_bedrock_apply_guardrail_filters_request_messages_when_flag_enable
         with patch.object(
             guardrail, "make_bedrock_api_request", new_callable=AsyncMock
         ) as mock_api:
-            mock_api.return_value = {"action": "ALLOWED"}
+            mock_api.return_value = {"action": "ALLOWED", "output": [{"text": "latest question"}]}
 
             guardrailed_inputs = await guardrail.apply_guardrail(
                 inputs={"texts": ["latest question"]},
@@ -196,6 +196,10 @@ async def test_logging_non_streaming_request():
 
     import litellm
 
+    # Save original callbacks to restore after test
+    original_callbacks = getattr(litellm, "callbacks", [])
+
+    try:
         mock_logging_obj = MockPrometheusLogger()
 
         litellm.callbacks = [mock_logging_obj]
@@ -221,6 +225,9 @@ async def test_logging_non_streaming_request():
             "kwargs"
         ]["standard_logging_object"]
         assert standard_logging_object["stream"] is not True
+    finally:
+        # Restore original callbacks to ensure test isolation
+        litellm.callbacks = original_callbacks
 
 
 def test_get_user_agent_tags():