From 655e04f16cd7255850d30275e631e2275dbd47b9 Mon Sep 17 00:00:00 2001 From: Alexsander Hamir Date: Fri, 5 Dec 2025 12:59:35 -0800 Subject: [PATCH] Fix: apply_guardrail method and improve test isolation (#17555) * Fix Bedrock guardrail apply_guardrail method and test mocks Fixed 4 failing tests in the guardrail test suite: 1. BedrockGuardrail.apply_guardrail now returns original texts when guardrail allows content but doesn't provide output/outputs fields. Previously returned empty list, causing test_bedrock_apply_guardrail_success to fail. 2. Updated test mocks to use correct Bedrock API response format: - Changed from 'content' field to 'output' field - Fixed nested structure from {'text': {'text': '...'}} to {'text': '...'} - Added missing 'output' field in filter test 3. Fixed endpoint test mocks to return GenericGuardrailAPIInputs format: - Changed from tuple (List[str], Optional[List[str]]) to dict {'texts': [...]} - Updated method call assertions to use 'inputs' parameter correctly All 12 guardrail tests now pass successfully. * fix: remove python3-dev from Dockerfile.build_from_pip to avoid Python version conflict The base image cgr.dev/chainguard/python:latest-dev already includes Python 3.14 and its development tools. Installing python3-dev pulls Python 3.13 packages which conflict with the existing Python 3.14 installation, causing file ownership errors during apk install. * fix: disable callbacks in vertex fine-tuning tests to prevent Datadog logging interference The test was failing because Datadog logging was making an HTTP POST request that was being caught by the mock, causing assert_called_once() to fail. By disabling callbacks during the test, we prevent Datadog from making any HTTP calls, allowing the mock to only see the Vertex AI API call. * fix: ensure test isolation in test_logging_non_streaming_request Add proper cleanup to restore original litellm.callbacks after test execution. This prevents test interference when running as part of a larger test suite, where global state pollution was causing async_log_success_event to be called multiple times instead of once. Fixes test failure where the test expected async_log_success_event to be called once but was being called twice due to callbacks from previous tests not being cleaned up. --- .../build_from_pip/Dockerfile.build_from_pip | 5 +- .../guardrail_hooks/bedrock_guardrails.py | 5 + tests/batches_tests/test_fine_tuning_api.py | 196 ++++++++++-------- .../test_apply_guardrail_endpoint.py | 21 +- .../test_bedrock_apply_guardrail.py | 4 +- .../test_litellm_logging.py | 53 +++-- 6 files changed, 158 insertions(+), 126 deletions(-) diff --git a/docker/build_from_pip/Dockerfile.build_from_pip b/docker/build_from_pip/Dockerfile.build_from_pip index aeb19bce21..dda6e50cbb 100644 --- a/docker/build_from_pip/Dockerfile.build_from_pip +++ b/docker/build_from_pip/Dockerfile.build_from_pip @@ -7,8 +7,11 @@ ENV HOME=/home/litellm ENV PATH="${HOME}/venv/bin:$PATH" # Install runtime dependencies +# Note: The base image has Python 3.14, but python3-dev installs Python 3.13 which conflicts. 
+# The -dev variant should include Python headers, but if compilation fails, we may need +# to install python-3.14-dev specifically (if available in the repo) RUN apk update && \ - apk add --no-cache gcc python3-dev openssl openssl-dev + apk add --no-cache gcc openssl openssl-dev RUN python -m venv ${HOME}/venv RUN ${HOME}/venv/bin/pip install --no-cache-dir --upgrade pip diff --git a/litellm/proxy/guardrails/guardrail_hooks/bedrock_guardrails.py b/litellm/proxy/guardrails/guardrail_hooks/bedrock_guardrails.py index e0fb319240..9d0211e2a0 100644 --- a/litellm/proxy/guardrails/guardrail_hooks/bedrock_guardrails.py +++ b/litellm/proxy/guardrails/guardrail_hooks/bedrock_guardrails.py @@ -1318,6 +1318,11 @@ class BedrockGuardrail(CustomGuardrail, BaseAWSLLM): masked_text = str(text_content) masked_texts.append(masked_text) + # If no output/outputs were provided, use the original texts + # This happens when the guardrail allows content without modification + if not masked_texts: + masked_texts = texts + verbose_proxy_logger.debug( "Bedrock Guardrail: Successfully applied guardrail" ) diff --git a/tests/batches_tests/test_fine_tuning_api.py b/tests/batches_tests/test_fine_tuning_api.py index 3561d99d0f..de952cfe29 100644 --- a/tests/batches_tests/test_fine_tuning_api.py +++ b/tests/batches_tests/test_fine_tuning_api.py @@ -208,48 +208,57 @@ async def test_create_vertex_fine_tune_jobs_mocked(): } ) - with patch( - "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post", - return_value=mock_response, - ) as mock_post: - create_fine_tuning_response = await litellm.acreate_fine_tuning_job( - model=base_model, - custom_llm_provider="vertex_ai", - training_file=training_file, - vertex_project=project_id, - vertex_location=location, - ) + # Save original callbacks to restore later + original_callbacks = litellm.callbacks + # Disable callbacks to avoid Datadog logging interfering with the mock + litellm.callbacks = [] + + try: + with patch( + "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post", + return_value=mock_response, + ) as mock_post: + create_fine_tuning_response = await litellm.acreate_fine_tuning_job( + model=base_model, + custom_llm_provider="vertex_ai", + training_file=training_file, + vertex_project=project_id, + vertex_location=location, + ) - # Verify the request - mock_post.assert_called_once() + # Verify the request + mock_post.assert_called_once() - # Validate the request - assert mock_post.call_args.kwargs["json"] == { - "baseModel": base_model, - "supervisedTuningSpec": {"training_dataset_uri": training_file}, - "tunedModelDisplayName": None, - } + # Validate the request + assert mock_post.call_args.kwargs["json"] == { + "baseModel": base_model, + "supervisedTuningSpec": {"training_dataset_uri": training_file}, + "tunedModelDisplayName": None, + } - # Verify the response - response_json = json.loads(create_fine_tuning_response.model_dump_json()) - assert ( - response_json["id"] - == f"projects/{project_id}/locations/{location}/tuningJobs/{job_id}" - ) - assert response_json["model"] == base_model - assert response_json["object"] == "fine_tuning.job" - assert response_json["fine_tuned_model"] == tuned_model_name - assert response_json["status"] == "queued" - assert response_json["training_file"] == training_file - assert ( - response_json["created_at"] == 1735684820 - ) # Unix timestamp for create_time - assert response_json["error"] is None - assert response_json["finished_at"] is None - assert response_json["validation_file"] is None - assert 
response_json["trained_tokens"] is None - assert response_json["estimated_finish"] is None - assert response_json["integrations"] == [] + # Verify the response + response_json = json.loads(create_fine_tuning_response.model_dump_json()) + assert ( + response_json["id"] + == f"projects/{project_id}/locations/{location}/tuningJobs/{job_id}" + ) + assert response_json["model"] == base_model + assert response_json["object"] == "fine_tuning.job" + assert response_json["fine_tuned_model"] == tuned_model_name + assert response_json["status"] == "queued" + assert response_json["training_file"] == training_file + assert ( + response_json["created_at"] == 1735684820 + ) # Unix timestamp for create_time + assert response_json["error"] is None + assert response_json["finished_at"] is None + assert response_json["validation_file"] is None + assert response_json["trained_tokens"] is None + assert response_json["estimated_finish"] is None + assert response_json["integrations"] == [] + finally: + # Restore original callbacks + litellm.callbacks = original_callbacks @pytest.mark.asyncio() @@ -280,60 +289,69 @@ async def test_create_vertex_fine_tune_jobs_mocked_with_hyperparameters(): } ) - with patch( - "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post", - return_value=mock_response, - ) as mock_post: - create_fine_tuning_response = await litellm.acreate_fine_tuning_job( - model=base_model, - custom_llm_provider="vertex_ai", - training_file=training_file, - vertex_project=project_id, - vertex_location=location, - hyperparameters={ - "n_epochs": 5, - "learning_rate_multiplier": 0.2, - "adapter_size": "SMALL", - }, - ) - - # Verify the request - mock_post.assert_called_once() - - # Validate the request - assert mock_post.call_args.kwargs["json"] == { - "baseModel": base_model, - "supervisedTuningSpec": { - "training_dataset_uri": training_file, - "hyperParameters": { - "epoch_count": 5, + # Save original callbacks to restore later + original_callbacks = litellm.callbacks + # Disable callbacks to avoid Datadog logging interfering with the mock + litellm.callbacks = [] + + try: + with patch( + "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post", + return_value=mock_response, + ) as mock_post: + create_fine_tuning_response = await litellm.acreate_fine_tuning_job( + model=base_model, + custom_llm_provider="vertex_ai", + training_file=training_file, + vertex_project=project_id, + vertex_location=location, + hyperparameters={ + "n_epochs": 5, "learning_rate_multiplier": 0.2, "adapter_size": "SMALL", }, - }, - "tunedModelDisplayName": None, - } + ) - # Verify the response - response_json = json.loads(create_fine_tuning_response.model_dump_json()) - assert ( - response_json["id"] - == f"projects/{project_id}/locations/{location}/tuningJobs/{job_id}" - ) - assert response_json["model"] == base_model - assert response_json["object"] == "fine_tuning.job" - assert response_json["fine_tuned_model"] == tuned_model_name - assert response_json["status"] == "queued" - assert response_json["training_file"] == training_file - assert ( - response_json["created_at"] == 1735684820 - ) # Unix timestamp for create_time - assert response_json["error"] is None - assert response_json["finished_at"] is None - assert response_json["validation_file"] is None - assert response_json["trained_tokens"] is None - assert response_json["estimated_finish"] is None - assert response_json["integrations"] == [] + # Verify the request + mock_post.assert_called_once() + + # Validate the request + assert 
mock_post.call_args.kwargs["json"] == { + "baseModel": base_model, + "supervisedTuningSpec": { + "training_dataset_uri": training_file, + "hyperParameters": { + "epoch_count": 5, + "learning_rate_multiplier": 0.2, + "adapter_size": "SMALL", + }, + }, + "tunedModelDisplayName": None, + } + + # Verify the response + response_json = json.loads(create_fine_tuning_response.model_dump_json()) + assert ( + response_json["id"] + == f"projects/{project_id}/locations/{location}/tuningJobs/{job_id}" + ) + assert response_json["model"] == base_model + assert response_json["object"] == "fine_tuning.job" + assert response_json["fine_tuned_model"] == tuned_model_name + assert response_json["status"] == "queued" + assert response_json["training_file"] == training_file + assert ( + response_json["created_at"] == 1735684820 + ) # Unix timestamp for create_time + assert response_json["error"] is None + assert response_json["finished_at"] is None + assert response_json["validation_file"] is None + assert response_json["trained_tokens"] is None + assert response_json["estimated_finish"] is None + assert response_json["integrations"] == [] + finally: + # Restore original callbacks + litellm.callbacks = original_callbacks # Testing OpenAI -> Vertex AI param mapping diff --git a/tests/enterprise/litellm_enterprise/proxy/guardrails/test_apply_guardrail_endpoint.py b/tests/enterprise/litellm_enterprise/proxy/guardrails/test_apply_guardrail_endpoint.py index 7ce99abdd1..0d27df50d1 100644 --- a/tests/enterprise/litellm_enterprise/proxy/guardrails/test_apply_guardrail_endpoint.py +++ b/tests/enterprise/litellm_enterprise/proxy/guardrails/test_apply_guardrail_endpoint.py @@ -28,9 +28,9 @@ async def test_apply_guardrail_endpoint_returns_correct_response(): ) as mock_registry: # Create a mock guardrail mock_guardrail = Mock(spec=CustomGuardrail) - # Apply guardrail now returns a tuple (List[str], Optional[List[str]]) + # Apply guardrail returns GenericGuardrailAPIInputs (dict with texts key) mock_guardrail.apply_guardrail = AsyncMock( - return_value=(["Redacted text: [REDACTED] and [REDACTED]"], None) + return_value={"texts": ["Redacted text: [REDACTED] and [REDACTED]"]} ) # Configure the registry to return our mock guardrail @@ -56,12 +56,11 @@ async def test_apply_guardrail_endpoint_returns_correct_response(): assert isinstance(response, ApplyGuardrailResponse) assert response.response_text == "Redacted text: [REDACTED] and [REDACTED]" - # Verify the guardrail was called with correct parameters (new signature) + # Verify the guardrail was called with correct parameters mock_guardrail.apply_guardrail.assert_called_once_with( - texts=["Test text with PII"], + inputs={"texts": ["Test text with PII"]}, request_data={}, input_type="request", - images=None, ) @@ -104,9 +103,9 @@ async def test_apply_guardrail_endpoint_with_presidio_guardrail(): ) as mock_registry: # Create a mock guardrail that simulates Presidio behavior mock_guardrail = Mock(spec=CustomGuardrail) - # Simulate masking PII entities - returns tuple (List[str], Optional[List[str]]) + # Simulate masking PII entities - returns GenericGuardrailAPIInputs (dict with texts key) mock_guardrail.apply_guardrail = AsyncMock( - return_value=(["My name is [PERSON] and my email is [EMAIL_ADDRESS]"], None) + return_value={"texts": ["My name is [PERSON] and my email is [EMAIL_ADDRESS]"]} ) # Configure the registry to return our mock guardrail @@ -149,9 +148,9 @@ async def test_apply_guardrail_endpoint_without_optional_params(): ) as mock_registry: # Create a mock guardrail 
mock_guardrail = Mock(spec=CustomGuardrail) - # Returns tuple (List[str], Optional[List[str]]) + # Returns GenericGuardrailAPIInputs (dict with texts key) mock_guardrail.apply_guardrail = AsyncMock( - return_value=(["Processed text"], None) + return_value={"texts": ["Processed text"]} ) # Configure the registry to return our mock guardrail @@ -174,7 +173,7 @@ async def test_apply_guardrail_endpoint_without_optional_params(): assert isinstance(response, ApplyGuardrailResponse) assert response.response_text == "Processed text" - # Verify the guardrail was called with new signature + # Verify the guardrail was called with correct parameters mock_guardrail.apply_guardrail.assert_called_once_with( - texts=["Test text"], request_data={}, input_type="request", images=None + inputs={"texts": ["Test text"]}, request_data={}, input_type="request" ) diff --git a/tests/enterprise/litellm_enterprise/proxy/guardrails/test_bedrock_apply_guardrail.py b/tests/enterprise/litellm_enterprise/proxy/guardrails/test_bedrock_apply_guardrail.py index 8d98f56cd3..9a96919da8 100644 --- a/tests/enterprise/litellm_enterprise/proxy/guardrails/test_bedrock_apply_guardrail.py +++ b/tests/enterprise/litellm_enterprise/proxy/guardrails/test_bedrock_apply_guardrail.py @@ -34,7 +34,7 @@ async def test_bedrock_apply_guardrail_success(): # Mock a successful response from Bedrock mock_response = { "action": "ALLOWED", - "content": [{"text": {"text": "This is a test message with some content"}}], + "output": [{"text": "This is a test message with some content"}], } mock_api_request.return_value = mock_response @@ -219,7 +219,7 @@ async def test_bedrock_apply_guardrail_filters_request_messages_when_flag_enable with patch.object( guardrail, "make_bedrock_api_request", new_callable=AsyncMock ) as mock_api: - mock_api.return_value = {"action": "ALLOWED"} + mock_api.return_value = {"action": "ALLOWED", "output": [{"text": "latest question"}]} guardrailed_inputs = await guardrail.apply_guardrail( inputs={"texts": ["latest question"]}, diff --git a/tests/test_litellm/litellm_core_utils/test_litellm_logging.py b/tests/test_litellm/litellm_core_utils/test_litellm_logging.py index 8065304fd6..95f900b95d 100644 --- a/tests/test_litellm/litellm_core_utils/test_litellm_logging.py +++ b/tests/test_litellm/litellm_core_utils/test_litellm_logging.py @@ -196,31 +196,38 @@ async def test_logging_non_streaming_request(): import litellm - mock_logging_obj = MockPrometheusLogger() + # Save original callbacks to restore after test + original_callbacks = getattr(litellm, "callbacks", []) - litellm.callbacks = [mock_logging_obj] + try: + mock_logging_obj = MockPrometheusLogger() - with patch.object( - mock_logging_obj, - "async_log_success_event", - ) as mock_async_log_success_event: - await litellm.acompletion( - max_tokens=100, - messages=[{"role": "user", "content": "Hey"}], - model="openai/codex-mini-latest", - mock_response="Hello, world!", - ) - await asyncio.sleep(1) - mock_async_log_success_event.assert_called_once() - assert mock_async_log_success_event.call_count == 1 - print( - "mock_async_log_success_event.call_args.kwargs", - mock_async_log_success_event.call_args.kwargs, - ) - standard_logging_object = mock_async_log_success_event.call_args.kwargs[ - "kwargs" - ]["standard_logging_object"] - assert standard_logging_object["stream"] is not True + litellm.callbacks = [mock_logging_obj] + + with patch.object( + mock_logging_obj, + "async_log_success_event", + ) as mock_async_log_success_event: + await litellm.acompletion( + max_tokens=100, + 
messages=[{"role": "user", "content": "Hey"}], + model="openai/codex-mini-latest", + mock_response="Hello, world!", + ) + await asyncio.sleep(1) + mock_async_log_success_event.assert_called_once() + assert mock_async_log_success_event.call_count == 1 + print( + "mock_async_log_success_event.call_args.kwargs", + mock_async_log_success_event.call_args.kwargs, + ) + standard_logging_object = mock_async_log_success_event.call_args.kwargs[ + "kwargs" + ]["standard_logging_object"] + assert standard_logging_object["stream"] is not True + finally: + # Restore original callbacks to ensure test isolation + litellm.callbacks = original_callbacks def test_get_user_agent_tags():