From 655e04f16cd7255850d30275e631e2275dbd47b9 Mon Sep 17 00:00:00 2001 From: Alexsander Hamir Date: Fri, 5 Dec 2025 12:59:35 -0800 Subject: [PATCH] Fix: apply_guardrail method and improve test isolation (#17555) * Fix Bedrock guardrail apply_guardrail method and test mocks Fixed 4 failing tests in the guardrail test suite: 1. BedrockGuardrail.apply_guardrail now returns original texts when guardrail allows content but doesn't provide output/outputs fields. Previously returned empty list, causing test_bedrock_apply_guardrail_success to fail. 2. Updated test mocks to use correct Bedrock API response format: - Changed from 'content' field to 'output' field - Fixed nested structure from {'text': {'text': '...'}} to {'text': '...'} - Added missing 'output' field in filter test 3. Fixed endpoint test mocks to return GenericGuardrailAPIInputs format: - Changed from tuple (List[str], Optional[List[str]]) to dict {'texts': [...]} - Updated method call assertions to use 'inputs' parameter correctly All 12 guardrail tests now pass successfully. * fix: remove python3-dev from Dockerfile.build_from_pip to avoid Python version conflict The base image cgr.dev/chainguard/python:latest-dev already includes Python 3.14 and its development tools. Installing python3-dev pulls Python 3.13 packages which conflict with the existing Python 3.14 installation, causing file ownership errors during apk install. * fix: disable callbacks in vertex fine-tuning tests to prevent Datadog logging interference The test was failing because Datadog logging was making an HTTP POST request that was being caught by the mock, causing assert_called_once() to fail. By disabling callbacks during the test, we prevent Datadog from making any HTTP calls, allowing the mock to only see the Vertex AI API call. * fix: ensure test isolation in test_logging_non_streaming_request Add proper cleanup to restore original litellm.callbacks after test execution. This prevents test interference when running as part of a larger test suite, where global state pollution was causing async_log_success_event to be called multiple times instead of once. Fixes test failure where the test expected async_log_success_event to be called once but was being called twice due to callbacks from previous tests not being cleaned up. --- .../build_from_pip/Dockerfile.build_from_pip | 5 +- .../guardrail_hooks/bedrock_guardrails.py | 5 + tests/batches_tests/test_fine_tuning_api.py | 196 ++++++++++-------- .../test_apply_guardrail_endpoint.py | 21 +- .../test_bedrock_apply_guardrail.py | 4 +- .../test_litellm_logging.py | 53 +++-- 6 files changed, 158 insertions(+), 126 deletions(-) diff --git a/docker/build_from_pip/Dockerfile.build_from_pip b/docker/build_from_pip/Dockerfile.build_from_pip index aeb19bce21..dda6e50cbb 100644 --- a/docker/build_from_pip/Dockerfile.build_from_pip +++ b/docker/build_from_pip/Dockerfile.build_from_pip @@ -7,8 +7,11 @@ ENV HOME=/home/litellm ENV PATH="${HOME}/venv/bin:$PATH" # Install runtime dependencies +# Note: The base image has Python 3.14, but python3-dev installs Python 3.13 which conflicts. 
+# The -dev variant should include Python headers, but if compilation fails, we may need +# to install python-3.14-dev specifically (if available in the repo) RUN apk update && \ - apk add --no-cache gcc python3-dev openssl openssl-dev + apk add --no-cache gcc openssl openssl-dev RUN python -m venv ${HOME}/venv RUN ${HOME}/venv/bin/pip install --no-cache-dir --upgrade pip diff --git a/litellm/proxy/guardrails/guardrail_hooks/bedrock_guardrails.py b/litellm/proxy/guardrails/guardrail_hooks/bedrock_guardrails.py index e0fb319240..9d0211e2a0 100644 --- a/litellm/proxy/guardrails/guardrail_hooks/bedrock_guardrails.py +++ b/litellm/proxy/guardrails/guardrail_hooks/bedrock_guardrails.py @@ -1318,6 +1318,11 @@ class BedrockGuardrail(CustomGuardrail, BaseAWSLLM): masked_text = str(text_content) masked_texts.append(masked_text) + # If no output/outputs were provided, use the original texts + # This happens when the guardrail allows content without modification + if not masked_texts: + masked_texts = texts + verbose_proxy_logger.debug( "Bedrock Guardrail: Successfully applied guardrail" ) diff --git a/tests/batches_tests/test_fine_tuning_api.py b/tests/batches_tests/test_fine_tuning_api.py index 3561d99d0f..de952cfe29 100644 --- a/tests/batches_tests/test_fine_tuning_api.py +++ b/tests/batches_tests/test_fine_tuning_api.py @@ -208,48 +208,57 @@ async def test_create_vertex_fine_tune_jobs_mocked(): } ) - with patch( - "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post", - return_value=mock_response, - ) as mock_post: - create_fine_tuning_response = await litellm.acreate_fine_tuning_job( - model=base_model, - custom_llm_provider="vertex_ai", - training_file=training_file, - vertex_project=project_id, - vertex_location=location, - ) + # Save original callbacks to restore later + original_callbacks = litellm.callbacks + # Disable callbacks to avoid Datadog logging interfering with the mock + litellm.callbacks = [] + + try: + with patch( + "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post", + return_value=mock_response, + ) as mock_post: + create_fine_tuning_response = await litellm.acreate_fine_tuning_job( + model=base_model, + custom_llm_provider="vertex_ai", + training_file=training_file, + vertex_project=project_id, + vertex_location=location, + ) - # Verify the request - mock_post.assert_called_once() + # Verify the request + mock_post.assert_called_once() - # Validate the request - assert mock_post.call_args.kwargs["json"] == { - "baseModel": base_model, - "supervisedTuningSpec": {"training_dataset_uri": training_file}, - "tunedModelDisplayName": None, - } + # Validate the request + assert mock_post.call_args.kwargs["json"] == { + "baseModel": base_model, + "supervisedTuningSpec": {"training_dataset_uri": training_file}, + "tunedModelDisplayName": None, + } - # Verify the response - response_json = json.loads(create_fine_tuning_response.model_dump_json()) - assert ( - response_json["id"] - == f"projects/{project_id}/locations/{location}/tuningJobs/{job_id}" - ) - assert response_json["model"] == base_model - assert response_json["object"] == "fine_tuning.job" - assert response_json["fine_tuned_model"] == tuned_model_name - assert response_json["status"] == "queued" - assert response_json["training_file"] == training_file - assert ( - response_json["created_at"] == 1735684820 - ) # Unix timestamp for create_time - assert response_json["error"] is None - assert response_json["finished_at"] is None - assert response_json["validation_file"] is None - assert 
response_json["trained_tokens"] is None - assert response_json["estimated_finish"] is None - assert response_json["integrations"] == [] + # Verify the response + response_json = json.loads(create_fine_tuning_response.model_dump_json()) + assert ( + response_json["id"] + == f"projects/{project_id}/locations/{location}/tuningJobs/{job_id}" + ) + assert response_json["model"] == base_model + assert response_json["object"] == "fine_tuning.job" + assert response_json["fine_tuned_model"] == tuned_model_name + assert response_json["status"] == "queued" + assert response_json["training_file"] == training_file + assert ( + response_json["created_at"] == 1735684820 + ) # Unix timestamp for create_time + assert response_json["error"] is None + assert response_json["finished_at"] is None + assert response_json["validation_file"] is None + assert response_json["trained_tokens"] is None + assert response_json["estimated_finish"] is None + assert response_json["integrations"] == [] + finally: + # Restore original callbacks + litellm.callbacks = original_callbacks @pytest.mark.asyncio() @@ -280,60 +289,69 @@ async def test_create_vertex_fine_tune_jobs_mocked_with_hyperparameters(): } ) - with patch( - "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post", - return_value=mock_response, - ) as mock_post: - create_fine_tuning_response = await litellm.acreate_fine_tuning_job( - model=base_model, - custom_llm_provider="vertex_ai", - training_file=training_file, - vertex_project=project_id, - vertex_location=location, - hyperparameters={ - "n_epochs": 5, - "learning_rate_multiplier": 0.2, - "adapter_size": "SMALL", - }, - ) - - # Verify the request - mock_post.assert_called_once() - - # Validate the request - assert mock_post.call_args.kwargs["json"] == { - "baseModel": base_model, - "supervisedTuningSpec": { - "training_dataset_uri": training_file, - "hyperParameters": { - "epoch_count": 5, + # Save original callbacks to restore later + original_callbacks = litellm.callbacks + # Disable callbacks to avoid Datadog logging interfering with the mock + litellm.callbacks = [] + + try: + with patch( + "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post", + return_value=mock_response, + ) as mock_post: + create_fine_tuning_response = await litellm.acreate_fine_tuning_job( + model=base_model, + custom_llm_provider="vertex_ai", + training_file=training_file, + vertex_project=project_id, + vertex_location=location, + hyperparameters={ + "n_epochs": 5, "learning_rate_multiplier": 0.2, "adapter_size": "SMALL", }, - }, - "tunedModelDisplayName": None, - } + ) - # Verify the response - response_json = json.loads(create_fine_tuning_response.model_dump_json()) - assert ( - response_json["id"] - == f"projects/{project_id}/locations/{location}/tuningJobs/{job_id}" - ) - assert response_json["model"] == base_model - assert response_json["object"] == "fine_tuning.job" - assert response_json["fine_tuned_model"] == tuned_model_name - assert response_json["status"] == "queued" - assert response_json["training_file"] == training_file - assert ( - response_json["created_at"] == 1735684820 - ) # Unix timestamp for create_time - assert response_json["error"] is None - assert response_json["finished_at"] is None - assert response_json["validation_file"] is None - assert response_json["trained_tokens"] is None - assert response_json["estimated_finish"] is None - assert response_json["integrations"] == [] + # Verify the request + mock_post.assert_called_once() + + # Validate the request + assert 
mock_post.call_args.kwargs["json"] == { + "baseModel": base_model, + "supervisedTuningSpec": { + "training_dataset_uri": training_file, + "hyperParameters": { + "epoch_count": 5, + "learning_rate_multiplier": 0.2, + "adapter_size": "SMALL", + }, + }, + "tunedModelDisplayName": None, + } + + # Verify the response + response_json = json.loads(create_fine_tuning_response.model_dump_json()) + assert ( + response_json["id"] + == f"projects/{project_id}/locations/{location}/tuningJobs/{job_id}" + ) + assert response_json["model"] == base_model + assert response_json["object"] == "fine_tuning.job" + assert response_json["fine_tuned_model"] == tuned_model_name + assert response_json["status"] == "queued" + assert response_json["training_file"] == training_file + assert ( + response_json["created_at"] == 1735684820 + ) # Unix timestamp for create_time + assert response_json["error"] is None + assert response_json["finished_at"] is None + assert response_json["validation_file"] is None + assert response_json["trained_tokens"] is None + assert response_json["estimated_finish"] is None + assert response_json["integrations"] == [] + finally: + # Restore original callbacks + litellm.callbacks = original_callbacks # Testing OpenAI -> Vertex AI param mapping diff --git a/tests/enterprise/litellm_enterprise/proxy/guardrails/test_apply_guardrail_endpoint.py b/tests/enterprise/litellm_enterprise/proxy/guardrails/test_apply_guardrail_endpoint.py index 7ce99abdd1..0d27df50d1 100644 --- a/tests/enterprise/litellm_enterprise/proxy/guardrails/test_apply_guardrail_endpoint.py +++ b/tests/enterprise/litellm_enterprise/proxy/guardrails/test_apply_guardrail_endpoint.py @@ -28,9 +28,9 @@ async def test_apply_guardrail_endpoint_returns_correct_response(): ) as mock_registry: # Create a mock guardrail mock_guardrail = Mock(spec=CustomGuardrail) - # Apply guardrail now returns a tuple (List[str], Optional[List[str]]) + # Apply guardrail returns GenericGuardrailAPIInputs (dict with texts key) mock_guardrail.apply_guardrail = AsyncMock( - return_value=(["Redacted text: [REDACTED] and [REDACTED]"], None) + return_value={"texts": ["Redacted text: [REDACTED] and [REDACTED]"]} ) # Configure the registry to return our mock guardrail @@ -56,12 +56,11 @@ async def test_apply_guardrail_endpoint_returns_correct_response(): assert isinstance(response, ApplyGuardrailResponse) assert response.response_text == "Redacted text: [REDACTED] and [REDACTED]" - # Verify the guardrail was called with correct parameters (new signature) + # Verify the guardrail was called with correct parameters mock_guardrail.apply_guardrail.assert_called_once_with( - texts=["Test text with PII"], + inputs={"texts": ["Test text with PII"]}, request_data={}, input_type="request", - images=None, ) @@ -104,9 +103,9 @@ async def test_apply_guardrail_endpoint_with_presidio_guardrail(): ) as mock_registry: # Create a mock guardrail that simulates Presidio behavior mock_guardrail = Mock(spec=CustomGuardrail) - # Simulate masking PII entities - returns tuple (List[str], Optional[List[str]]) + # Simulate masking PII entities - returns GenericGuardrailAPIInputs (dict with texts key) mock_guardrail.apply_guardrail = AsyncMock( - return_value=(["My name is [PERSON] and my email is [EMAIL_ADDRESS]"], None) + return_value={"texts": ["My name is [PERSON] and my email is [EMAIL_ADDRESS]"]} ) # Configure the registry to return our mock guardrail @@ -149,9 +148,9 @@ async def test_apply_guardrail_endpoint_without_optional_params(): ) as mock_registry: # Create a mock guardrail 
mock_guardrail = Mock(spec=CustomGuardrail) - # Returns tuple (List[str], Optional[List[str]]) + # Returns GenericGuardrailAPIInputs (dict with texts key) mock_guardrail.apply_guardrail = AsyncMock( - return_value=(["Processed text"], None) + return_value={"texts": ["Processed text"]} ) # Configure the registry to return our mock guardrail @@ -174,7 +173,7 @@ async def test_apply_guardrail_endpoint_without_optional_params(): assert isinstance(response, ApplyGuardrailResponse) assert response.response_text == "Processed text" - # Verify the guardrail was called with new signature + # Verify the guardrail was called with correct parameters mock_guardrail.apply_guardrail.assert_called_once_with( - texts=["Test text"], request_data={}, input_type="request", images=None + inputs={"texts": ["Test text"]}, request_data={}, input_type="request" ) diff --git a/tests/enterprise/litellm_enterprise/proxy/guardrails/test_bedrock_apply_guardrail.py b/tests/enterprise/litellm_enterprise/proxy/guardrails/test_bedrock_apply_guardrail.py index 8d98f56cd3..9a96919da8 100644 --- a/tests/enterprise/litellm_enterprise/proxy/guardrails/test_bedrock_apply_guardrail.py +++ b/tests/enterprise/litellm_enterprise/proxy/guardrails/test_bedrock_apply_guardrail.py @@ -34,7 +34,7 @@ async def test_bedrock_apply_guardrail_success(): # Mock a successful response from Bedrock mock_response = { "action": "ALLOWED", - "content": [{"text": {"text": "This is a test message with some content"}}], + "output": [{"text": "This is a test message with some content"}], } mock_api_request.return_value = mock_response @@ -219,7 +219,7 @@ async def test_bedrock_apply_guardrail_filters_request_messages_when_flag_enable with patch.object( guardrail, "make_bedrock_api_request", new_callable=AsyncMock ) as mock_api: - mock_api.return_value = {"action": "ALLOWED"} + mock_api.return_value = {"action": "ALLOWED", "output": [{"text": "latest question"}]} guardrailed_inputs = await guardrail.apply_guardrail( inputs={"texts": ["latest question"]}, diff --git a/tests/test_litellm/litellm_core_utils/test_litellm_logging.py b/tests/test_litellm/litellm_core_utils/test_litellm_logging.py index 8065304fd6..95f900b95d 100644 --- a/tests/test_litellm/litellm_core_utils/test_litellm_logging.py +++ b/tests/test_litellm/litellm_core_utils/test_litellm_logging.py @@ -196,31 +196,38 @@ async def test_logging_non_streaming_request(): import litellm - mock_logging_obj = MockPrometheusLogger() + # Save original callbacks to restore after test + original_callbacks = getattr(litellm, "callbacks", []) - litellm.callbacks = [mock_logging_obj] + try: + mock_logging_obj = MockPrometheusLogger() - with patch.object( - mock_logging_obj, - "async_log_success_event", - ) as mock_async_log_success_event: - await litellm.acompletion( - max_tokens=100, - messages=[{"role": "user", "content": "Hey"}], - model="openai/codex-mini-latest", - mock_response="Hello, world!", - ) - await asyncio.sleep(1) - mock_async_log_success_event.assert_called_once() - assert mock_async_log_success_event.call_count == 1 - print( - "mock_async_log_success_event.call_args.kwargs", - mock_async_log_success_event.call_args.kwargs, - ) - standard_logging_object = mock_async_log_success_event.call_args.kwargs[ - "kwargs" - ]["standard_logging_object"] - assert standard_logging_object["stream"] is not True + litellm.callbacks = [mock_logging_obj] + + with patch.object( + mock_logging_obj, + "async_log_success_event", + ) as mock_async_log_success_event: + await litellm.acompletion( + max_tokens=100, + 
messages=[{"role": "user", "content": "Hey"}], + model="openai/codex-mini-latest", + mock_response="Hello, world!", + ) + await asyncio.sleep(1) + mock_async_log_success_event.assert_called_once() + assert mock_async_log_success_event.call_count == 1 + print( + "mock_async_log_success_event.call_args.kwargs", + mock_async_log_success_event.call_args.kwargs, + ) + standard_logging_object = mock_async_log_success_event.call_args.kwargs[ + "kwargs" + ]["standard_logging_object"] + assert standard_logging_object["stream"] is not True + finally: + # Restore original callbacks to ensure test isolation + litellm.callbacks = original_callbacks def test_get_user_agent_tags():