Fix: apply_guardrail method and improve test isolation (#17555)

* Fix Bedrock guardrail apply_guardrail method and test mocks

Fixed 4 failing tests in the guardrail test suite:

1. BedrockGuardrail.apply_guardrail now returns the original texts when the guardrail
   allows content but does not provide output/outputs fields. Previously it returned an
   empty list, causing test_bedrock_apply_guardrail_success to fail.

2. Updated test mocks to use correct Bedrock API response format:
   - Changed from 'content' field to 'output' field
   - Fixed nested structure from {'text': {'text': '...'}} to {'text': '...'}
   - Added missing 'output' field in filter test

3. Fixed endpoint test mocks to return GenericGuardrailAPIInputs format:
   - Changed from tuple (List[str], Optional[List[str]]) to dict {'texts': [...]}
   - Updated method call assertions to use 'inputs' parameter correctly

All 12 guardrail tests now pass successfully.
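
For reference, a minimal, self-contained sketch of the mock shapes described above. This is not the project's test code: the guardrail object is a stand-in built from unittest.mock, and only the field names mirror the Bedrock "output" response format and the GenericGuardrailAPIInputs dict ({"texts": [...]}) used by the updated tests.

import asyncio
from unittest.mock import AsyncMock, Mock

# Bedrock-style "allowed" response: a top-level "output" list of {"text": ...}
# entries, not the older {"text": {"text": ...}} nesting used by the old mocks.
bedrock_allowed_response = {
    "action": "ALLOWED",
    "output": [{"text": "This is a test message with some content"}],
}

async def main():
    # Endpoint-style mock: apply_guardrail resolves to a dict with a "texts" key
    # instead of a (List[str], Optional[List[str]]) tuple.
    mock_guardrail = Mock()
    mock_guardrail.apply_guardrail = AsyncMock(
        return_value={"texts": ["Processed text"]}
    )

    result = await mock_guardrail.apply_guardrail(
        inputs={"texts": ["Test text"]}, request_data={}, input_type="request"
    )
    assert result == {"texts": ["Processed text"]}

    # Calls are asserted against the 'inputs' parameter; there is no 'images' kwarg.
    mock_guardrail.apply_guardrail.assert_called_once_with(
        inputs={"texts": ["Test text"]}, request_data={}, input_type="request"
    )

asyncio.run(main())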

* fix: remove python3-dev from Dockerfile.build_from_pip to avoid Python version conflict

The base image cgr.dev/chainguard/python:latest-dev already includes Python 3.14
and its development tools. Installing python3-dev pulls Python 3.13 packages
which conflict with the existing Python 3.14 installation, causing file
ownership errors during apk install.

* fix: disable callbacks in vertex fine-tuning tests to prevent Datadog logging interference

The test was failing because Datadog logging was making an HTTP POST request that
was captured by the mock, causing assert_called_once() to fail. Disabling callbacks
during the test prevents Datadog from making any HTTP calls, so the mock sees only
the Vertex AI API call.
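
A minimal sketch of that pattern follows. The patched target path is taken from the diff below; the with-block body is a placeholder rather than the actual test, and the only assumption is that litellm.callbacks is a plain module-level list.

import litellm
from unittest.mock import AsyncMock, patch

original_callbacks = litellm.callbacks
litellm.callbacks = []  # no logging integrations, so no extra HTTP calls reach the mock
try:
    with patch(
        "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post",
        new_callable=AsyncMock,
    ) as mock_post:
        ...  # exercise the code under test; only the Vertex AI call should hit mock_post
finally:
    litellm.callbacks = original_callbacks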

* fix: ensure test isolation in test_logging_non_streaming_request

Add proper cleanup to restore the original litellm.callbacks after test execution.
This prevents test interference when running as part of a larger test suite,
where global state pollution was causing async_log_success_event to be
called multiple times instead of once.

Fixes a test failure where async_log_success_event was expected to be called once
but was called twice because callbacks registered by earlier tests had not been
cleaned up.
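
The same guarantee could also be expressed once as an autouse pytest fixture instead of per-test try/finally. This is a hedged sketch of that alternative, not part of this change, and it assumes litellm.callbacks is the only global state the tests mutate.

import litellm
import pytest

@pytest.fixture(autouse=True)
def restore_litellm_callbacks():
    # Snapshot the global callback list before each test...
    saved = list(litellm.callbacks)
    yield
    # ...and restore it afterwards, even if the test fails.
    litellm.callbacks = saved
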
Author: Alexsander Hamir
Date: 2025-12-05 12:59:35 -08:00
Committed by: GitHub
Commit: 655e04f16c (parent 4eb9f8036f)
6 changed files with 158 additions and 126 deletions


@@ -7,8 +7,11 @@ ENV HOME=/home/litellm
 ENV PATH="${HOME}/venv/bin:$PATH"
 
 # Install runtime dependencies
+# Note: The base image has Python 3.14, but python3-dev installs Python 3.13 which conflicts.
+# The -dev variant should include Python headers, but if compilation fails, we may need
+# to install python-3.14-dev specifically (if available in the repo)
 RUN apk update && \
-    apk add --no-cache gcc python3-dev openssl openssl-dev
+    apk add --no-cache gcc openssl openssl-dev
 RUN python -m venv ${HOME}/venv
 RUN ${HOME}/venv/bin/pip install --no-cache-dir --upgrade pip
 


@@ -1318,6 +1318,11 @@ class BedrockGuardrail(CustomGuardrail, BaseAWSLLM):
                    masked_text = str(text_content)
                masked_texts.append(masked_text)
 
+        # If no output/outputs were provided, use the original texts
+        # This happens when the guardrail allows content without modification
+        if not masked_texts:
+            masked_texts = texts
+
         verbose_proxy_logger.debug(
             "Bedrock Guardrail: Successfully applied guardrail"
         )


@@ -208,48 +208,57 @@ async def test_create_vertex_fine_tune_jobs_mocked():
         }
     )
 
-    with patch(
-        "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post",
-        return_value=mock_response,
-    ) as mock_post:
-        create_fine_tuning_response = await litellm.acreate_fine_tuning_job(
-            model=base_model,
-            custom_llm_provider="vertex_ai",
-            training_file=training_file,
-            vertex_project=project_id,
-            vertex_location=location,
-        )
-
-        # Verify the request
-        mock_post.assert_called_once()
-
-        # Validate the request
-        assert mock_post.call_args.kwargs["json"] == {
-            "baseModel": base_model,
-            "supervisedTuningSpec": {"training_dataset_uri": training_file},
-            "tunedModelDisplayName": None,
-        }
-
-        # Verify the response
-        response_json = json.loads(create_fine_tuning_response.model_dump_json())
-        assert (
-            response_json["id"]
-            == f"projects/{project_id}/locations/{location}/tuningJobs/{job_id}"
-        )
-        assert response_json["model"] == base_model
-        assert response_json["object"] == "fine_tuning.job"
-        assert response_json["fine_tuned_model"] == tuned_model_name
-        assert response_json["status"] == "queued"
-        assert response_json["training_file"] == training_file
-        assert (
-            response_json["created_at"] == 1735684820
-        )  # Unix timestamp for create_time
-        assert response_json["error"] is None
-        assert response_json["finished_at"] is None
-        assert response_json["validation_file"] is None
-        assert response_json["trained_tokens"] is None
-        assert response_json["estimated_finish"] is None
-        assert response_json["integrations"] == []
+    # Save original callbacks to restore later
+    original_callbacks = litellm.callbacks
+    # Disable callbacks to avoid Datadog logging interfering with the mock
+    litellm.callbacks = []
+
+    try:
+        with patch(
+            "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post",
+            return_value=mock_response,
+        ) as mock_post:
+            create_fine_tuning_response = await litellm.acreate_fine_tuning_job(
+                model=base_model,
+                custom_llm_provider="vertex_ai",
+                training_file=training_file,
+                vertex_project=project_id,
+                vertex_location=location,
+            )
+
+            # Verify the request
+            mock_post.assert_called_once()
+
+            # Validate the request
+            assert mock_post.call_args.kwargs["json"] == {
+                "baseModel": base_model,
+                "supervisedTuningSpec": {"training_dataset_uri": training_file},
+                "tunedModelDisplayName": None,
+            }
+
+            # Verify the response
+            response_json = json.loads(create_fine_tuning_response.model_dump_json())
+            assert (
+                response_json["id"]
+                == f"projects/{project_id}/locations/{location}/tuningJobs/{job_id}"
+            )
+            assert response_json["model"] == base_model
+            assert response_json["object"] == "fine_tuning.job"
+            assert response_json["fine_tuned_model"] == tuned_model_name
+            assert response_json["status"] == "queued"
+            assert response_json["training_file"] == training_file
+            assert (
+                response_json["created_at"] == 1735684820
+            )  # Unix timestamp for create_time
+            assert response_json["error"] is None
+            assert response_json["finished_at"] is None
+            assert response_json["validation_file"] is None
+            assert response_json["trained_tokens"] is None
+            assert response_json["estimated_finish"] is None
+            assert response_json["integrations"] == []
+    finally:
+        # Restore original callbacks
+        litellm.callbacks = original_callbacks
 
 
 @pytest.mark.asyncio()
@@ -280,60 +280,69 @@ async def test_create_vertex_fine_tune_jobs_mocked_with_hyperparameters():
         }
     )
 
-    with patch(
-        "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post",
-        return_value=mock_response,
-    ) as mock_post:
-        create_fine_tuning_response = await litellm.acreate_fine_tuning_job(
-            model=base_model,
-            custom_llm_provider="vertex_ai",
-            training_file=training_file,
-            vertex_project=project_id,
-            vertex_location=location,
-            hyperparameters={
-                "n_epochs": 5,
-                "learning_rate_multiplier": 0.2,
-                "adapter_size": "SMALL",
-            },
-        )
-
-        # Verify the request
-        mock_post.assert_called_once()
-
-        # Validate the request
-        assert mock_post.call_args.kwargs["json"] == {
-            "baseModel": base_model,
-            "supervisedTuningSpec": {
-                "training_dataset_uri": training_file,
-                "hyperParameters": {
-                    "epoch_count": 5,
-                    "learning_rate_multiplier": 0.2,
-                    "adapter_size": "SMALL",
-                },
-            },
-            "tunedModelDisplayName": None,
-        }
-
-        # Verify the response
-        response_json = json.loads(create_fine_tuning_response.model_dump_json())
-        assert (
-            response_json["id"]
-            == f"projects/{project_id}/locations/{location}/tuningJobs/{job_id}"
-        )
-        assert response_json["model"] == base_model
-        assert response_json["object"] == "fine_tuning.job"
-        assert response_json["fine_tuned_model"] == tuned_model_name
-        assert response_json["status"] == "queued"
-        assert response_json["training_file"] == training_file
-        assert (
-            response_json["created_at"] == 1735684820
-        )  # Unix timestamp for create_time
-        assert response_json["error"] is None
-        assert response_json["finished_at"] is None
-        assert response_json["validation_file"] is None
-        assert response_json["trained_tokens"] is None
-        assert response_json["estimated_finish"] is None
-        assert response_json["integrations"] == []
+    # Save original callbacks to restore later
+    original_callbacks = litellm.callbacks
+    # Disable callbacks to avoid Datadog logging interfering with the mock
+    litellm.callbacks = []
+
+    try:
+        with patch(
+            "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post",
+            return_value=mock_response,
+        ) as mock_post:
+            create_fine_tuning_response = await litellm.acreate_fine_tuning_job(
+                model=base_model,
+                custom_llm_provider="vertex_ai",
+                training_file=training_file,
+                vertex_project=project_id,
+                vertex_location=location,
+                hyperparameters={
+                    "n_epochs": 5,
+                    "learning_rate_multiplier": 0.2,
+                    "adapter_size": "SMALL",
+                },
+            )
+
+            # Verify the request
+            mock_post.assert_called_once()
+
+            # Validate the request
+            assert mock_post.call_args.kwargs["json"] == {
+                "baseModel": base_model,
+                "supervisedTuningSpec": {
+                    "training_dataset_uri": training_file,
+                    "hyperParameters": {
+                        "epoch_count": 5,
+                        "learning_rate_multiplier": 0.2,
+                        "adapter_size": "SMALL",
+                    },
+                },
+                "tunedModelDisplayName": None,
+            }
+
+            # Verify the response
+            response_json = json.loads(create_fine_tuning_response.model_dump_json())
+            assert (
+                response_json["id"]
+                == f"projects/{project_id}/locations/{location}/tuningJobs/{job_id}"
+            )
+            assert response_json["model"] == base_model
+            assert response_json["object"] == "fine_tuning.job"
+            assert response_json["fine_tuned_model"] == tuned_model_name
+            assert response_json["status"] == "queued"
+            assert response_json["training_file"] == training_file
+            assert (
+                response_json["created_at"] == 1735684820
+            )  # Unix timestamp for create_time
+            assert response_json["error"] is None
+            assert response_json["finished_at"] is None
+            assert response_json["validation_file"] is None
+            assert response_json["trained_tokens"] is None
+            assert response_json["estimated_finish"] is None
+            assert response_json["integrations"] == []
+    finally:
+        # Restore original callbacks
+        litellm.callbacks = original_callbacks
 
 
 # Testing OpenAI -> Vertex AI param mapping


@@ -28,9 +28,9 @@ async def test_apply_guardrail_endpoint_returns_correct_response():
     ) as mock_registry:
         # Create a mock guardrail
         mock_guardrail = Mock(spec=CustomGuardrail)
-        # Apply guardrail now returns a tuple (List[str], Optional[List[str]])
+        # Apply guardrail returns GenericGuardrailAPIInputs (dict with texts key)
         mock_guardrail.apply_guardrail = AsyncMock(
-            return_value=(["Redacted text: [REDACTED] and [REDACTED]"], None)
+            return_value={"texts": ["Redacted text: [REDACTED] and [REDACTED]"]}
         )
 
         # Configure the registry to return our mock guardrail
@@ -56,12 +56,11 @@ async def test_apply_guardrail_endpoint_returns_correct_response():
         assert isinstance(response, ApplyGuardrailResponse)
         assert response.response_text == "Redacted text: [REDACTED] and [REDACTED]"
 
-        # Verify the guardrail was called with correct parameters (new signature)
+        # Verify the guardrail was called with correct parameters
         mock_guardrail.apply_guardrail.assert_called_once_with(
-            texts=["Test text with PII"],
+            inputs={"texts": ["Test text with PII"]},
             request_data={},
             input_type="request",
-            images=None,
         )
@@ -104,9 +103,9 @@ async def test_apply_guardrail_endpoint_with_presidio_guardrail():
     ) as mock_registry:
         # Create a mock guardrail that simulates Presidio behavior
         mock_guardrail = Mock(spec=CustomGuardrail)
-        # Simulate masking PII entities - returns tuple (List[str], Optional[List[str]])
+        # Simulate masking PII entities - returns GenericGuardrailAPIInputs (dict with texts key)
         mock_guardrail.apply_guardrail = AsyncMock(
-            return_value=(["My name is [PERSON] and my email is [EMAIL_ADDRESS]"], None)
+            return_value={"texts": ["My name is [PERSON] and my email is [EMAIL_ADDRESS]"]}
        )
 
         # Configure the registry to return our mock guardrail
@@ -149,9 +148,9 @@ async def test_apply_guardrail_endpoint_without_optional_params():
     ) as mock_registry:
         # Create a mock guardrail
         mock_guardrail = Mock(spec=CustomGuardrail)
-        # Returns tuple (List[str], Optional[List[str]])
+        # Returns GenericGuardrailAPIInputs (dict with texts key)
         mock_guardrail.apply_guardrail = AsyncMock(
-            return_value=(["Processed text"], None)
+            return_value={"texts": ["Processed text"]}
         )
 
         # Configure the registry to return our mock guardrail
@@ -174,7 +173,7 @@ async def test_apply_guardrail_endpoint_without_optional_params():
         assert isinstance(response, ApplyGuardrailResponse)
         assert response.response_text == "Processed text"
 
-        # Verify the guardrail was called with new signature
+        # Verify the guardrail was called with correct parameters
         mock_guardrail.apply_guardrail.assert_called_once_with(
-            texts=["Test text"], request_data={}, input_type="request", images=None
+            inputs={"texts": ["Test text"]}, request_data={}, input_type="request"
         )


@@ -34,7 +34,7 @@ async def test_bedrock_apply_guardrail_success():
         # Mock a successful response from Bedrock
         mock_response = {
             "action": "ALLOWED",
-            "content": [{"text": {"text": "This is a test message with some content"}}],
+            "output": [{"text": "This is a test message with some content"}],
         }
         mock_api_request.return_value = mock_response
 
@@ -219,7 +219,7 @@ async def test_bedrock_apply_guardrail_filters_request_messages_when_flag_enable
     with patch.object(
         guardrail, "make_bedrock_api_request", new_callable=AsyncMock
     ) as mock_api:
-        mock_api.return_value = {"action": "ALLOWED"}
+        mock_api.return_value = {"action": "ALLOWED", "output": [{"text": "latest question"}]}
 
         guardrailed_inputs = await guardrail.apply_guardrail(
             inputs={"texts": ["latest question"]},


@@ -196,31 +196,38 @@ async def test_logging_non_streaming_request():
     import litellm
 
-    mock_logging_obj = MockPrometheusLogger()
-    litellm.callbacks = [mock_logging_obj]
-
-    with patch.object(
-        mock_logging_obj,
-        "async_log_success_event",
-    ) as mock_async_log_success_event:
-        await litellm.acompletion(
-            max_tokens=100,
-            messages=[{"role": "user", "content": "Hey"}],
-            model="openai/codex-mini-latest",
-            mock_response="Hello, world!",
-        )
-        await asyncio.sleep(1)
-        mock_async_log_success_event.assert_called_once()
-        assert mock_async_log_success_event.call_count == 1
-        print(
-            "mock_async_log_success_event.call_args.kwargs",
-            mock_async_log_success_event.call_args.kwargs,
-        )
-        standard_logging_object = mock_async_log_success_event.call_args.kwargs[
-            "kwargs"
-        ]["standard_logging_object"]
-        assert standard_logging_object["stream"] is not True
+    # Save original callbacks to restore after test
+    original_callbacks = getattr(litellm, "callbacks", [])
+    try:
+        mock_logging_obj = MockPrometheusLogger()
+        litellm.callbacks = [mock_logging_obj]
+
+        with patch.object(
+            mock_logging_obj,
+            "async_log_success_event",
+        ) as mock_async_log_success_event:
+            await litellm.acompletion(
+                max_tokens=100,
+                messages=[{"role": "user", "content": "Hey"}],
+                model="openai/codex-mini-latest",
+                mock_response="Hello, world!",
+            )
+            await asyncio.sleep(1)
+            mock_async_log_success_event.assert_called_once()
+            assert mock_async_log_success_event.call_count == 1
+            print(
+                "mock_async_log_success_event.call_args.kwargs",
+                mock_async_log_success_event.call_args.kwargs,
+            )
+            standard_logging_object = mock_async_log_success_event.call_args.kwargs[
+                "kwargs"
+            ]["standard_logging_object"]
+            assert standard_logging_object["stream"] is not True
+    finally:
+        # Restore original callbacks to ensure test isolation
+        litellm.callbacks = original_callbacks
 
 
 def test_get_user_agent_tags():