[Feat] - Cost Tracking - show input, output, tool call cost breakdown in StandardLoggingPayload (#14921)

* add new CostBreakdown typed dict

* fix CostBreakdown type

* fix _store_cost_breakdown_in_logging_obj

* fix CostBreakdown

* test_cost_breakdown_in_standard_logging_payload
Ishaan Jaff
2025-09-25 15:48:22 -07:00
committed by GitHub
parent eaa04cd8ce
commit 2dd38420a7
6 changed files with 262 additions and 1 deletion

View File

@@ -11,6 +11,7 @@ Found under `kwargs["standard_logging_object"]`. This is a standard payload, log
| `trace_id` | `str` | Trace multiple LLM calls belonging to same overall request |
| `call_type` | `str` | Type of call |
| `response_cost` | `float` | Cost of the response in USD ($) |
| `cost_breakdown` | `Optional[CostBreakdown]` | Detailed cost breakdown object |
| `response_cost_failure_debug_info` | `StandardLoggingModelCostFailureDebugInformation` | Debug information if cost tracking fails |
| `status` | `StandardLoggingPayloadStatus` | Status of the payload |
| `total_tokens` | `int` | Total number of tokens |
@@ -39,6 +40,29 @@ Found under `kwargs["standard_logging_object"]`. This is a standard payload, log
| `model_parameters` | `dict` | Model parameters |
| `hidden_params` | `StandardLoggingHiddenParams` | Hidden parameters |
## Cost Breakdown
The `cost_breakdown` field provides a detailed cost breakdown for completion requests as a `CostBreakdown` object containing:
- **`input_cost`**: Cost of input/prompt tokens including cache creation tokens
- **`output_cost`**: Cost of output/completion tokens (including reasoning tokens if applicable)
- **`tool_usage_cost`**: Cost of built-in tools usage (e.g., web search, code interpreter)
- **`total_cost`**: Total cost of input + output + tool usage
**Note**: This field is populated whenever LiteLLM computes a cost breakdown for the request; for non-completion calls, `input_cost` and `output_cost` may be `0`. If no breakdown was computed (e.g., the cost calculation never ran), the field is `None`.
The total cost relationship is: `response_cost = cost_breakdown.total_cost`
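For example, a request costing `0.001` for input tokens, `0.002` for output tokens, and `0.0005` for built-in tool usage has `total_cost = 0.0035`, and `response_cost` equals that same `0.0035`.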
### CostBreakdown Type
```python
class CostBreakdown(TypedDict):
input_cost: float # Cost of input/prompt tokens in USD
output_cost: float # Cost of output/completion tokens in USD (includes reasoning)
tool_usage_cost: float # Cost of built-in tools usage in USD
total_cost: float # Total cost in USD
```
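
As an illustration, a custom callback can read the breakdown off the standard logging payload. This is a minimal sketch (the `CostBreakdownLogger` class name is illustrative); it uses LiteLLM's `CustomLogger` hook and the `kwargs["standard_logging_object"]` location described above:

```python
from litellm.integrations.custom_logger import CustomLogger


class CostBreakdownLogger(CustomLogger):
    def log_success_event(self, kwargs, response_obj, start_time, end_time):
        payload = kwargs.get("standard_logging_object") or {}
        breakdown = payload.get("cost_breakdown")
        if breakdown is None:
            return  # e.g., no cost calculation ran for this call
        # input + output + tool usage sum to total_cost, which matches response_cost
        print(
            f"input=${breakdown['input_cost']:.6f} "
            f"output=${breakdown['output_cost']:.6f} "
            f"tools=${breakdown['tool_usage_cost']:.6f} "
            f"total=${breakdown['total_cost']:.6f}"
        )
```

The handler is typically registered via `litellm.callbacks = [CostBreakdownLogger()]` before making requests.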
## StandardLoggingUserAPIKeyMetadata
| Field | Type | Description |

View File

@@ -584,6 +584,42 @@ def _infer_call_type(
    return call_type


def _store_cost_breakdown_in_logging_obj(
    litellm_logging_obj: Optional[LitellmLoggingObject],
    prompt_tokens_cost_usd_dollar: float,
    completion_tokens_cost_usd_dollar: float,
    cost_for_built_in_tools_cost_usd_dollar: float,
    total_cost_usd_dollar: float,
) -> None:
    """
    Helper function to store the cost breakdown in the logging object.

    Args:
        litellm_logging_obj: The logging object to store the breakdown in
        prompt_tokens_cost_usd_dollar: Cost of input tokens
        completion_tokens_cost_usd_dollar: Cost of completion tokens (includes reasoning tokens if applicable)
        cost_for_built_in_tools_cost_usd_dollar: Cost of built-in tools
        total_cost_usd_dollar: Total cost of the request
    """
    if litellm_logging_obj is None:
        return

    try:
        # Reasoning cost is already included in the completion token cost,
        # so only the four components below are stored.
        litellm_logging_obj.set_cost_breakdown(
            input_cost=prompt_tokens_cost_usd_dollar,
            output_cost=completion_tokens_cost_usd_dollar,
            total_cost=total_cost_usd_dollar,
            cost_for_built_in_tools_cost_usd_dollar=cost_for_built_in_tools_cost_usd_dollar,
        )
    except Exception as breakdown_error:
        # Don't fail the main cost calculation if breakdown storage fails
        verbose_logger.debug(f"Error storing cost breakdown: {str(breakdown_error)}")


def completion_cost(  # noqa: PLR0915
    completion_response=None,
    model: Optional[str] = None,
@@ -923,7 +959,7 @@ def completion_cost( # noqa: PLR0915
        _final_cost = (
            prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
        )
        cost_for_built_in_tools = (
            StandardBuiltInToolCostTracking.get_cost_for_built_in_tools(
                model=model,
                response_object=completion_response,
@@ -932,6 +968,17 @@ def completion_cost( # noqa: PLR0915
                custom_llm_provider=custom_llm_provider,
            )
        )
        _final_cost += cost_for_built_in_tools

        # Store the cost breakdown in the logging object if available
        _store_cost_breakdown_in_logging_obj(
            litellm_logging_obj=litellm_logging_obj,
            prompt_tokens_cost_usd_dollar=prompt_tokens_cost_usd_dollar,
            completion_tokens_cost_usd_dollar=completion_tokens_cost_usd_dollar,
            cost_for_built_in_tools_cost_usd_dollar=cost_for_built_in_tools,
            total_cost_usd_dollar=_final_cost,
        )
        return _final_cost
    except Exception as e:
        verbose_logger.debug(

View File

@@ -84,6 +84,7 @@ from litellm.types.rerank import RerankResponse
from litellm.types.router import CustomPricingLiteLLMParams
from litellm.types.utils import (
    CallTypes,
    CostBreakdown,
    CostResponseTypes,
    DynamicPromptManagementParamLiteral,
    EmbeddingResponse,
@@ -343,6 +344,9 @@ class Logging(LiteLLMLoggingBaseClass):
        litellm_params = scrub_sensitive_keys_in_metadata(litellm_params)
        self.litellm_params = litellm_params

        # Initialize the cost breakdown field
        self.cost_breakdown: Optional[CostBreakdown] = None

        self.model_call_details: Dict[str, Any] = {
            "litellm_trace_id": litellm_trace_id,
@@ -1155,6 +1159,33 @@ class Logging(LiteLLMLoggingBaseClass):
            - self.model_call_details.get("start_time", datetime.datetime.now())
        ).total_seconds() * 1000

    def set_cost_breakdown(
        self,
        input_cost: float,
        output_cost: float,
        total_cost: float,
        cost_for_built_in_tools_cost_usd_dollar: float,
    ) -> None:
        """
        Helper method to store the cost breakdown in the logging object.

        Args:
            input_cost: Cost of input/prompt tokens
            output_cost: Cost of output/completion tokens
            total_cost: Total cost of the request
            cost_for_built_in_tools_cost_usd_dollar: Cost of built-in tools
        """
        self.cost_breakdown = CostBreakdown(
            input_cost=input_cost,
            output_cost=output_cost,
            total_cost=total_cost,
            tool_usage_cost=cost_for_built_in_tools_cost_usd_dollar,
        )
        verbose_logger.debug(
            f"Cost breakdown set - input: {input_cost}, output: {output_cost}, "
            f"cost_for_built_in_tools_cost_usd_dollar: {cost_for_built_in_tools_cost_usd_dollar}, "
            f"total: {total_cost}"
        )

    def _response_cost_calculator(
        self,
        result: Union[
@@ -4533,6 +4564,7 @@ def get_standard_logging_object_payload(
        metadata=clean_metadata,
        cache_key=clean_hidden_params["cache_key"],
        response_cost=response_cost,
        cost_breakdown=logging_obj.cost_breakdown,
        total_tokens=usage.total_tokens,
        prompt_tokens=usage.prompt_tokens,
        completion_tokens=usage.completion_tokens,

View File

@@ -36,3 +36,10 @@ guardrails:
      breakdown: true
      payload: true
      dev_info: true

litellm_settings:
  callbacks: ["datadog"]
  datadog_params:
    turn_off_message_logging: true
  datadog_llm_observability_params:
    turn_off_message_logging: true

View File

@@ -2060,12 +2060,23 @@ class StandardLoggingGuardrailInformation(TypedDict, total=False):
StandardLoggingPayloadStatus = Literal["success", "failure"]


class CostBreakdown(TypedDict):
    """
    Detailed cost breakdown for a request
    """

    input_cost: float  # Cost of input/prompt tokens
    output_cost: float  # Cost of output/completion tokens (includes reasoning if applicable)
    total_cost: float  # Total cost (input + output + tool usage)
    tool_usage_cost: float  # Cost of built-in tool usage


class StandardLoggingPayload(TypedDict):
    id: str
    trace_id: str  # Trace multiple LLM calls belonging to the same overall request (e.g. fallbacks/retries)
    call_type: str
    stream: Optional[bool]
    response_cost: float
    cost_breakdown: Optional[CostBreakdown]  # Detailed cost breakdown
    response_cost_failure_debug_info: Optional[
        StandardLoggingModelCostFailureDebugInformation
    ]

View File

@@ -516,3 +516,143 @@ def test_standard_logging_metadata_requester_metadata(
):
    result = StandardLoggingPayloadSetup.get_standard_logging_metadata(metadata)
    assert result["requester_metadata"] == expected_requester_metadata


def test_cost_breakdown_in_standard_logging_payload():
    """
    Test that cost breakdown fields are properly included in StandardLoggingPayload.
    Tests the input_cost, output_cost, tool_usage_cost, and total_cost fields.
    """
    from datetime import datetime

    from litellm.litellm_core_utils.litellm_logging import (
        Logging,
        get_standard_logging_object_payload,
    )

    # Create a mock logging object with a cost breakdown
    logging_obj = Logging(
        model="gpt-4o",
        messages=[{"role": "user", "content": "Hello"}],
        stream=False,
        call_type="completion",
        start_time=datetime.now(),
        litellm_call_id="test-123",
        function_id="test-function",
    )

    # Simulate the cost breakdown being stored during cost calculation
    logging_obj.set_cost_breakdown(
        input_cost=0.001,
        output_cost=0.002,
        total_cost=0.0035,
        cost_for_built_in_tools_cost_usd_dollar=0.0005,
    )

    # Mock response object
    mock_response = {
        "id": "chatcmpl-123",
        "object": "chat.completion",
        "model": "gpt-4o",
        "usage": {
            "prompt_tokens": 10,
            "completion_tokens": 20,
            "total_tokens": 30,
        },
        "choices": [
            {
                "index": 0,
                "message": {
                    "role": "assistant",
                    "content": "Hello! How can I help you today?",
                },
                "finish_reason": "stop",
            }
        ],
    }

    # Create kwargs
    kwargs = {
        "model": "gpt-4o",
        "messages": [{"role": "user", "content": "Hello"}],
        "response_cost": 0.0035,
        "custom_llm_provider": "openai",
    }

    start_time = datetime.now()
    end_time = datetime.now()

    # Get the standard logging payload
    payload = get_standard_logging_object_payload(
        kwargs=kwargs,
        init_response_obj=mock_response,
        start_time=start_time,
        end_time=end_time,
        logging_obj=logging_obj,
        status="success",
    )

    # Verify the cost breakdown field is present
    assert payload is not None
    assert payload["cost_breakdown"] is not None
    assert payload["cost_breakdown"]["input_cost"] == 0.001
    assert payload["cost_breakdown"]["output_cost"] == 0.002
    assert payload["cost_breakdown"]["tool_usage_cost"] == 0.0005
    assert payload["cost_breakdown"]["total_cost"] == 0.0035
    assert payload["response_cost"] == 0.0035

    print("✅ Cost breakdown test passed!")


def test_cost_breakdown_missing_in_standard_logging_payload():
    """
    Test that the cost breakdown field is None when not available (e.g., for embedding calls).
    """
    from datetime import datetime

    from litellm.litellm_core_utils.litellm_logging import (
        Logging,
        get_standard_logging_object_payload,
    )

    # Create a mock logging object without a cost breakdown
    logging_obj = Logging(
        model="gpt-4o",
        messages=[{"role": "user", "content": "Hello"}],
        stream=False,
        call_type="embedding",  # Non-completion call type
        start_time=datetime.now(),
        litellm_call_id="test-123",
        function_id="test-function",
    )
    # No cost breakdown stored

    # Mock response object
    mock_response = {
        "object": "list",
        "data": [{"embedding": [0.1, 0.2, 0.3]}],
        "model": "text-embedding-ada-002",
        "usage": {"prompt_tokens": 10, "total_tokens": 10},
    }

    kwargs = {
        "model": "text-embedding-ada-002",
        "input": ["Hello"],
        "response_cost": 0.0001,
        "custom_llm_provider": "openai",
    }

    start_time = datetime.now()
    end_time = datetime.now()

    # Get the standard logging payload
    payload = get_standard_logging_object_payload(
        kwargs=kwargs,
        init_response_obj=mock_response,
        start_time=start_time,
        end_time=end_time,
        logging_obj=logging_obj,
        status="success",
    )

    # Verify the cost breakdown field is None for non-completion calls
    assert payload is not None
    assert payload["cost_breakdown"] is None
    assert payload["response_cost"] == 0.0001

    print("✅ Cost breakdown missing test passed!")