Mirror of https://github.com/BerriAI/litellm.git (synced 2025-12-06 11:33:26 +08:00)
[Feat] - Cost Tracking - show input, output, tool call cost breakdown in StandardLoggingPayload (#14921)
* add new CostBreakdown typed dict
* fix CostBreakdown type
* fix _store_cost_breakdown_in_logging_obj
* fix CostBreakdown
* test_cost_breakdown_in_standard_logging_payload
@@ -11,6 +11,7 @@ Found under `kwargs["standard_logging_object"]`. This is a standard payload, log

| `trace_id` | `str` | Trace multiple LLM calls belonging to same overall request |
| `call_type` | `str` | Type of call |
| `response_cost` | `float` | Cost of the response in USD ($) |
| `cost_breakdown` | `Optional[CostBreakdown]` | Detailed cost breakdown object |
| `response_cost_failure_debug_info` | `StandardLoggingModelCostFailureDebugInformation` | Debug information if cost tracking fails |
| `status` | `StandardLoggingPayloadStatus` | Status of the payload |
| `total_tokens` | `int` | Total number of tokens |

@@ -39,6 +40,29 @@ Found under `kwargs["standard_logging_object"]`. This is a standard payload, log

| `model_parameters` | `dict` | Model parameters |
| `hidden_params` | `StandardLoggingHiddenParams` | Hidden parameters |
## Cost Breakdown

The `cost_breakdown` field provides a detailed cost breakdown for completion requests as a `CostBreakdown` object containing:

- **`input_cost`**: Cost of input/prompt tokens, including cache creation tokens
- **`output_cost`**: Cost of output/completion tokens (including reasoning tokens, if applicable)
- **`tool_usage_cost`**: Cost of built-in tool usage (e.g., web search, code interpreter)
- **`total_cost`**: Total cost of input + output + tool usage

**Note**: This field is populated for all call types. For non-completion calls, `input_cost` and `output_cost` may be 0.

The total cost relationship is: `response_cost = cost_breakdown.total_cost`

### CostBreakdown Type

```python
class CostBreakdown(TypedDict, total=False):
    input_cost: float       # Cost of input/prompt tokens in USD
    output_cost: float      # Cost of output/completion tokens in USD (includes reasoning)
    tool_usage_cost: float  # Cost of built-in tool usage in USD
    total_cost: float       # Total cost in USD
```
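A minimal sketch (not part of this change) of reading the new field from a custom success callback. It assumes the `kwargs["standard_logging_object"]` access pattern described above; the callback wiring is ordinary LiteLLM custom-callback usage, and the model/message values are placeholders:

```python
import litellm


def log_cost_breakdown(kwargs, completion_response, start_time, end_time):
    # standard_logging_object is attached to kwargs for each call
    payload = kwargs.get("standard_logging_object") or {}
    breakdown = payload.get("cost_breakdown")  # Optional[CostBreakdown]
    if breakdown is not None:
        print(
            f"input={breakdown['input_cost']} "
            f"output={breakdown['output_cost']} "
            f"tools={breakdown['tool_usage_cost']} "
            f"total={breakdown['total_cost']}"
        )


litellm.success_callback = [log_cost_breakdown]

response = litellm.completion(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Hello"}],
)
```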
## StandardLoggingUserAPIKeyMetadata

| Field | Type | Description |
@@ -584,6 +584,42 @@ def _infer_call_type(
    return call_type


def _store_cost_breakdown_in_logging_obj(
    litellm_logging_obj: Optional[LitellmLoggingObject],
    prompt_tokens_cost_usd_dollar: float,
    completion_tokens_cost_usd_dollar: float,
    cost_for_built_in_tools_cost_usd_dollar: float,
    total_cost_usd_dollar: float,
) -> None:
    """
    Helper function to store the cost breakdown in the logging object.

    Args:
        litellm_logging_obj: The logging object to store the breakdown in
        prompt_tokens_cost_usd_dollar: Cost of input tokens
        completion_tokens_cost_usd_dollar: Cost of completion tokens (includes reasoning if applicable)
        cost_for_built_in_tools_cost_usd_dollar: Cost of built-in tools
        total_cost_usd_dollar: Total cost of the request
    """
    if litellm_logging_obj is None:
        return

    try:
        # Store the cost breakdown - reasoning cost is already included in the completion cost
        litellm_logging_obj.set_cost_breakdown(
            input_cost=prompt_tokens_cost_usd_dollar,
            output_cost=completion_tokens_cost_usd_dollar,
            total_cost=total_cost_usd_dollar,
            cost_for_built_in_tools_cost_usd_dollar=cost_for_built_in_tools_cost_usd_dollar,
        )
    except Exception as breakdown_error:
        # Don't fail the main cost calculation if breakdown storage fails
        verbose_logger.debug(f"Error storing cost breakdown: {str(breakdown_error)}")
def completion_cost(  # noqa: PLR0915
    completion_response=None,
    model: Optional[str] = None,

@@ -923,7 +959,7 @@ def completion_cost(  # noqa: PLR0915
        _final_cost = (
            prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
        )
        cost_for_built_in_tools = (
            StandardBuiltInToolCostTracking.get_cost_for_built_in_tools(
                model=model,
                response_object=completion_response,
@@ -932,6 +968,17 @@ def completion_cost(  # noqa: PLR0915
                custom_llm_provider=custom_llm_provider,
            )
        )
        _final_cost += cost_for_built_in_tools

        # Store cost breakdown in the logging object if available
        _store_cost_breakdown_in_logging_obj(
            litellm_logging_obj=litellm_logging_obj,
            prompt_tokens_cost_usd_dollar=prompt_tokens_cost_usd_dollar,
            completion_tokens_cost_usd_dollar=completion_tokens_cost_usd_dollar,
            cost_for_built_in_tools_cost_usd_dollar=cost_for_built_in_tools,
            total_cost_usd_dollar=_final_cost,
        )

        return _final_cost
    except Exception as e:
        verbose_logger.debug(
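As a quick illustration (not part of the diff), a minimal usage sketch of `completion_cost`: it still returns the total cost as a float, while the per-component breakdown is stored on the attached logging object and surfaces as `cost_breakdown` in the standard logging payload. The model and message values here are placeholders:

```python
import litellm

# Hypothetical call; any supported model/provider works the same way
response = litellm.completion(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Hello"}],
)

# completion_cost() returns the total cost in USD as a float
total_cost = litellm.completion_cost(completion_response=response)
print(f"response_cost (USD): {total_cost}")
```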
@@ -84,6 +84,7 @@ from litellm.types.rerank import RerankResponse
from litellm.types.router import CustomPricingLiteLLMParams
from litellm.types.utils import (
    CallTypes,
    CostBreakdown,
    CostResponseTypes,
    DynamicPromptManagementParamLiteral,
    EmbeddingResponse,
@@ -343,6 +344,9 @@ class Logging(LiteLLMLoggingBaseClass):
        litellm_params = scrub_sensitive_keys_in_metadata(litellm_params)

        self.litellm_params = litellm_params

        # Initialize the cost breakdown field
        self.cost_breakdown: Optional[CostBreakdown] = None

        self.model_call_details: Dict[str, Any] = {
            "litellm_trace_id": litellm_trace_id,
@@ -1155,6 +1159,33 @@ class Logging(LiteLLMLoggingBaseClass):
            - self.model_call_details.get("start_time", datetime.datetime.now())
        ).total_seconds() * 1000

    def set_cost_breakdown(
        self,
        input_cost: float,
        output_cost: float,
        total_cost: float,
        cost_for_built_in_tools_cost_usd_dollar: float,
    ) -> None:
        """
        Helper method to store the cost breakdown in the logging object.

        Args:
            input_cost: Cost of input/prompt tokens
            output_cost: Cost of output/completion tokens
            cost_for_built_in_tools_cost_usd_dollar: Cost of built-in tools
            total_cost: Total cost of the request
        """
        self.cost_breakdown = CostBreakdown(
            input_cost=input_cost,
            output_cost=output_cost,
            total_cost=total_cost,
            tool_usage_cost=cost_for_built_in_tools_cost_usd_dollar,
        )
        verbose_logger.debug(
            f"Cost breakdown set - input: {input_cost}, output: {output_cost}, "
            f"cost_for_built_in_tools_cost_usd_dollar: {cost_for_built_in_tools_cost_usd_dollar}, "
            f"total: {total_cost}"
        )

    def _response_cost_calculator(
        self,
        result: Union[
@@ -4533,6 +4564,7 @@ def get_standard_logging_object_payload(
        metadata=clean_metadata,
        cache_key=clean_hidden_params["cache_key"],
        response_cost=response_cost,
        cost_breakdown=logging_obj.cost_breakdown,
        total_tokens=usage.total_tokens,
        prompt_tokens=usage.prompt_tokens,
        completion_tokens=usage.completion_tokens,
@@ -36,3 +36,10 @@ guardrails:
      breakdown: true
      payload: true
      dev_info: true

litellm_settings:
  callbacks: ["datadog"]
  datadog_params:
    turn_off_message_logging: true
  datadog_llm_observability_params:
    turn_off_message_logging: true
@@ -2060,12 +2060,23 @@ class StandardLoggingGuardrailInformation(TypedDict, total=False):
StandardLoggingPayloadStatus = Literal["success", "failure"]


class CostBreakdown(TypedDict):
    """
    Detailed cost breakdown for a request
    """

    input_cost: float  # Cost of input/prompt tokens
    output_cost: float  # Cost of output/completion tokens (includes reasoning if applicable)
    total_cost: float  # Total cost (input + output + tool usage)
    tool_usage_cost: float  # Cost of usage of built-in tools


class StandardLoggingPayload(TypedDict):
    id: str
    trace_id: str  # Trace multiple LLM calls belonging to same overall request (e.g. fallbacks/retries)
    call_type: str
    stream: Optional[bool]
    response_cost: float
    cost_breakdown: Optional[CostBreakdown]  # Detailed cost breakdown
    response_cost_failure_debug_info: Optional[
        StandardLoggingModelCostFailureDebugInformation
    ]
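For reference, a minimal sketch (not from the commit) of how the new TypedDict composes, using illustrative numbers and the relationship documented above (`total_cost` = input + output + tool usage):

```python
import math

from litellm.types.utils import CostBreakdown

# Illustrative values only
breakdown = CostBreakdown(
    input_cost=0.001,
    output_cost=0.002,
    tool_usage_cost=0.0005,
    total_cost=0.0035,
)

# Documented relationship: total_cost = input_cost + output_cost + tool_usage_cost,
# and response_cost == cost_breakdown["total_cost"] in the logged payload
assert math.isclose(
    breakdown["total_cost"],
    breakdown["input_cost"] + breakdown["output_cost"] + breakdown["tool_usage_cost"],
)
```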
@@ -516,3 +516,143 @@ def test_standard_logging_metadata_requester_metadata(
):
    result = StandardLoggingPayloadSetup.get_standard_logging_metadata(metadata)
    assert result["requester_metadata"] == expected_requester_metadata


def test_cost_breakdown_in_standard_logging_payload():
    """
    Test that cost breakdown fields are properly included in StandardLoggingPayload.
    Tests input_cost, output_cost, tool_usage_cost, and total_cost fields.
    """
    from datetime import datetime

    from litellm.litellm_core_utils.litellm_logging import (
        Logging,
        get_standard_logging_object_payload,
    )

    # Create a logging object and simulate the cost breakdown being stored
    # during cost calculation
    logging_obj = Logging(
        model="gpt-4o",
        messages=[{"role": "user", "content": "Hello"}],
        stream=False,
        call_type="completion",
        start_time=datetime.now(),
        litellm_call_id="test-123",
        function_id="test-function",
    )
    logging_obj.set_cost_breakdown(
        input_cost=0.001,
        output_cost=0.002,
        total_cost=0.0035,
        cost_for_built_in_tools_cost_usd_dollar=0.0005,
    )

    # Mock chat completion response object
    mock_response = {
        "id": "chatcmpl-123",
        "object": "chat.completion",
        "model": "gpt-4o",
        "usage": {
            "prompt_tokens": 10,
            "completion_tokens": 20,
            "total_tokens": 30,
        },
        "choices": [
            {
                "index": 0,
                "message": {
                    "role": "assistant",
                    "content": "Hello! How can I help you today?",
                },
                "finish_reason": "stop",
            }
        ],
    }

    kwargs = {
        "model": "gpt-4o",
        "messages": [{"role": "user", "content": "Hello"}],
        "response_cost": 0.0035,
        "custom_llm_provider": "openai",
    }

    start_time = datetime.now()
    end_time = datetime.now()

    # Get the standard logging payload
    payload = get_standard_logging_object_payload(
        kwargs=kwargs,
        init_response_obj=mock_response,
        start_time=start_time,
        end_time=end_time,
        logging_obj=logging_obj,
        status="success",
    )

    # Verify the cost breakdown field is present and matches what was stored
    assert payload is not None
    assert payload["cost_breakdown"] is not None
    assert payload["cost_breakdown"]["input_cost"] == 0.001
    assert payload["cost_breakdown"]["output_cost"] == 0.002
    assert payload["cost_breakdown"]["tool_usage_cost"] == 0.0005
    assert payload["cost_breakdown"]["total_cost"] == 0.0035
    assert payload["response_cost"] == 0.0035

    print("✅ Cost breakdown test passed!")


def test_cost_breakdown_missing_in_standard_logging_payload():
    """
    Test that the cost breakdown field is None when not available (e.g., for embedding calls).
    """
    from datetime import datetime

    from litellm.litellm_core_utils.litellm_logging import (
        Logging,
        get_standard_logging_object_payload,
    )

    # Create a logging object without storing a cost breakdown
    logging_obj = Logging(
        model="gpt-4o",
        messages=[{"role": "user", "content": "Hello"}],
        stream=False,
        call_type="embedding",  # Non-completion call type
        start_time=datetime.now(),
        litellm_call_id="test-123",
        function_id="test-function",
    )

    # Mock embedding response object
    mock_response = {
        "object": "list",
        "data": [{"embedding": [0.1, 0.2, 0.3]}],
        "model": "text-embedding-ada-002",
        "usage": {"prompt_tokens": 10, "total_tokens": 10},
    }

    kwargs = {
        "model": "text-embedding-ada-002",
        "input": ["Hello"],
        "response_cost": 0.0001,
        "custom_llm_provider": "openai",
    }

    start_time = datetime.now()
    end_time = datetime.now()

    payload = get_standard_logging_object_payload(
        kwargs=kwargs,
        init_response_obj=mock_response,
        start_time=start_time,
        end_time=end_time,
        logging_obj=logging_obj,
        status="success",
    )

    # Verify the cost breakdown field is None when no breakdown was stored
    assert payload is not None
    assert payload["cost_breakdown"] is None
    assert payload["response_cost"] == 0.0001

    print("✅ Cost breakdown missing test passed!")