Mirror of https://github.com/BerriAI/litellm.git (synced 2025-12-06 11:33:26 +08:00)
[Feat] - Cost Tracking - show input, output, tool call cost breakdown in StandardLoggingPayload (#14921)
* add new CostBreakdown typed dict
* fix CostBreakdown type
* fix _store_cost_breakdown_in_logging_obj
* fix CostBreakdown
* test_cost_breakdown_in_standard_logging_payload
@@ -11,6 +11,7 @@ Found under `kwargs["standard_logging_object"]`. This is a standard payload, log

| `trace_id` | `str` | Trace multiple LLM calls belonging to same overall request |
| `call_type` | `str` | Type of call |
| `response_cost` | `float` | Cost of the response in USD ($) |
| `cost_breakdown` | `Optional[CostBreakdown]` | Detailed cost breakdown object |
| `response_cost_failure_debug_info` | `StandardLoggingModelCostFailureDebugInformation` | Debug information if cost tracking fails |
| `status` | `StandardLoggingPayloadStatus` | Status of the payload |
| `total_tokens` | `int` | Total number of tokens |

@@ -39,6 +40,29 @@ Found under `kwargs["standard_logging_object"]`. This is a standard payload, log

| `model_parameters` | `dict` | Model parameters |
| `hidden_params` | `StandardLoggingHiddenParams` | Hidden parameters |
## Cost Breakdown

The `cost_breakdown` field provides a detailed cost breakdown for completion requests as a `CostBreakdown` object containing:

- **`input_cost`**: Cost of input/prompt tokens, including cache creation tokens
- **`output_cost`**: Cost of output/completion tokens (including reasoning tokens, if applicable)
- **`tool_usage_cost`**: Cost of built-in tool usage (e.g., web search, code interpreter)
- **`total_cost`**: Total cost of input + output + tool usage

**Note**: This field is populated for all call types. For non-completion calls, `input_cost` and `output_cost` may be 0.

The total cost relationship is: `response_cost = cost_breakdown.total_cost`

### CostBreakdown Type

```python
class CostBreakdown(TypedDict, total=False):
    input_cost: float       # Cost of input/prompt tokens in USD
    output_cost: float      # Cost of output/completion tokens in USD (includes reasoning)
    tool_usage_cost: float  # Cost of built-in tool usage in USD
    total_cost: float       # Total cost in USD
```
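A minimal sketch (not part of this change) of reading the new field from a custom success callback. It assumes the `kwargs["standard_logging_object"]` access pattern described above; the callback wiring is ordinary LiteLLM custom-callback usage, and the model/message values are placeholders:

```python
import litellm


def log_cost_breakdown(kwargs, completion_response, start_time, end_time):
    # standard_logging_object is attached to kwargs for each call
    payload = kwargs.get("standard_logging_object") or {}
    breakdown = payload.get("cost_breakdown")  # Optional[CostBreakdown]
    if breakdown is not None:
        print(
            f"input={breakdown['input_cost']} "
            f"output={breakdown['output_cost']} "
            f"tools={breakdown['tool_usage_cost']} "
            f"total={breakdown['total_cost']}"
        )


litellm.success_callback = [log_cost_breakdown]

response = litellm.completion(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Hello"}],
)
```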
## StandardLoggingUserAPIKeyMetadata

| Field | Type | Description |
@@ -584,6 +584,42 @@ def _infer_call_type(
    return call_type


def _store_cost_breakdown_in_logging_obj(
    litellm_logging_obj: Optional[LitellmLoggingObject],
    prompt_tokens_cost_usd_dollar: float,
    completion_tokens_cost_usd_dollar: float,
    cost_for_built_in_tools_cost_usd_dollar: float,
    total_cost_usd_dollar: float,
) -> None:
    """
    Helper function to store the cost breakdown in the logging object.

    Args:
        litellm_logging_obj: The logging object to store the breakdown in
        prompt_tokens_cost_usd_dollar: Cost of input tokens
        completion_tokens_cost_usd_dollar: Cost of completion tokens (includes reasoning if applicable)
        cost_for_built_in_tools_cost_usd_dollar: Cost of built-in tools
        total_cost_usd_dollar: Total cost of the request
    """
    if litellm_logging_obj is None:
        return

    try:
        # Store the cost breakdown - reasoning cost is already included in the completion cost
        litellm_logging_obj.set_cost_breakdown(
            input_cost=prompt_tokens_cost_usd_dollar,
            output_cost=completion_tokens_cost_usd_dollar,
            total_cost=total_cost_usd_dollar,
            cost_for_built_in_tools_cost_usd_dollar=cost_for_built_in_tools_cost_usd_dollar,
        )
    except Exception as breakdown_error:
        # Don't fail the main cost calculation if breakdown storage fails
        verbose_logger.debug(f"Error storing cost breakdown: {str(breakdown_error)}")
def completion_cost(  # noqa: PLR0915
    completion_response=None,
    model: Optional[str] = None,

@@ -923,7 +959,7 @@ def completion_cost(  # noqa: PLR0915
        _final_cost = (
            prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
        )
        cost_for_built_in_tools = (
            StandardBuiltInToolCostTracking.get_cost_for_built_in_tools(
                model=model,
                response_object=completion_response,
@@ -932,6 +968,17 @@ def completion_cost(  # noqa: PLR0915
                custom_llm_provider=custom_llm_provider,
            )
        )
        _final_cost += cost_for_built_in_tools

        # Store cost breakdown in the logging object if available
        _store_cost_breakdown_in_logging_obj(
            litellm_logging_obj=litellm_logging_obj,
            prompt_tokens_cost_usd_dollar=prompt_tokens_cost_usd_dollar,
            completion_tokens_cost_usd_dollar=completion_tokens_cost_usd_dollar,
            cost_for_built_in_tools_cost_usd_dollar=cost_for_built_in_tools,
            total_cost_usd_dollar=_final_cost,
        )

        return _final_cost
    except Exception as e:
        verbose_logger.debug(
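As a quick illustration (not part of the diff), a minimal usage sketch of `completion_cost`: it still returns the total cost as a float, while the per-component breakdown is stored on the attached logging object and surfaces as `cost_breakdown` in the standard logging payload. The model and message values here are placeholders:

```python
import litellm

# Hypothetical call; any supported model/provider works the same way
response = litellm.completion(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Hello"}],
)

# completion_cost() returns the total cost in USD as a float
total_cost = litellm.completion_cost(completion_response=response)
print(f"response_cost (USD): {total_cost}")
```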
@@ -84,6 +84,7 @@ from litellm.types.rerank import RerankResponse
from litellm.types.router import CustomPricingLiteLLMParams
from litellm.types.utils import (
    CallTypes,
    CostBreakdown,
    CostResponseTypes,
    DynamicPromptManagementParamLiteral,
    EmbeddingResponse,
@@ -343,6 +344,9 @@ class Logging(LiteLLMLoggingBaseClass):
        litellm_params = scrub_sensitive_keys_in_metadata(litellm_params)

        self.litellm_params = litellm_params

        # Initialize the cost breakdown field
        self.cost_breakdown: Optional[CostBreakdown] = None

        self.model_call_details: Dict[str, Any] = {
            "litellm_trace_id": litellm_trace_id,
@@ -1155,6 +1159,33 @@ class Logging(LiteLLMLoggingBaseClass):
            - self.model_call_details.get("start_time", datetime.datetime.now())
        ).total_seconds() * 1000

    def set_cost_breakdown(
        self,
        input_cost: float,
        output_cost: float,
        total_cost: float,
        cost_for_built_in_tools_cost_usd_dollar: float,
    ) -> None:
        """
        Helper method to store the cost breakdown in the logging object.

        Args:
            input_cost: Cost of input/prompt tokens
            output_cost: Cost of output/completion tokens
            cost_for_built_in_tools_cost_usd_dollar: Cost of built-in tools
            total_cost: Total cost of the request
        """
        self.cost_breakdown = CostBreakdown(
            input_cost=input_cost,
            output_cost=output_cost,
            total_cost=total_cost,
            tool_usage_cost=cost_for_built_in_tools_cost_usd_dollar,
        )
        verbose_logger.debug(
            f"Cost breakdown set - input: {input_cost}, output: {output_cost}, "
            f"cost_for_built_in_tools_cost_usd_dollar: {cost_for_built_in_tools_cost_usd_dollar}, "
            f"total: {total_cost}"
        )

    def _response_cost_calculator(
        self,
        result: Union[
@@ -4533,6 +4564,7 @@ def get_standard_logging_object_payload(
        metadata=clean_metadata,
        cache_key=clean_hidden_params["cache_key"],
        response_cost=response_cost,
        cost_breakdown=logging_obj.cost_breakdown,
        total_tokens=usage.total_tokens,
        prompt_tokens=usage.prompt_tokens,
        completion_tokens=usage.completion_tokens,
@@ -36,3 +36,10 @@ guardrails:
      breakdown: true
      payload: true
      dev_info: true

litellm_settings:
  callbacks: ["datadog"]
  datadog_params:
    turn_off_message_logging: true
  datadog_llm_observability_params:
    turn_off_message_logging: true
@@ -2060,12 +2060,23 @@ class StandardLoggingGuardrailInformation(TypedDict, total=False):
StandardLoggingPayloadStatus = Literal["success", "failure"]


class CostBreakdown(TypedDict):
    """
    Detailed cost breakdown for a request
    """

    input_cost: float  # Cost of input/prompt tokens
    output_cost: float  # Cost of output/completion tokens (includes reasoning if applicable)
    total_cost: float  # Total cost (input + output + tool usage)
    tool_usage_cost: float  # Cost of usage of built-in tools


class StandardLoggingPayload(TypedDict):
    id: str
    trace_id: str  # Trace multiple LLM calls belonging to same overall request (e.g. fallbacks/retries)
    call_type: str
    stream: Optional[bool]
    response_cost: float
    cost_breakdown: Optional[CostBreakdown]  # Detailed cost breakdown
    response_cost_failure_debug_info: Optional[
        StandardLoggingModelCostFailureDebugInformation
    ]
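For reference, a minimal sketch (not from the commit) of how the new TypedDict composes, using illustrative numbers and the relationship documented above (`total_cost` = input + output + tool usage):

```python
import math

from litellm.types.utils import CostBreakdown

# Illustrative values only
breakdown = CostBreakdown(
    input_cost=0.001,
    output_cost=0.002,
    tool_usage_cost=0.0005,
    total_cost=0.0035,
)

# Documented relationship: total_cost = input_cost + output_cost + tool_usage_cost,
# and response_cost == cost_breakdown["total_cost"] in the logged payload
assert math.isclose(
    breakdown["total_cost"],
    breakdown["input_cost"] + breakdown["output_cost"] + breakdown["tool_usage_cost"],
)
```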
@@ -516,3 +516,143 @@ def test_standard_logging_metadata_requester_metadata(
):
    result = StandardLoggingPayloadSetup.get_standard_logging_metadata(metadata)
    assert result["requester_metadata"] == expected_requester_metadata


def test_cost_breakdown_in_standard_logging_payload():
    """
    Test that cost breakdown fields are properly included in StandardLoggingPayload.
    Tests input_cost, output_cost, tool_usage_cost, and total_cost fields.
    """
    from datetime import datetime

    from litellm.litellm_core_utils.litellm_logging import (
        Logging,
        get_standard_logging_object_payload,
    )

    # Create a logging object and simulate the cost breakdown being stored
    # during cost calculation
    logging_obj = Logging(
        model="gpt-4o",
        messages=[{"role": "user", "content": "Hello"}],
        stream=False,
        call_type="completion",
        start_time=datetime.now(),
        litellm_call_id="test-123",
        function_id="test-function",
    )
    logging_obj.set_cost_breakdown(
        input_cost=0.001,
        output_cost=0.002,
        total_cost=0.0035,
        cost_for_built_in_tools_cost_usd_dollar=0.0005,
    )

    # Mock chat completion response object
    mock_response = {
        "id": "chatcmpl-123",
        "object": "chat.completion",
        "model": "gpt-4o",
        "usage": {
            "prompt_tokens": 10,
            "completion_tokens": 20,
            "total_tokens": 30,
        },
        "choices": [
            {
                "index": 0,
                "message": {
                    "role": "assistant",
                    "content": "Hello! How can I help you today?",
                },
                "finish_reason": "stop",
            }
        ],
    }

    kwargs = {
        "model": "gpt-4o",
        "messages": [{"role": "user", "content": "Hello"}],
        "response_cost": 0.0035,
        "custom_llm_provider": "openai",
    }

    start_time = datetime.now()
    end_time = datetime.now()

    # Get the standard logging payload
    payload = get_standard_logging_object_payload(
        kwargs=kwargs,
        init_response_obj=mock_response,
        start_time=start_time,
        end_time=end_time,
        logging_obj=logging_obj,
        status="success",
    )

    # Verify the cost breakdown field is present and matches what was stored
    assert payload is not None
    assert payload["cost_breakdown"] is not None
    assert payload["cost_breakdown"]["input_cost"] == 0.001
    assert payload["cost_breakdown"]["output_cost"] == 0.002
    assert payload["cost_breakdown"]["tool_usage_cost"] == 0.0005
    assert payload["cost_breakdown"]["total_cost"] == 0.0035
    assert payload["response_cost"] == 0.0035

    print("✅ Cost breakdown test passed!")


def test_cost_breakdown_missing_in_standard_logging_payload():
    """
    Test that the cost breakdown field is None when not available (e.g., for embedding calls).
    """
    from datetime import datetime

    from litellm.litellm_core_utils.litellm_logging import (
        Logging,
        get_standard_logging_object_payload,
    )

    # Create a logging object without storing a cost breakdown
    logging_obj = Logging(
        model="gpt-4o",
        messages=[{"role": "user", "content": "Hello"}],
        stream=False,
        call_type="embedding",  # Non-completion call type
        start_time=datetime.now(),
        litellm_call_id="test-123",
        function_id="test-function",
    )

    # Mock embedding response object
    mock_response = {
        "object": "list",
        "data": [{"embedding": [0.1, 0.2, 0.3]}],
        "model": "text-embedding-ada-002",
        "usage": {"prompt_tokens": 10, "total_tokens": 10},
    }

    kwargs = {
        "model": "text-embedding-ada-002",
        "input": ["Hello"],
        "response_cost": 0.0001,
        "custom_llm_provider": "openai",
    }

    start_time = datetime.now()
    end_time = datetime.now()

    payload = get_standard_logging_object_payload(
        kwargs=kwargs,
        init_response_obj=mock_response,
        start_time=start_time,
        end_time=end_time,
        logging_obj=logging_obj,
        status="success",
    )

    # Verify the cost breakdown field is None when no breakdown was stored
    assert payload is not None
    assert payload["cost_breakdown"] is None
    assert payload["response_cost"] == 0.0001

    print("✅ Cost breakdown missing test passed!")