Mirror of https://github.com/BerriAI/litellm.git
Fix: Gemini image_tokens incorrectly treated as text tokens in cost calculation (#17554)
When Gemini image generation models return `text_tokens=0` with `image_tokens > 0`, the cost calculator assumed no token breakdown existed and treated all completion tokens as text tokens, resulting in a roughly 10x underestimation of costs.

Changes:

- Fix the cost calculation logic to respect the token breakdown when image/audio/reasoning tokens are present, even if `text_tokens=0`
- Add `output_cost_per_image_token` pricing for `gemini-3-pro-image-preview` models
- Add a test case reproducing the issue
- Add documentation explaining image token pricing

Fixes #17410
@@ -2006,3 +2006,34 @@ curl -L -X POST 'http://localhost:4000/v1/chat/completions' \

</TabItem>
</Tabs>

### Image Generation Pricing

Gemini image generation models (like `gemini-3-pro-image-preview`) return `image_tokens` in the response usage. These tokens are priced differently from text tokens:

| Token Type | Price per 1M tokens | Price per token |
|--------------|---------------------|-----------------|
| Text output | $12 | $0.000012 |
| Image output | $120 | $0.00012 |

The number of image tokens depends on the output resolution:

| Resolution | Tokens per image | Cost per image |
|------------|------------------|----------------|
| 1K-2K (1024x1024 to 2048x2048) | 1,120 | $0.134 |
| 4K (4096x4096) | 2,000 | $0.24 |

LiteLLM automatically calculates costs using `output_cost_per_image_token` from the model pricing configuration.

**Example response usage:**

```json
{
  "completion_tokens_details": {
    "reasoning_tokens": 225,
    "text_tokens": 0,
    "image_tokens": 1120
  }
}
```

For more details, see [Google's Gemini pricing documentation](https://ai.google.dev/gemini-api/docs/pricing).
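As a quick sanity check on the tables above, the per-image figures follow directly from the per-token price. A minimal standalone sketch in plain Python (arithmetic only, no LiteLLM APIs; the constants are copied from the tables above):

```python
# Output prices from the pricing table (USD per token).
TEXT_PRICE_PER_TOKEN = 12 / 1_000_000    # $12 per 1M text tokens
IMAGE_PRICE_PER_TOKEN = 120 / 1_000_000  # $120 per 1M image tokens

# Image token counts by output resolution, per the resolution table.
TOKENS_PER_IMAGE = {"1K-2K": 1_120, "4K": 2_000}

for resolution, tokens in TOKENS_PER_IMAGE.items():
    cost = tokens * IMAGE_PRICE_PER_TOKEN
    print(f"{resolution}: {tokens} image tokens -> ${cost:.4f} per image")
# 1K-2K: 1120 image tokens -> $0.1344 per image
# 4K: 2000 image tokens -> $0.2400 per image
```

If those same 1,120 tokens were billed at the text rate instead, the charge would be 1,120 × $0.000012 ≈ $0.0134 rather than $0.1344, which is exactly the ~10x underestimation this commit fixes.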
```diff
@@ -583,9 +583,11 @@ def generic_cost_per_token(
         reasoning_tokens = completion_tokens_details["reasoning_tokens"]
         image_tokens = completion_tokens_details["image_tokens"]

-    if text_tokens == 0:
+    # Only assume all tokens are text if there's NO breakdown at all
+    # If image_tokens, audio_tokens, or reasoning_tokens exist, respect text_tokens=0
+    has_token_breakdown = image_tokens > 0 or audio_tokens > 0 or reasoning_tokens > 0
+    if text_tokens == 0 and not has_token_breakdown:
         text_tokens = usage.completion_tokens
     if text_tokens == usage.completion_tokens:
         is_text_tokens_total = True
     ## TEXT COST
     completion_cost = float(text_tokens) * completion_base_cost
```
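The effect of the new `has_token_breakdown` guard is easiest to see on the numbers from issue #17410. Below is a deliberately simplified, self-contained model of the completion-side cost logic; the `completion_cost` function here is hypothetical and flattens several steps of the real `generic_cost_per_token`, with prices matching the `gemini-3-pro-image-preview` entries added below:

```python
# Prices matching gemini-3-pro-image-preview: $1.2e-05/text token, $1.2e-04/image token.
OUTPUT_COST_PER_TOKEN = 1.2e-05
OUTPUT_COST_PER_IMAGE_TOKEN = 1.2e-04

def completion_cost(completion_tokens, text_tokens, image_tokens, reasoning_tokens, fixed):
    """Simplified sketch of the completion-side costing, before/after the fix."""
    has_breakdown = image_tokens > 0 or reasoning_tokens > 0
    if text_tokens == 0 and (not fixed or not has_breakdown):
        # Old behavior: text_tokens=0 collapsed to "all completion tokens are text",
        # which also skipped the separate image/reasoning pricing below.
        text_tokens = completion_tokens
        image_tokens = reasoning_tokens = 0
    return (
        text_tokens * OUTPUT_COST_PER_TOKEN
        + image_tokens * OUTPUT_COST_PER_IMAGE_TOKEN
        + reasoning_tokens * OUTPUT_COST_PER_TOKEN  # reasoning billed at the base rate
    )

# Usage from the issue: 1345 completion tokens = 0 text + 1120 image + 225 reasoning.
print(completion_cost(1345, 0, 1120, 225, fixed=False))  # ≈ 0.01614 (~10x too low)
print(completion_cost(1345, 0, 1120, 225, fixed=True))   # ≈ 0.1371
```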
```diff
@@ -12147,6 +12147,7 @@
     "max_tokens": 65536,
     "mode": "image_generation",
     "output_cost_per_image": 0.134,
+    "output_cost_per_image_token": 1.2e-04,
     "output_cost_per_token": 1.2e-05,
     "output_cost_per_token_batches": 6e-06,
     "source": "https://ai.google.dev/gemini-api/docs/pricing",
@@ -13884,6 +13885,7 @@
     "max_tokens": 65536,
     "mode": "image_generation",
     "output_cost_per_image": 0.134,
+    "output_cost_per_image_token": 1.2e-04,
     "output_cost_per_token": 1.2e-05,
     "rpm": 1000,
     "tpm": 4000000,
@@ -25802,6 +25804,7 @@
     "max_tokens": 65536,
     "mode": "image_generation",
     "output_cost_per_image": 0.134,
+    "output_cost_per_image_token": 1.2e-04,
     "output_cost_per_token": 1.2e-05,
     "output_cost_per_token_batches": 6e-06,
     "source": "https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/3-pro-image"
```
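With the updated map loaded, the new key can be read back directly; this mirrors how the regression test below loads the bundled pricing map:

```python
import os
import litellm

# Use the cost map bundled with the installed litellm package (same setup as the test).
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
litellm.model_cost = litellm.get_model_cost_map(url="")

entry = litellm.model_cost["gemini-3-pro-image-preview"]
print(entry.get("output_cost_per_image_token"))  # expected: 0.00012
print(entry.get("output_cost_per_token"))        # expected: 1.2e-05
```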
The commit applies the identical three hunks a second time (LiteLLM keeps a second, backup copy of the pricing JSON in sync with the primary one).
```diff
@@ -720,6 +720,72 @@ def test_service_tier_fallback_pricing():
     assert abs(std_cost[1] - expected_standard_completion) < 1e-10, f"Standard completion cost mismatch: {std_cost[1]} vs {expected_standard_completion}"


+def test_gemini_image_generation_cost_with_zero_text_tokens():
+    """
+    Test that image_tokens are correctly costed when text_tokens=0.
+
+    Reproduces issue #17410: completion_cost calculates incorrectly for
+    Gemini-3-pro-image model - image_tokens were treated as text tokens
+    when text_tokens=0.
+
+    https://github.com/BerriAI/litellm/issues/17410
+    """
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
+
+    model = "gemini-3-pro-image-preview"
+    custom_llm_provider = "vertex_ai"
+
+    # Usage from the issue: text_tokens=0, image_tokens=1120, reasoning_tokens=225
+    usage = Usage(
+        completion_tokens=1345,
+        prompt_tokens=10,
+        total_tokens=1355,
+        completion_tokens_details=CompletionTokensDetailsWrapper(
+            accepted_prediction_tokens=None,
+            audio_tokens=None,
+            reasoning_tokens=225,
+            rejected_prediction_tokens=None,
+            text_tokens=0,  # This is the key: text_tokens=0
+            image_tokens=1120,
+        ),
+        prompt_tokens_details=PromptTokensDetailsWrapper(
+            audio_tokens=None, cached_tokens=None, text_tokens=10, image_tokens=None
+        ),
+    )
+
+    model_cost_map = litellm.model_cost[model]
+    prompt_cost, completion_cost = generic_cost_per_token(
+        model=model,
+        usage=usage,
+        custom_llm_provider=custom_llm_provider,
+    )
+
+    # Expected costs:
+    # - text_tokens: 0 * output_cost_per_token = 0
+    # - image_tokens: 1120 * output_cost_per_image_token = 1120 * 1.2e-04 = 0.1344
+    # - reasoning_tokens: 225 * output_cost_per_token = 225 * 1.2e-05 = 0.0027
+    # Total completion: ~0.1371
+
+    output_cost_per_image_token = model_cost_map.get("output_cost_per_image_token", 0)
+    output_cost_per_token = model_cost_map.get("output_cost_per_token", 0)
+
+    expected_image_cost = 1120 * output_cost_per_image_token
+    expected_reasoning_cost = 225 * output_cost_per_token  # reasoning uses base token cost
+    expected_completion_cost = expected_image_cost + expected_reasoning_cost
+
+    # The bug was: all 1345 tokens were treated as text = 1345 * 1.2e-05 = 0.01614
+    # Fixed: image_tokens use image pricing = ~0.137
+
+    assert completion_cost > 0.10, (
+        f"Completion cost should be > $0.10 (image tokens are expensive), got ${completion_cost:.6f}. "
+        f"Bug: tokens may be incorrectly treated as text tokens."
+    )
+    assert round(completion_cost, 4) == round(expected_completion_cost, 4), (
+        f"Expected completion cost ${expected_completion_cost:.6f}, got ${completion_cost:.6f}"
+    )
+
+
 def test_bedrock_anthropic_prompt_caching():
     """Test Bedrock Anthropic models with prompt caching return correct costs."""
     model = "us.anthropic.claude-sonnet-4-5-20250929-v1:0"
```
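To run just the new regression test, pytest's keyword selector can target it by name. A minimal invocation via `pytest.main` (run from the directory containing the cost-calculation tests; the test file's path is not shown in this diff):

```python
import pytest

# -k selects tests by substring match on the test name.
pytest.main(["-k", "test_gemini_image_generation_cost_with_zero_text_tokens", "-v"])
```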