From 829b06f53fb948bf4c48d8b0a3b3acedef2fe441 Mon Sep 17 00:00:00 2001
From: Cesar Garcia <128240629+Chesars@users.noreply.github.com>
Date: Fri, 5 Dec 2025 20:55:38 -0300
Subject: [PATCH] Fix: Gemini image_tokens incorrectly treated as text tokens
 in cost calculation (#17554)

When Gemini image generation models return `text_tokens=0` with
`image_tokens > 0`, the cost calculator was assuming no token breakdown
existed and treating all completion tokens as text tokens, resulting in
~10x underestimation of costs.

Changes:
- Fix cost calculation logic to respect token breakdown when
  image/audio/reasoning tokens are present, even if text_tokens=0
- Add `output_cost_per_image_token` pricing for gemini-3-pro-image-preview
  models
- Add test case reproducing the issue
- Add documentation explaining image token pricing

Fixes #17410
---
 docs/my-website/docs/providers/gemini.md      | 31 +++
 .../litellm_core_utils/llm_cost_calc/utils.py |  6 +-
 ...odel_prices_and_context_window_backup.json |  3 +
 model_prices_and_context_window.json          |  3 +
 .../llm_cost_calc/test_llm_cost_calc_utils.py | 66 +++++++++++++++++++
 5 files changed, 107 insertions(+), 2 deletions(-)

diff --git a/docs/my-website/docs/providers/gemini.md b/docs/my-website/docs/providers/gemini.md
index 1b21ed8d03..f33c492f18 100644
--- a/docs/my-website/docs/providers/gemini.md
+++ b/docs/my-website/docs/providers/gemini.md
@@ -2006,3 +2006,34 @@ curl -L -X POST 'http://localhost:4000/v1/chat/completions' \
 
 
 
+### Image Generation Pricing
+
+Gemini image generation models (like `gemini-3-pro-image-preview`) return `image_tokens` in the response usage. These tokens are priced differently from text tokens:
+
+| Token Type | Price per 1M tokens | Price per token |
+|------------|---------------------|-----------------|
+| Text output | $12 | $0.000012 |
+| Image output | $120 | $0.00012 |
+
+The number of image tokens depends on the output resolution:
+
+| Resolution | Tokens per image | Cost per image |
+|------------|------------------|----------------|
+| 1K-2K (1024x1024 to 2048x2048) | 1,120 | $0.134 |
+| 4K (4096x4096) | 2,000 | $0.24 |
+
+LiteLLM automatically calculates costs using `output_cost_per_image_token` from the model pricing configuration.
+
+**Example response usage:**
+```json
+{
+  "completion_tokens_details": {
+    "reasoning_tokens": 225,
+    "text_tokens": 0,
+    "image_tokens": 1120
+  }
+}
+```
+
+For more details, see [Google's Gemini pricing documentation](https://ai.google.dev/gemini-api/docs/pricing).
+
diff --git a/litellm/litellm_core_utils/llm_cost_calc/utils.py b/litellm/litellm_core_utils/llm_cost_calc/utils.py
index 9717f442b8..ef2183a455 100644
--- a/litellm/litellm_core_utils/llm_cost_calc/utils.py
+++ b/litellm/litellm_core_utils/llm_cost_calc/utils.py
@@ -583,9 +583,11 @@ def generic_cost_per_token(
     reasoning_tokens = completion_tokens_details["reasoning_tokens"]
     image_tokens = completion_tokens_details["image_tokens"]
-    if text_tokens == 0:
+    # Only assume all tokens are text if there's NO breakdown at all
+    # If image_tokens, audio_tokens, or reasoning_tokens exist, respect text_tokens=0
+    has_token_breakdown = image_tokens > 0 or audio_tokens > 0 or reasoning_tokens > 0
+    if text_tokens == 0 and not has_token_breakdown:
         text_tokens = usage.completion_tokens
-
     if text_tokens == usage.completion_tokens:
         is_text_tokens_total = True
     ## TEXT COST
     completion_cost = float(text_tokens) * completion_base_cost
diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index d4afde20e9..c26aada0a5 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -12147,6 +12147,7 @@
         "max_tokens": 65536,
         "mode": "image_generation",
         "output_cost_per_image": 0.134,
+        "output_cost_per_image_token": 1.2e-04,
         "output_cost_per_token": 1.2e-05,
         "output_cost_per_token_batches": 6e-06,
         "source": "https://ai.google.dev/gemini-api/docs/pricing",
@@ -13884,6 +13885,7 @@
         "max_tokens": 65536,
         "mode": "image_generation",
         "output_cost_per_image": 0.134,
+        "output_cost_per_image_token": 1.2e-04,
         "output_cost_per_token": 1.2e-05,
         "rpm": 1000,
         "tpm": 4000000,
@@ -25802,6 +25804,7 @@
         "max_tokens": 65536,
         "mode": "image_generation",
         "output_cost_per_image": 0.134,
+        "output_cost_per_image_token": 1.2e-04,
         "output_cost_per_token": 1.2e-05,
         "output_cost_per_token_batches": 6e-06,
         "source": "https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/3-pro-image"
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index d4afde20e9..c26aada0a5 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -12147,6 +12147,7 @@
         "max_tokens": 65536,
         "mode": "image_generation",
         "output_cost_per_image": 0.134,
+        "output_cost_per_image_token": 1.2e-04,
         "output_cost_per_token": 1.2e-05,
         "output_cost_per_token_batches": 6e-06,
         "source": "https://ai.google.dev/gemini-api/docs/pricing",
@@ -13884,6 +13885,7 @@
         "max_tokens": 65536,
         "mode": "image_generation",
         "output_cost_per_image": 0.134,
+        "output_cost_per_image_token": 1.2e-04,
         "output_cost_per_token": 1.2e-05,
         "rpm": 1000,
         "tpm": 4000000,
@@ -25802,6 +25804,7 @@
         "max_tokens": 65536,
         "mode": "image_generation",
         "output_cost_per_image": 0.134,
+        "output_cost_per_image_token": 1.2e-04,
         "output_cost_per_token": 1.2e-05,
         "output_cost_per_token_batches": 6e-06,
         "source": "https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/3-pro-image"
diff --git a/tests/test_litellm/litellm_core_utils/llm_cost_calc/test_llm_cost_calc_utils.py b/tests/test_litellm/litellm_core_utils/llm_cost_calc/test_llm_cost_calc_utils.py
index 3d3dd98f49..814d0f23a6 100644
--- a/tests/test_litellm/litellm_core_utils/llm_cost_calc/test_llm_cost_calc_utils.py
+++ b/tests/test_litellm/litellm_core_utils/llm_cost_calc/test_llm_cost_calc_utils.py
@@ -720,6 +720,72 @@ def test_service_tier_fallback_pricing():
     assert abs(std_cost[1] - expected_standard_completion) < 1e-10, f"Standard completion cost mismatch: {std_cost[1]} vs {expected_standard_completion}"
 
 
+def test_gemini_image_generation_cost_with_zero_text_tokens():
+    """
+    Test that image_tokens are correctly costed when text_tokens=0.
+
+    Reproduces issue #17410: completion_cost calculates incorrectly for
+    Gemini-3-pro-image model - image_tokens were treated as text tokens
+    when text_tokens=0.
+
+    https://github.com/BerriAI/litellm/issues/17410
+    """
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
+
+    model = "gemini-3-pro-image-preview"
+    custom_llm_provider = "vertex_ai"
+
+    # Usage from the issue: text_tokens=0, image_tokens=1120, reasoning_tokens=225
+    usage = Usage(
+        completion_tokens=1345,
+        prompt_tokens=10,
+        total_tokens=1355,
+        completion_tokens_details=CompletionTokensDetailsWrapper(
+            accepted_prediction_tokens=None,
+            audio_tokens=None,
+            reasoning_tokens=225,
+            rejected_prediction_tokens=None,
+            text_tokens=0,  # This is the key: text_tokens=0
+            image_tokens=1120,
+        ),
+        prompt_tokens_details=PromptTokensDetailsWrapper(
+            audio_tokens=None, cached_tokens=None, text_tokens=10, image_tokens=None
+        ),
+    )
+
+    model_cost_map = litellm.model_cost[model]
+    prompt_cost, completion_cost = generic_cost_per_token(
+        model=model,
+        usage=usage,
+        custom_llm_provider=custom_llm_provider,
+    )
+
+    # Expected costs:
+    # - text_tokens: 0 * output_cost_per_token = 0
+    # - image_tokens: 1120 * output_cost_per_image_token = 1120 * 1.2e-04 = 0.1344
+    # - reasoning_tokens: 225 * output_cost_per_token = 225 * 1.2e-05 = 0.0027
+    # Total completion: ~0.1371
+
+    output_cost_per_image_token = model_cost_map.get("output_cost_per_image_token", 0)
+    output_cost_per_token = model_cost_map.get("output_cost_per_token", 0)
+
+    expected_image_cost = 1120 * output_cost_per_image_token
+    expected_reasoning_cost = 225 * output_cost_per_token  # reasoning uses base token cost
+    expected_completion_cost = expected_image_cost + expected_reasoning_cost
+
+    # The bug was: all 1345 tokens were treated as text = 1345 * 1.2e-05 = 0.01614
+    # Fixed: image_tokens use image pricing = ~0.137
+
+    assert completion_cost > 0.10, (
+        f"Completion cost should be > $0.10 (image tokens are expensive), got ${completion_cost:.6f}. "
+        f"Bug: tokens may be incorrectly treated as text tokens."
+    )
+    assert round(completion_cost, 4) == round(expected_completion_cost, 4), (
+        f"Expected completion cost ${expected_completion_cost:.6f}, got ${completion_cost:.6f}"
+    )
+
+
 def test_bedrock_anthropic_prompt_caching():
     """Test Bedrock Anthropic models with prompt caching return correct costs."""
     model = "us.anthropic.claude-sonnet-4-5-20250929-v1:0"
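
For reference, the arithmetic behind this fix can be checked in isolation. The sketch below is illustrative only and is not part of the patch or of LiteLLM's API: the constants and the helper name are local to the sketch. It mirrors the breakdown condition added in generic_cost_per_token() and the pricing values added for gemini-3-pro-image-preview, applied to the usage numbers from issue #17410 (audio tokens omitted for brevity).

# Standalone sketch (not LiteLLM code): mirrors the patched breakdown logic
# and the pricing values added above for gemini-3-pro-image-preview.
OUTPUT_COST_PER_TOKEN = 1.2e-05        # text / reasoning output tokens
OUTPUT_COST_PER_IMAGE_TOKEN = 1.2e-04  # image output tokens (pricing key added by this patch)


def sketch_completion_cost(text_tokens: int, image_tokens: int, reasoning_tokens: int) -> float:
    """Completion-side cost only, using the fixed breakdown condition."""
    completion_tokens = text_tokens + image_tokens + reasoning_tokens
    has_token_breakdown = image_tokens > 0 or reasoning_tokens > 0
    if text_tokens == 0 and not has_token_breakdown:
        # Fall back to billing everything as text only when no breakdown exists at all.
        text_tokens = completion_tokens
    return (
        text_tokens * OUTPUT_COST_PER_TOKEN
        + reasoning_tokens * OUTPUT_COST_PER_TOKEN
        + image_tokens * OUTPUT_COST_PER_IMAGE_TOKEN
    )


if __name__ == "__main__":
    # Usage from issue #17410: text_tokens=0, image_tokens=1120, reasoning_tokens=225.
    fixed = sketch_completion_cost(text_tokens=0, image_tokens=1120, reasoning_tokens=225)
    buggy = 1345 * OUTPUT_COST_PER_TOKEN  # old behavior: all 1345 completion tokens billed as text
    print(f"fixed cost: ${fixed:.4f}")   # $0.1371
    print(f"buggy cost: ${buggy:.5f}")   # $0.01614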