mirror of
https://github.com/BerriAI/litellm.git
synced 2025-12-06 11:33:26 +08:00
Fix: Gemini image_tokens incorrectly treated as text tokens in cost calculation (#17554)
When Gemini image generation models return `text_tokens=0` with `image_tokens > 0`, the cost calculator was assuming no token breakdown existed and treating all completion tokens as text tokens, resulting in ~10x underestimation of costs. Changes: - Fix cost calculation logic to respect token breakdown when image/audio/reasoning tokens are present, even if text_tokens=0 - Add `output_cost_per_image_token` pricing for gemini-3-pro-image-preview models - Add test case reproducing the issue - Add documentation explaining image token pricing Fixes #17410
This commit is contained in:
@@ -2006,3 +2006,34 @@ curl -L -X POST 'http://localhost:4000/v1/chat/completions' \
|
||||
</TabItem>
|
||||
</Tabs>
|
||||
|
||||
### Image Generation Pricing
|
||||
|
||||
Gemini image generation models (like `gemini-3-pro-image-preview`) return `image_tokens` in the response usage. These tokens are priced differently from text tokens:
|
||||
|
||||
| Token Type | Price per 1M tokens | Price per token |
|------------|---------------------|-----------------|
| Text output | $12 | $0.000012 |
| Image output | $120 | $0.00012 |
|
||||
|
||||
The number of image tokens depends on the output resolution:
|
||||
|
||||
| Resolution | Tokens per image | Cost per image |
|------------|------------------|----------------|
| 1K-2K (1024x1024 to 2048x2048) | 1,120 | $0.134 |
| 4K (4096x4096) | 2,000 | $0.24 |
|
||||
|
||||
LiteLLM automatically calculates costs using `output_cost_per_image_token` from the model pricing configuration.
|
||||
|
||||
**Example response usage:**
|
||||
```json
{
  "completion_tokens_details": {
    "reasoning_tokens": 225,
    "text_tokens": 0,
    "image_tokens": 1120
  }
}
```
|
||||
|
||||
For more details, see [Google's Gemini pricing documentation](https://ai.google.dev/gemini-api/docs/pricing).
|
||||
|
||||
|
||||
@@ -583,9 +583,11 @@ def generic_cost_per_token(
|
||||
reasoning_tokens = completion_tokens_details["reasoning_tokens"]
|
||||
image_tokens = completion_tokens_details["image_tokens"]
|
||||
|
||||
if text_tokens == 0:
|
||||
# Only assume all tokens are text if there's NO breakdown at all
|
||||
# If image_tokens, audio_tokens, or reasoning_tokens exist, respect text_tokens=0
|
||||
has_token_breakdown = image_tokens > 0 or audio_tokens > 0 or reasoning_tokens > 0
|
||||
if text_tokens == 0 and not has_token_breakdown:
|
||||
text_tokens = usage.completion_tokens
|
||||
if text_tokens == usage.completion_tokens:
|
||||
is_text_tokens_total = True
|
||||
## TEXT COST
|
||||
completion_cost = float(text_tokens) * completion_base_cost
|
||||
|
||||
@@ -12147,6 +12147,7 @@
|
||||
"max_tokens": 65536,
|
||||
"mode": "image_generation",
|
||||
"output_cost_per_image": 0.134,
|
||||
"output_cost_per_image_token": 1.2e-04,
|
||||
"output_cost_per_token": 1.2e-05,
|
||||
"output_cost_per_token_batches": 6e-06,
|
||||
"source": "https://ai.google.dev/gemini-api/docs/pricing",
|
||||
@@ -13884,6 +13885,7 @@
|
||||
"max_tokens": 65536,
|
||||
"mode": "image_generation",
|
||||
"output_cost_per_image": 0.134,
|
||||
"output_cost_per_image_token": 1.2e-04,
|
||||
"output_cost_per_token": 1.2e-05,
|
||||
"rpm": 1000,
|
||||
"tpm": 4000000,
|
||||
@@ -25802,6 +25804,7 @@
|
||||
"max_tokens": 65536,
|
||||
"mode": "image_generation",
|
||||
"output_cost_per_image": 0.134,
|
||||
"output_cost_per_image_token": 1.2e-04,
|
||||
"output_cost_per_token": 1.2e-05,
|
||||
"output_cost_per_token_batches": 6e-06,
|
||||
"source": "https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/3-pro-image"
|
||||
|
||||
@@ -12147,6 +12147,7 @@
|
||||
"max_tokens": 65536,
|
||||
"mode": "image_generation",
|
||||
"output_cost_per_image": 0.134,
|
||||
"output_cost_per_image_token": 1.2e-04,
|
||||
"output_cost_per_token": 1.2e-05,
|
||||
"output_cost_per_token_batches": 6e-06,
|
||||
"source": "https://ai.google.dev/gemini-api/docs/pricing",
|
||||
@@ -13884,6 +13885,7 @@
|
||||
"max_tokens": 65536,
|
||||
"mode": "image_generation",
|
||||
"output_cost_per_image": 0.134,
|
||||
"output_cost_per_image_token": 1.2e-04,
|
||||
"output_cost_per_token": 1.2e-05,
|
||||
"rpm": 1000,
|
||||
"tpm": 4000000,
|
||||
@@ -25802,6 +25804,7 @@
|
||||
"max_tokens": 65536,
|
||||
"mode": "image_generation",
|
||||
"output_cost_per_image": 0.134,
|
||||
"output_cost_per_image_token": 1.2e-04,
|
||||
"output_cost_per_token": 1.2e-05,
|
||||
"output_cost_per_token_batches": 6e-06,
|
||||
"source": "https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/3-pro-image"
|
||||
|
||||
@@ -720,6 +720,72 @@ def test_service_tier_fallback_pricing():
|
||||
assert abs(std_cost[1] - expected_standard_completion) < 1e-10, f"Standard completion cost mismatch: {std_cost[1]} vs {expected_standard_completion}"
|
||||
|
||||
|
||||
def test_gemini_image_generation_cost_with_zero_text_tokens():
    """
    Regression test for https://github.com/BerriAI/litellm/issues/17410.

    When a Gemini image-generation response reports text_tokens=0 alongside
    non-zero image_tokens and reasoning_tokens, the image tokens must be
    billed at ``output_cost_per_image_token`` rather than lumped in with the
    text tokens at the base per-token output price.
    """
    # Use the bundled (local) model cost map so pricing is deterministic.
    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")

    model_name = "gemini-3-pro-image-preview"

    # Token breakdown taken verbatim from the issue report:
    # 1345 completion tokens = 0 text + 1120 image + 225 reasoning.
    completion_details = CompletionTokensDetailsWrapper(
        accepted_prediction_tokens=None,
        audio_tokens=None,
        reasoning_tokens=225,
        rejected_prediction_tokens=None,
        text_tokens=0,  # This is the key: text_tokens=0
        image_tokens=1120,
    )
    prompt_details = PromptTokensDetailsWrapper(
        audio_tokens=None, cached_tokens=None, text_tokens=10, image_tokens=None
    )
    usage = Usage(
        completion_tokens=1345,
        prompt_tokens=10,
        total_tokens=1355,
        completion_tokens_details=completion_details,
        prompt_tokens_details=prompt_details,
    )

    prompt_cost, completion_cost = generic_cost_per_token(
        model=model_name,
        usage=usage,
        custom_llm_provider="vertex_ai",
    )

    # Expected: image tokens use image pricing, reasoning tokens use the base
    # per-token output price, and the zero text tokens contribute nothing.
    # 1120 * 1.2e-04 + 225 * 1.2e-05 ≈ 0.1344 + 0.0027 ≈ 0.1371
    pricing = litellm.model_cost[model_name]
    per_image_token = pricing.get("output_cost_per_image_token", 0)
    per_output_token = pricing.get("output_cost_per_token", 0)
    expected_completion_cost = 1120 * per_image_token + 225 * per_output_token

    # Pre-fix behavior priced all 1345 tokens as text (1345 * 1.2e-05 ≈ $0.016);
    # the fixed path yields ~$0.137, so a $0.10 floor cleanly separates the two.
    assert completion_cost > 0.10, (
        f"Completion cost should be > $0.10 (image tokens are expensive), got ${completion_cost:.6f}. "
        f"Bug: tokens may be incorrectly treated as text tokens."
    )
    assert round(completion_cost, 4) == round(expected_completion_cost, 4), (
        f"Expected completion cost ${expected_completion_cost:.6f}, got ${completion_cost:.6f}"
    )
|
||||
|
||||
|
||||
def test_bedrock_anthropic_prompt_caching():
|
||||
"""Test Bedrock Anthropic models with prompt caching return correct costs."""
|
||||
model = "us.anthropic.claude-sonnet-4-5-20250929-v1:0"
|
||||
|
||||
Reference in New Issue
Block a user