Mirror of https://github.com/BerriAI/litellm.git
Fix: Gemini image_tokens incorrectly treated as text tokens in cost calculation (#17554)
When Gemini image generation models return `text_tokens=0` with `image_tokens > 0`, the cost calculator assumed no token breakdown existed and treated all completion tokens as text tokens, resulting in a roughly 10x underestimation of costs.

Changes:

- Fix the cost calculation logic to respect the token breakdown when image/audio/reasoning tokens are present, even if `text_tokens=0`
- Add `output_cost_per_image_token` pricing for `gemini-3-pro-image-preview` models
- Add a test case reproducing the issue
- Add documentation explaining image token pricing

Fixes #17410
@@ -2006,3 +2006,34 @@ curl -L -X POST 'http://localhost:4000/v1/chat/completions' \

</TabItem>
</Tabs>

### Image Generation Pricing

Gemini image generation models (like `gemini-3-pro-image-preview`) return `image_tokens` in the response usage. These tokens are priced differently from text tokens:

| Token Type | Price per 1M tokens | Price per token |
|--------------|---------------------|-----------------|
| Text output | $12 | $0.000012 |
| Image output | $120 | $0.00012 |

The number of image tokens depends on the output resolution:

| Resolution | Tokens per image | Cost per image |
|------------|------------------|----------------|
| 1K-2K (1024x1024 to 2048x2048) | 1,120 | $0.134 |
| 4K (4096x4096) | 2,000 | $0.24 |

LiteLLM automatically calculates costs using `output_cost_per_image_token` from the model pricing configuration.

**Example response usage:**

```json
{
  "completion_tokens_details": {
    "reasoning_tokens": 225,
    "text_tokens": 0,
    "image_tokens": 1120
  }
}
```

For more details, see [Google's Gemini pricing documentation](https://ai.google.dev/gemini-api/docs/pricing).
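As a quick sanity check on the tables above, the per-image figures follow directly from the per-token price. A minimal standalone sketch in plain Python (arithmetic only, no LiteLLM APIs; the constants are copied from the tables above):

```python
# Output prices from the pricing table (USD per token).
TEXT_PRICE_PER_TOKEN = 12 / 1_000_000    # $12 per 1M text tokens
IMAGE_PRICE_PER_TOKEN = 120 / 1_000_000  # $120 per 1M image tokens

# Image token counts by output resolution, per the resolution table.
TOKENS_PER_IMAGE = {"1K-2K": 1_120, "4K": 2_000}

for resolution, tokens in TOKENS_PER_IMAGE.items():
    cost = tokens * IMAGE_PRICE_PER_TOKEN
    print(f"{resolution}: {tokens} image tokens -> ${cost:.4f} per image")
# 1K-2K: 1120 image tokens -> $0.1344 per image
# 4K: 2000 image tokens -> $0.2400 per image
```

If those same 1,120 tokens were billed at the text rate instead, the charge would be 1,120 × $0.000012 ≈ $0.0134 rather than $0.1344, which is exactly the ~10x underestimation this commit fixes.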
```diff
@@ -583,9 +583,11 @@ def generic_cost_per_token(
         reasoning_tokens = completion_tokens_details["reasoning_tokens"]
         image_tokens = completion_tokens_details["image_tokens"]

-    if text_tokens == 0:
+    # Only assume all tokens are text if there's NO breakdown at all
+    # If image_tokens, audio_tokens, or reasoning_tokens exist, respect text_tokens=0
+    has_token_breakdown = image_tokens > 0 or audio_tokens > 0 or reasoning_tokens > 0
+    if text_tokens == 0 and not has_token_breakdown:
         text_tokens = usage.completion_tokens
     if text_tokens == usage.completion_tokens:
         is_text_tokens_total = True
     ## TEXT COST
     completion_cost = float(text_tokens) * completion_base_cost
```
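The effect of the new `has_token_breakdown` guard is easiest to see on the numbers from issue #17410. Below is a deliberately simplified, self-contained model of the completion-side cost logic; the `completion_cost` function here is hypothetical and flattens several steps of the real `generic_cost_per_token`, with prices matching the `gemini-3-pro-image-preview` entries added below:

```python
# Prices matching gemini-3-pro-image-preview: $1.2e-05/text token, $1.2e-04/image token.
OUTPUT_COST_PER_TOKEN = 1.2e-05
OUTPUT_COST_PER_IMAGE_TOKEN = 1.2e-04

def completion_cost(completion_tokens, text_tokens, image_tokens, reasoning_tokens, fixed):
    """Simplified sketch of the completion-side costing, before/after the fix."""
    has_breakdown = image_tokens > 0 or reasoning_tokens > 0
    if text_tokens == 0 and (not fixed or not has_breakdown):
        # Old behavior: text_tokens=0 collapsed to "all completion tokens are text",
        # which also skipped the separate image/reasoning pricing below.
        text_tokens = completion_tokens
        image_tokens = reasoning_tokens = 0
    return (
        text_tokens * OUTPUT_COST_PER_TOKEN
        + image_tokens * OUTPUT_COST_PER_IMAGE_TOKEN
        + reasoning_tokens * OUTPUT_COST_PER_TOKEN  # reasoning billed at the base rate
    )

# Usage from the issue: 1345 completion tokens = 0 text + 1120 image + 225 reasoning.
print(completion_cost(1345, 0, 1120, 225, fixed=False))  # ≈ 0.01614 (~10x too low)
print(completion_cost(1345, 0, 1120, 225, fixed=True))   # ≈ 0.1371
```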
```diff
@@ -12147,6 +12147,7 @@
     "max_tokens": 65536,
     "mode": "image_generation",
     "output_cost_per_image": 0.134,
+    "output_cost_per_image_token": 1.2e-04,
     "output_cost_per_token": 1.2e-05,
     "output_cost_per_token_batches": 6e-06,
     "source": "https://ai.google.dev/gemini-api/docs/pricing",
@@ -13884,6 +13885,7 @@
     "max_tokens": 65536,
     "mode": "image_generation",
     "output_cost_per_image": 0.134,
+    "output_cost_per_image_token": 1.2e-04,
     "output_cost_per_token": 1.2e-05,
     "rpm": 1000,
     "tpm": 4000000,
@@ -25802,6 +25804,7 @@
     "max_tokens": 65536,
     "mode": "image_generation",
     "output_cost_per_image": 0.134,
+    "output_cost_per_image_token": 1.2e-04,
     "output_cost_per_token": 1.2e-05,
     "output_cost_per_token_batches": 6e-06,
     "source": "https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/3-pro-image"
```
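With the updated map loaded, the new key can be read back directly; this mirrors how the regression test below loads the bundled pricing map:

```python
import os
import litellm

# Use the cost map bundled with the installed litellm package (same setup as the test).
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
litellm.model_cost = litellm.get_model_cost_map(url="")

entry = litellm.model_cost["gemini-3-pro-image-preview"]
print(entry.get("output_cost_per_image_token"))  # expected: 0.00012
print(entry.get("output_cost_per_token"))        # expected: 1.2e-05
```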
The commit applies the identical three hunks a second time (LiteLLM keeps a second, backup copy of the pricing JSON in sync with the primary one).
```diff
@@ -720,6 +720,72 @@ def test_service_tier_fallback_pricing():
     assert abs(std_cost[1] - expected_standard_completion) < 1e-10, f"Standard completion cost mismatch: {std_cost[1]} vs {expected_standard_completion}"


+def test_gemini_image_generation_cost_with_zero_text_tokens():
+    """
+    Test that image_tokens are correctly costed when text_tokens=0.
+
+    Reproduces issue #17410: completion_cost calculates incorrectly for
+    Gemini-3-pro-image model - image_tokens were treated as text tokens
+    when text_tokens=0.
+
+    https://github.com/BerriAI/litellm/issues/17410
+    """
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
+
+    model = "gemini-3-pro-image-preview"
+    custom_llm_provider = "vertex_ai"
+
+    # Usage from the issue: text_tokens=0, image_tokens=1120, reasoning_tokens=225
+    usage = Usage(
+        completion_tokens=1345,
+        prompt_tokens=10,
+        total_tokens=1355,
+        completion_tokens_details=CompletionTokensDetailsWrapper(
+            accepted_prediction_tokens=None,
+            audio_tokens=None,
+            reasoning_tokens=225,
+            rejected_prediction_tokens=None,
+            text_tokens=0,  # This is the key: text_tokens=0
+            image_tokens=1120,
+        ),
+        prompt_tokens_details=PromptTokensDetailsWrapper(
+            audio_tokens=None, cached_tokens=None, text_tokens=10, image_tokens=None
+        ),
+    )
+
+    model_cost_map = litellm.model_cost[model]
+    prompt_cost, completion_cost = generic_cost_per_token(
+        model=model,
+        usage=usage,
+        custom_llm_provider=custom_llm_provider,
+    )
+
+    # Expected costs:
+    # - text_tokens: 0 * output_cost_per_token = 0
+    # - image_tokens: 1120 * output_cost_per_image_token = 1120 * 1.2e-04 = 0.1344
+    # - reasoning_tokens: 225 * output_cost_per_token = 225 * 1.2e-05 = 0.0027
+    # Total completion: ~0.1371
+
+    output_cost_per_image_token = model_cost_map.get("output_cost_per_image_token", 0)
+    output_cost_per_token = model_cost_map.get("output_cost_per_token", 0)
+
+    expected_image_cost = 1120 * output_cost_per_image_token
+    expected_reasoning_cost = 225 * output_cost_per_token  # reasoning uses base token cost
+    expected_completion_cost = expected_image_cost + expected_reasoning_cost
+
+    # The bug was: all 1345 tokens were treated as text = 1345 * 1.2e-05 = 0.01614
+    # Fixed: image_tokens use image pricing = ~0.137
+
+    assert completion_cost > 0.10, (
+        f"Completion cost should be > $0.10 (image tokens are expensive), got ${completion_cost:.6f}. "
+        f"Bug: tokens may be incorrectly treated as text tokens."
+    )
+    assert round(completion_cost, 4) == round(expected_completion_cost, 4), (
+        f"Expected completion cost ${expected_completion_cost:.6f}, got ${completion_cost:.6f}"
+    )
+
+
 def test_bedrock_anthropic_prompt_caching():
     """Test Bedrock Anthropic models with prompt caching return correct costs."""
     model = "us.anthropic.claude-sonnet-4-5-20250929-v1:0"
```
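To run just the new regression test, pytest's keyword selector can target it by name. A minimal invocation via `pytest.main` (run from the directory containing the cost-calculation tests; the test file's path is not shown in this diff):

```python
import pytest

# -k selects tests by substring match on the test name.
pytest.main(["-k", "test_gemini_image_generation_cost_with_zero_text_tokens", "-v"])
```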