[CI] Add batched audios Whisper test (#29308)
Signed-off-by: NickLucche <nlucches@redhat.com>
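The new test drives two transcription requests concurrently so the server can batch them. A minimal standalone sketch of the same pattern, assuming a locally running vLLM OpenAI-compatible server (the base URL, API key, and audio file names below are placeholders, not part of this commit):

import asyncio

import openai

# Placeholder endpoint/credentials: point these at a running vLLM server,
# e.g. one started with `vllm serve openai/whisper-large-v3-turbo`.
client = openai.AsyncOpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")


async def main():
    # Placeholder audio files.
    with open("first.wav", "rb") as f1, open("second.wav", "rb") as f2:
        # Create both coroutines before awaiting, then gather them so the
        # two requests are in flight at the same time and can be batched.
        req1 = client.audio.transcriptions.create(
            model="openai/whisper-large-v3-turbo", file=f1, language="en"
        )
        req2 = client.audio.transcriptions.create(
            model="openai/whisper-large-v3-turbo", file=f2, language="en"
        )
        out1, out2 = await asyncio.gather(req1, req2)
    print(out1.text, out2.text)


asyncio.run(main())

Awaiting each request sequentially would serialize them; gathering the coroutines is what exercises the server's batched path, which is the behavior the new test checks.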
@@ -2,20 +2,12 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

# imports for structured outputs tests
import io
import json

import librosa
import numpy as np
import openai
import pytest
import pytest_asyncio
import soundfile as sf

from ...utils import RemoteOpenAIServer

MODEL_NAME = "openai/whisper-large-v3-turbo"
SERVER_ARGS = ["--enforce-eager"]
MISTRAL_FORMAT_ARGS = [
    "--tokenizer_mode",
    "mistral",
@@ -26,22 +18,8 @@ MISTRAL_FORMAT_ARGS = [
]


@pytest.fixture(scope="module")
def server():
    with RemoteOpenAIServer(MODEL_NAME, SERVER_ARGS) as remote_server:
        yield remote_server


@pytest_asyncio.fixture
async def client(server):
    async with server.get_async_client() as async_client:
        yield async_client


@pytest.mark.asyncio
@pytest.mark.parametrize(
    "model_name", ["openai/whisper-large-v3-turbo", "mistralai/Voxtral-Mini-3B-2507"]
)
@pytest.mark.parametrize("model_name", ["mistralai/Voxtral-Mini-3B-2507"])
async def test_basic_audio(mary_had_lamb, model_name):
    server_args = ["--enforce-eager"]

@@ -120,176 +98,3 @@ async def test_basic_audio_gemma(foscolo):
    )
    out = json.loads(transcription)["text"]
    assert "da cui vergine nacque Venere" in out


@pytest.mark.asyncio
async def test_non_asr_model(winning_call):
    # text to text model
    model_name = "JackFram/llama-68m"
    with RemoteOpenAIServer(model_name, SERVER_ARGS) as remote_server:
        client = remote_server.get_async_client()
        res = await client.audio.transcriptions.create(
            model=model_name, file=winning_call, language="en", temperature=0.0
        )
        err = res.error
        assert err["code"] == 400 and not res.text
        assert err["message"] == "The model does not support Transcriptions API"


@pytest.mark.asyncio
async def test_bad_requests(mary_had_lamb, client):
    # invalid language
    with pytest.raises(openai.BadRequestError):
        await client.audio.transcriptions.create(
            model=MODEL_NAME, file=mary_had_lamb, language="hh", temperature=0.0
        )


@pytest.mark.asyncio
async def test_long_audio_request(mary_had_lamb, client):
    mary_had_lamb.seek(0)
    audio, sr = librosa.load(mary_had_lamb)
    # Add small silence after each audio for repeatability in the split process
    audio = np.pad(audio, (0, 1600))
    repeated_audio = np.tile(audio, 10)
    # Repeated audio to buffer
    buffer = io.BytesIO()
    sf.write(buffer, repeated_audio, sr, format="WAV")
    buffer.seek(0)
    transcription = await client.audio.transcriptions.create(
        model=MODEL_NAME,
        file=buffer,
        language="en",
        response_format="text",
        temperature=0.0,
    )
    out = json.loads(transcription)
    out_text = out["text"]
    out_usage = out["usage"]
    counts = out_text.count("Mary had a little lamb")
    assert counts == 10, counts
    assert out_usage["seconds"] == 161, out_usage["seconds"]


@pytest.mark.asyncio
async def test_completion_endpoints(client):
    # text to text model
    res = await client.chat.completions.create(
        model=MODEL_NAME,
        messages=[{"role": "system", "content": "You are a helpful assistant."}],
    )
    err = res.error
    assert err["code"] == 400
    assert err["message"] == "The model does not support Chat Completions API"

    res = await client.completions.create(model=MODEL_NAME, prompt="Hello")
    err = res.error
    assert err["code"] == 400
    assert err["message"] == "The model does not support Completions API"


@pytest.mark.asyncio
async def test_streaming_response(winning_call, client):
    transcription = ""
    res_no_stream = await client.audio.transcriptions.create(
        model=MODEL_NAME,
        file=winning_call,
        response_format="json",
        language="en",
        temperature=0.0,
    )
    res = await client.audio.transcriptions.create(
        model=MODEL_NAME,
        file=winning_call,
        language="en",
        temperature=0.0,
        stream=True,
        timeout=30,
    )
    # Reconstruct from chunks and validate
    async for chunk in res:
        text = chunk.choices[0]["delta"]["content"]
        transcription += text

    assert transcription == res_no_stream.text


@pytest.mark.asyncio
async def test_stream_options(winning_call, client):
    res = await client.audio.transcriptions.create(
        model=MODEL_NAME,
        file=winning_call,
        language="en",
        temperature=0.0,
        stream=True,
        extra_body=dict(stream_include_usage=True, stream_continuous_usage_stats=True),
        timeout=30,
    )
    final = False
    continuous = True
    async for chunk in res:
        if not len(chunk.choices):
            # final usage sent
            final = True
        else:
            continuous = continuous and hasattr(chunk, "usage")
    assert final and continuous


@pytest.mark.asyncio
async def test_sampling_params(mary_had_lamb, client):
    """
    Compare sampling with params and greedy sampling to assert results
    are different when extreme sampling parameters values are picked.
    """
    transcription = await client.audio.transcriptions.create(
        model=MODEL_NAME,
        file=mary_had_lamb,
        language="en",
        temperature=0.8,
        extra_body=dict(
            seed=42,
            repetition_penalty=1.9,
            top_k=12,
            top_p=0.4,
            min_p=0.5,
            frequency_penalty=1.8,
            presence_penalty=2.0,
        ),
    )

    greedy_transcription = await client.audio.transcriptions.create(
        model=MODEL_NAME,
        file=mary_had_lamb,
        language="en",
        temperature=0.0,
        extra_body=dict(seed=42),
    )

    assert greedy_transcription.text != transcription.text


@pytest.mark.asyncio
async def test_audio_prompt(mary_had_lamb, client):
    prompt = "This is a speech, recorded in a phonograph."
    # Prompts should not omit the part of original prompt while transcribing.
    prefix = "The first words I spoke in the original phonograph"
    transcription = await client.audio.transcriptions.create(
        model=MODEL_NAME,
        file=mary_had_lamb,
        language="en",
        response_format="text",
        temperature=0.0,
    )
    out = json.loads(transcription)["text"]
    assert prefix in out
    transcription_wprompt = await client.audio.transcriptions.create(
        model=MODEL_NAME,
        file=mary_had_lamb,
        language="en",
        response_format="text",
        prompt=prompt,
        temperature=0.0,
    )
    out_prompt = json.loads(transcription_wprompt)["text"]
    assert prefix in out_prompt
@@ -0,0 +1,237 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

# imports for structured outputs tests
import asyncio
import io
import json

import librosa
import numpy as np
import openai
import pytest
import pytest_asyncio
import soundfile as sf

from ...utils import RemoteOpenAIServer

MODEL_NAME = "openai/whisper-large-v3-turbo"
SERVER_ARGS = ["--enforce-eager"]


@pytest.fixture(scope="module")
def server():
    with RemoteOpenAIServer(MODEL_NAME, SERVER_ARGS) as remote_server:
        yield remote_server


@pytest_asyncio.fixture
async def whisper_client(server):
    async with server.get_async_client() as async_client:
        yield async_client


@pytest.mark.asyncio
async def test_basic_audio(mary_had_lamb):
    server_args = ["--enforce-eager"]

    # Based on https://github.com/openai/openai-cookbook/blob/main/examples/Whisper_prompting_guide.ipynb.
    with RemoteOpenAIServer(MODEL_NAME, server_args) as remote_server:
        client = remote_server.get_async_client()
        transcription = await client.audio.transcriptions.create(
            model=MODEL_NAME,
            file=mary_had_lamb,
            language="en",
            response_format="text",
            temperature=0.0,
        )
        out = json.loads(transcription)
        out_text = out["text"]
        out_usage = out["usage"]
        assert "Mary had a little lamb," in out_text
        assert out_usage["seconds"] == 16, out_usage["seconds"]


@pytest.mark.asyncio
async def test_basic_audio_batched(mary_had_lamb, winning_call, whisper_client):
    transcription = whisper_client.audio.transcriptions.create(
        model=MODEL_NAME,
        file=mary_had_lamb,
        language="en",
        response_format="text",
        temperature=0.0,
    )
    transcription2 = whisper_client.audio.transcriptions.create(
        model=MODEL_NAME,
        file=winning_call,
        language="en",
        response_format="text",
        temperature=0.0,
    )
    # Await both transcriptions by scheduling coroutines together
    transcription, transcription2 = await asyncio.gather(transcription, transcription2)
    out = json.loads(transcription)
    out_text = out["text"]
    assert "Mary had a little lamb," in out_text
    out2 = json.loads(transcription2)
    out_text2 = out2["text"]
    assert "Edgar Martinez" in out_text2


@pytest.mark.asyncio
async def test_bad_requests(mary_had_lamb, whisper_client):
    # invalid language
    with pytest.raises(openai.BadRequestError):
        await whisper_client.audio.transcriptions.create(
            model=MODEL_NAME, file=mary_had_lamb, language="hh", temperature=0.0
        )


@pytest.mark.asyncio
async def test_long_audio_request(mary_had_lamb, whisper_client):
    mary_had_lamb.seek(0)
    audio, sr = librosa.load(mary_had_lamb)
    # Add small silence after each audio for repeatability in the split process
    audio = np.pad(audio, (0, 1600))
    repeated_audio = np.tile(audio, 10)
    # Repeated audio to buffer
    buffer = io.BytesIO()
    sf.write(buffer, repeated_audio, sr, format="WAV")
    buffer.seek(0)
    transcription = await whisper_client.audio.transcriptions.create(
        model=MODEL_NAME,
        file=buffer,
        language="en",
        response_format="text",
        temperature=0.0,
    )
    out = json.loads(transcription)
    out_text = out["text"]
    out_usage = out["usage"]
    counts = out_text.count("Mary had a little lamb")
    assert counts == 10, counts
    assert out_usage["seconds"] == 161, out_usage["seconds"]


@pytest.mark.asyncio
async def test_completion_endpoints(whisper_client):
    # text to text model
    res = await whisper_client.chat.completions.create(
        model=MODEL_NAME,
        messages=[{"role": "system", "content": "You are a helpful assistant."}],
    )
    err = res.error
    assert err["code"] == 400
    assert err["message"] == "The model does not support Chat Completions API"

    res = await whisper_client.completions.create(model=MODEL_NAME, prompt="Hello")
    err = res.error
    assert err["code"] == 400
    assert err["message"] == "The model does not support Completions API"


@pytest.mark.asyncio
async def test_streaming_response(winning_call, whisper_client):
    transcription = ""
    res_no_stream = await whisper_client.audio.transcriptions.create(
        model=MODEL_NAME,
        file=winning_call,
        response_format="json",
        language="en",
        temperature=0.0,
    )
    res = await whisper_client.audio.transcriptions.create(
        model=MODEL_NAME,
        file=winning_call,
        language="en",
        temperature=0.0,
        stream=True,
        timeout=30,
    )
    # Reconstruct from chunks and validate
    async for chunk in res:
        text = chunk.choices[0]["delta"]["content"]
        transcription += text

    assert transcription == res_no_stream.text


@pytest.mark.asyncio
async def test_stream_options(winning_call, whisper_client):
    res = await whisper_client.audio.transcriptions.create(
        model=MODEL_NAME,
        file=winning_call,
        language="en",
        temperature=0.0,
        stream=True,
        extra_body=dict(stream_include_usage=True, stream_continuous_usage_stats=True),
        timeout=30,
    )
    final = False
    continuous = True
    async for chunk in res:
        if not len(chunk.choices):
            # final usage sent
            final = True
        else:
            continuous = continuous and hasattr(chunk, "usage")
    assert final and continuous


@pytest.mark.asyncio
async def test_sampling_params(mary_had_lamb, whisper_client):
    """
    Compare sampling with params and greedy sampling to assert results
    are different when extreme sampling parameters values are picked.
    """
    transcription = await whisper_client.audio.transcriptions.create(
        model=MODEL_NAME,
        file=mary_had_lamb,
        language="en",
        temperature=0.8,
        extra_body=dict(
            seed=42,
            repetition_penalty=1.9,
            top_k=12,
            top_p=0.4,
            min_p=0.5,
            frequency_penalty=1.8,
            presence_penalty=2.0,
        ),
    )

    greedy_transcription = await whisper_client.audio.transcriptions.create(
        model=MODEL_NAME,
        file=mary_had_lamb,
        language="en",
        temperature=0.0,
        extra_body=dict(seed=42),
    )

    assert greedy_transcription.text != transcription.text


@pytest.mark.asyncio
async def test_audio_prompt(mary_had_lamb, whisper_client):
    prompt = "This is a speech, recorded in a phonograph."
    # Prompts should not omit the part of original prompt while transcribing.
    prefix = "The first words I spoke in the original phonograph"
    transcription = await whisper_client.audio.transcriptions.create(
        model=MODEL_NAME,
        file=mary_had_lamb,
        language="en",
        response_format="text",
        temperature=0.0,
    )
    out = json.loads(transcription)["text"]
    assert prefix in out
    transcription_wprompt = await whisper_client.audio.transcriptions.create(
        model=MODEL_NAME,
        file=mary_had_lamb,
        language="en",
        response_format="text",
        prompt=prompt,
        temperature=0.0,
    )
    out_prompt = json.loads(transcription_wprompt)["text"]
    assert prefix in out_prompt