[Bug] Add Assertion for random-input-len / random-output-len (#26834)

Signed-off-by: yewentao256 <zhyanwentao@126.com>
This commit is contained in:
Wentao Ye
2025-10-16 17:36:39 -04:00
committed by GitHub
parent 01c977e96d
commit 23583ee28c

View File

@@ -478,6 +478,22 @@ class RandomDataset(BenchmarkDataset):
batchsize: int = 1,
**kwargs,
) -> list[SampleRequest]:
# Validate that the minimum possible total input length (prefix + sampled tokens) is at least 1.
num_special = int(tokenizer.num_special_tokens_to_add())
real_input_len = max(0, int(input_len) - num_special)
min_sampled_input = math.floor(real_input_len * (1.0 - float(range_ratio)))
min_total_input = int(prefix_len) + min_sampled_input
if min_total_input < 1:
raise ValueError(
"--random-input-len is too small: with tokenizer special "
f"tokens {num_special} and --random-range-ratio {range_ratio}, "
"the minimum possible total input tokens (prefix + sampled) is "
f"{min_total_input}. Increase --random-input-len and/or "
"--random-prefix-len, or decrease --random-range-ratio so that "
"prefix_len + floor(max(0, random_input_len - num_special)) "
"* (1 - range_ratio) >= 1."
)
input_lens, output_lens, offsets = self.get_sampling_params(
num_requests, range_ratio, input_len, output_len, tokenizer
)