update bench
This commit is contained in:
@@ -31,7 +31,7 @@ outputs[0]["text"]
|
|||||||
See `bench.py` for benchmark.
|
See `bench.py` for benchmark.
|
||||||
|
|
||||||
**Test Configuration:**
|
**Test Configuration:**
|
||||||
- Hardware: RTX 4070
|
- Hardware: RTX 4070 Laptop (8GB)
|
||||||
- Model: Qwen3-0.6B
|
- Model: Qwen3-0.6B
|
||||||
- Total Requests: 256 sequences
|
- Total Requests: 256 sequences
|
||||||
- Input Length: Randomly sampled between 100–1024 tokens
|
- Input Length: Randomly sampled between 100–1024 tokens
|
||||||
@@ -40,5 +40,5 @@ See `bench.py` for benchmark.
|
|||||||
**Performance Results:**
|
**Performance Results:**
|
||||||
| Inference Engine | Output Tokens | Time (s) | Throughput (tokens/s) |
|
| Inference Engine | Output Tokens | Time (s) | Throughput (tokens/s) |
|
||||||
|----------------|-------------|----------|-----------------------|
|
|----------------|-------------|----------|-----------------------|
|
||||||
| vLLM | 133,966 | 98.95 | 1353.86 |
|
| vLLM | 133,966 | 98.37 | 1361.84 |
|
||||||
| Nano-vLLM | 133,966 | 101.90 | 1314.65 |
|
| Nano-vLLM | 133,966 | 93.41 | 1434.13 |
|
||||||
|
|||||||
2
bench.py
2
bench.py
@@ -21,7 +21,7 @@ def main():
|
|||||||
|
|
||||||
llm.generate(["Benchmark: "], SamplingParams())
|
llm.generate(["Benchmark: "], SamplingParams())
|
||||||
t = time.time()
|
t = time.time()
|
||||||
llm.generate(prompt_token_ids, sampling_params)
|
llm.generate(prompt_token_ids, sampling_params, use_tqdm=False)
|
||||||
t = (time.time() - t)
|
t = (time.time() - t)
|
||||||
total_tokens = sum(sp.max_tokens for sp in sampling_params)
|
total_tokens = sum(sp.max_tokens for sp in sampling_params)
|
||||||
throughput = total_tokens / t
|
throughput = total_tokens / t
|
||||||
|
|||||||
Reference in New Issue
Block a user