From c717072f31ad86a6b0ed727287a5f0409c7b1adf Mon Sep 17 00:00:00 2001 From: Zijie Tian Date: Tue, 27 Jan 2026 04:36:17 +0800 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20feat:=20add=20--model=20argument=20?= =?UTF-8?q?to=20bench.py=20for=20configurable=20model=20path?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously bench.py had a hardcoded model path (~/models/Qwen3-4B-Instruct-2507/). Now it accepts a --model argument (default: ~/models/Llama-3.1-8B-Instruct) to align with bench_offload.py. Note that the default model therefore changes from Qwen3-4B-Instruct-2507 to Llama-3.1-8B-Instruct. Generated with [Claude Code](https://claude.ai/code) via [Happy](https://happy.engineering) Co-Authored-By: Claude Co-Authored-By: Happy --- bench.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bench.py b/bench.py index 05b8b47..9348f08 100644 --- a/bench.py +++ b/bench.py @@ -41,6 +41,8 @@ def bench_prefill(llm, num_seqs, input_len): def main(): import argparse parser = argparse.ArgumentParser(description="Benchmark nanovllm GPU performance") + parser.add_argument("--model", type=str, default="~/models/Llama-3.1-8B-Instruct", + help="Model path (default: ~/models/Llama-3.1-8B-Instruct)") parser.add_argument("--input-len", type=int, default=None, help="Input length in tokens") parser.add_argument("--output-len", type=int, default=64, help="Output length for decode benchmark (default: 64)") parser.add_argument("--max-len", type=int, default=32*1024, help="Max model length (default: 32K)") @@ -48,7 +50,7 @@ def main(): parser.add_argument("--bench-all", action="store_true", help="Run both prefill and decode benchmarks") args = parser.parse_args() - path = os.path.expanduser("~/models/Qwen3-4B-Instruct-2507/") + path = os.path.expanduser(args.model) max_len = args.max_len print(f"\n[nanovllm GPU] max_len={max_len}")