✨ feat: add --model argument to bench.py for configurable model path
Previously, bench.py used a hardcoded model path (~/models/Qwen3-4B-Instruct-2507/). It now accepts a --model argument (default: ~/models/Llama-3.1-8B-Instruct) to align with bench_offload.py; note that this also changes the model used when no argument is given. Generated with [Claude Code](https://claude.ai/code) via [Happy](https://happy.engineering) Co-Authored-By: Claude <noreply@anthropic.com> Co-Authored-By: Happy <yesreply@happy.engineering>
This commit is contained in:
4
bench.py
4
bench.py
@@ -41,6 +41,8 @@ def bench_prefill(llm, num_seqs, input_len):
|
|||||||
def main():
|
def main():
|
||||||
import argparse
|
import argparse
|
||||||
parser = argparse.ArgumentParser(description="Benchmark nanovllm GPU performance")
|
parser = argparse.ArgumentParser(description="Benchmark nanovllm GPU performance")
|
||||||
|
parser.add_argument("--model", type=str, default="~/models/Llama-3.1-8B-Instruct",
|
||||||
|
help="Model path (default: ~/models/Llama-3.1-8B-Instruct)")
|
||||||
parser.add_argument("--input-len", type=int, default=None, help="Input length in tokens")
|
parser.add_argument("--input-len", type=int, default=None, help="Input length in tokens")
|
||||||
parser.add_argument("--output-len", type=int, default=64, help="Output length for decode benchmark (default: 64)")
|
parser.add_argument("--output-len", type=int, default=64, help="Output length for decode benchmark (default: 64)")
|
||||||
parser.add_argument("--max-len", type=int, default=32*1024, help="Max model length (default: 32K)")
|
parser.add_argument("--max-len", type=int, default=32*1024, help="Max model length (default: 32K)")
|
||||||
@@ -48,7 +50,7 @@ def main():
|
|||||||
parser.add_argument("--bench-all", action="store_true", help="Run both prefill and decode benchmarks")
|
parser.add_argument("--bench-all", action="store_true", help="Run both prefill and decode benchmarks")
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
path = os.path.expanduser("~/models/Qwen3-4B-Instruct-2507/")
|
path = os.path.expanduser(args.model)
|
||||||
max_len = args.max_len
|
max_len = args.max_len
|
||||||
|
|
||||||
print(f"\n[nanovllm GPU] max_len={max_len}")
|
print(f"\n[nanovllm GPU] max_len={max_len}")
|
||||||
|
|||||||
Reference in New Issue
Block a user