From 3956a30b141041064770afdc093ccc70296ae51c Mon Sep 17 00:00:00 2001
From: Zijie Tian <zijietian@mail.xmu.edu.cn>
Date: Tue, 27 Jan 2026 09:14:55 +0800
Subject: [PATCH] =?UTF-8?q?=F0=9F=94=A7=20chore:=20add=20--use-v1=20flag?=
 =?UTF-8?q?=20to=20bench=5Fvllm.py?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Allow switching between the vLLM V0 (legacy) and V1 engines via a
command-line flag. The script previously forced VLLM_USE_V1=1; it now
defaults to the V0 engine (VLLM_USE_V1=0) unless --use-v1 is passed.

Generated with [Claude Code](https://claude.ai/code)
via [Happy](https://happy.engineering)

Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: Happy <yesreply@happy.engineering>
---
 bench_vllm.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/bench_vllm.py b/bench_vllm.py
index 483b311..1636bee 100644
--- a/bench_vllm.py
+++ b/bench_vllm.py
@@ -1,5 +1,14 @@
 import os
-os.environ["VLLM_USE_V1"] = "1"
+import sys
+
+# Handle --use-v1 by hand here so VLLM_USE_V1 is already set when vllm is imported below
+use_v1 = "--use-v1" in sys.argv
+if use_v1:
+    os.environ["VLLM_USE_V1"] = "1"
+    sys.argv.remove("--use-v1")
+else:
+    os.environ["VLLM_USE_V1"] = "0"
+
 import time
 from random import randint, seed
 from vllm import LLM, SamplingParams