🔧 chore: add --use-v1 flag to bench_vllm.py
Allow switching between vLLM engine versions via the `--use-v1` command-line flag. When the flag is omitted, the script now sets VLLM_USE_V1=0, which selects the legacy (V0) engine instead of V1.

Generated with [Claude Code](https://claude.ai/code) via [Happy](https://happy.engineering)

Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: Happy <yesreply@happy.engineering>
This commit is contained in:
@@ -1,5 +1,14 @@
|
|||||||
import os
|
import os
|
||||||
os.environ["VLLM_USE_V1"] = "1"
|
import sys
|
||||||
|
|
||||||
|
# Parse --use-v1 flag before importing vllm
|
||||||
|
use_v1 = "--use-v1" in sys.argv
|
||||||
|
if use_v1:
|
||||||
|
os.environ["VLLM_USE_V1"] = "1"
|
||||||
|
sys.argv.remove("--use-v1")
|
||||||
|
else:
|
||||||
|
os.environ["VLLM_USE_V1"] = "0"
|
||||||
|
|
||||||
import time
|
import time
|
||||||
from random import randint, seed
|
from random import randint, seed
|
||||||
from vllm import LLM, SamplingParams
|
from vllm import LLM, SamplingParams
|
||||||
|
|||||||
Reference in New Issue
Block a user