Files
nano-vllm/bench.py
GeeeekExplorer fee58d44e4 fix
2025-06-12 01:00:31 +08:00

21 lines
548 B
Python

import os
import time
import torch
from nanovllm import LLM, SamplingParams
batch_size = 256
seq_len = 1024
max_tokens = 512
path = os.path.expanduser("~/huggingface/Qwen3-0.6B/")
llm = LLM(path, enforce_eager=False)
prompt_token_ids = torch.randint(0, 10240, (batch_size, seq_len)).tolist()
sampling_params = SamplingParams(temperature=0.6, ignore_eos=True, max_tokens=max_tokens)
t = time.time()
llm.generate(prompt_token_ids, sampling_params)
throughput = batch_size * max_tokens / (time.time() - t)
print(f"Throughput: {throughput: .2f}")