Files
nano-vllm/example.py
GeeeekExplorer 08c84ec08d multi file loader
2025-06-12 01:00:09 +08:00

30 lines
778 B
Python

import os
from nanovllm import LLM, SamplingParams
from transformers import AutoTokenizer
def main():
    """Run a small chat-style generation demo and print the results.

    Loads a tokenizer and an ``LLM`` engine from a local checkpoint
    directory, renders two user questions through the tokenizer's chat
    template, generates a completion for each prompt, and prints every
    prompt next to its completion.
    """
    # Local checkpoint directory; "~" is expanded to the user's home.
    path = os.path.expanduser("~/huggingface/Qwen3-0.6B/")
    tokenizer = AutoTokenizer.from_pretrained(path)
    llm = LLM(path, enforce_eager=True)
    sampling_params = SamplingParams(temperature=0.6, max_tokens=256)

    questions = [
        "自我介绍一下吧!",
        "列出100内所有素数",
    ]
    # Wrap each question as a single user turn and render it with the
    # chat template. tokenize=False is passed because the rendered prompts
    # are used as text below (they are interpolated into the printout).
    prompts = [
        tokenizer.apply_chat_template(
            [{"role": "user", "content": question}],
            tokenize=False,
            add_generation_prompt=True,
            enable_thinking=True,
        )
        for question in questions
    ]

    # All prompts are submitted in one call; the results are paired with
    # the prompts positionally below.
    outputs = llm.generate(prompts, sampling_params)
    for prompt, output in zip(prompts, outputs):
        print("\n")
        print(f"Prompt: {prompt}")
        print(f"Completion: {output['text']}")


# Only run the demo when executed as a script, so that importing this
# module does not load the model or start generation.
if __name__ == "__main__":
    main()