use spawn
example.py · 49 lines changed
@@ -3,27 +3,32 @@ from nanovllm import LLM, SamplingParams
 from transformers import AutoTokenizer
 
 
-path = os.path.expanduser("~/huggingface/Qwen3-0.6B/")
-tokenizer = AutoTokenizer.from_pretrained(path)
-llm = LLM(path, enforce_eager=True)
+def main():
+    path = os.path.expanduser("~/huggingface/Qwen3-0.6B/")
+    tokenizer = AutoTokenizer.from_pretrained(path)
+    llm = LLM(path, enforce_eager=True, tensor_parallel_size=1)
 
-sampling_params = SamplingParams(temperature=0.6, max_tokens=256)
-prompts = [
-    "introduce yourself",
-    "list all prime numbers within 100",
-]
-prompts = [
-    tokenizer.apply_chat_template(
-        [{"role": "user", "content": prompt}],
-        tokenize=False,
-        add_generation_prompt=True,
-        enable_thinking=True
-    )
-    for prompt in prompts
-]
-outputs = llm.generate(prompts, sampling_params)
+    sampling_params = SamplingParams(temperature=0.6, max_tokens=256)
+    prompts = [
+        "introduce yourself",
+        "list all prime numbers within 100",
+    ]
+    prompts = [
+        tokenizer.apply_chat_template(
+            [{"role": "user", "content": prompt}],
+            tokenize=False,
+            add_generation_prompt=True,
+            enable_thinking=True
+        )
+        for prompt in prompts
+    ]
+    outputs = llm.generate(prompts, sampling_params)
 
-for prompt, output in zip(prompts, outputs):
-    print("\n")
-    print(f"Prompt: {prompt!r}")
-    print(f"Completion: {output['text']!r}")
+    for prompt, output in zip(prompts, outputs):
+        print("\n")
+        print(f"Prompt: {prompt!r}")
+        print(f"Completion: {output['text']!r}")
+
+
+if __name__ == "__main__":
+    main()
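Context for the change, not part of the commit itself: the "spawn" start method re-imports the main module in every worker process, so the example's top-level code has to move under an `if __name__ == "__main__":` guard, otherwise each spawned child would re-run the module top level and try to construct its own LLM (and its own workers) recursively. A minimal standard-library sketch of the pattern this commit adopts:

    # Sketch only: illustrates why the __main__ guard is required with
    # the "spawn" start method; worker() stands in for nanovllm's
    # tensor-parallel worker processes.
    import multiprocessing as mp

    def worker(i):
        # Runs in a freshly spawned interpreter that re-imports this module.
        print(f"worker {i} running in {mp.current_process().name}")

    def main():
        mp.set_start_method("spawn")
        procs = [mp.Process(target=worker, args=(i,)) for i in range(2)]
        for p in procs:
            p.start()
        for p in procs:
            p.join()

    if __name__ == "__main__":
        # Without this guard, each spawned child would execute the module
        # top level again and attempt to spawn processes of its own.
        main()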