[test] Added a simple test_prefill.py.
tests/test_prefill.py (new file, 47 lines)
@@ -0,0 +1,47 @@
"""
Test script for chunked prefill with CPU offload.

Demonstrates LLM initialization and prefill execution with CPU offload enabled.
"""

import os
from random import randint, seed

from nanovllm import LLM, SamplingParams

# ============================================================
# Configuration
# ============================================================

MODEL_PATH = os.path.expanduser("~/models/Qwen3-0.6B/")
MAX_MODEL_LEN = 8192
NUM_GPU_BLOCKS = 4
INPUT_LEN = 4096
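
# With INPUT_LEN = 4096, NUM_GPU_BLOCKS = 4 is deliberately far too small to
# keep the whole KV cache on GPU: assuming the upstream nanovllm default
# kvcache_block_size of 256 tokens, four blocks cover only 1024 tokens, so
# the prefill should be forced to spill blocks to CPU.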

# ============================================================
# Main Test Script
# ============================================================

# 1. Initialize LLM with CPU offload
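#    (enforce_eager=True disables CUDA graph capture, which keeps the
#    offload path easier to step through; max_num_batched_tokens=MAX_MODEL_LEN
#    lets the scheduler admit the whole prompt in a single prefill step.)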
llm = LLM(
    MODEL_PATH,
    enforce_eager=True,
    max_model_len=MAX_MODEL_LEN,
    max_num_batched_tokens=MAX_MODEL_LEN,
    enable_cpu_offload=True,
    num_gpu_blocks=NUM_GPU_BLOCKS,
)
)

# 2. Generate random prompt tokens
seed(42)
prompt_token_ids = [[randint(0, 10000) for _ in range(INPUT_LEN)]]
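# (a single prompt of INPUT_LEN random token ids, i.e. batch size 1)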

# 3. Run prefill (max_tokens=1 to focus on prefill only)
sampling_params = SamplingParams(temperature=0.6, ignore_eos=True, max_tokens=1)
outputs = llm.generate(prompt_token_ids, sampling_params, use_tqdm=False)
# 4. Verify output
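#    generate() is expected to return one dict per prompt containing a
#    "token_ids" list; with max_tokens=1, exactly one new token comes back.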
assert len(outputs) == 1
assert "token_ids" in outputs[0]
assert len(outputs[0]["token_ids"]) == 1
print("test_prefill: PASSED")
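
# The file is a plain script (no pytest fixtures), so it can be run directly:
#   python tests/test_prefill.py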