[test] Added a simple test_prefill.py.
tests/test_prefill.py (new file, 47 lines added)
@@ -0,0 +1,47 @@
"""
Test script for chunked prefill with CPU offload.

Demonstrates: LLM initialization, prefill execution with CPU offload enabled.
"""

import os
from random import randint, seed
from nanovllm import LLM, SamplingParams

# ============================================================
# Configuration
# ============================================================

MODEL_PATH = os.path.expanduser("~/models/Qwen3-0.6B/")
MAX_MODEL_LEN = 8192
NUM_GPU_BLOCKS = 4
INPUT_LEN = 4096
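# Note: NUM_GPU_BLOCKS is presumably kept much smaller than the prompt needs,
# so the GPU KV cache cannot hold the full 4096-token prefill and KV blocks
# have to spill to the CPU cache. This is an assumption about the intent; the
# commit does not state the KV block size.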

# ============================================================
# Main Test Script
# ============================================================

# 1. Initialize LLM with CPU offload
llm = LLM(
    MODEL_PATH,
    enforce_eager=True,
    max_model_len=MAX_MODEL_LEN,
    max_num_batched_tokens=MAX_MODEL_LEN,
    enable_cpu_offload=True,
    num_gpu_blocks=NUM_GPU_BLOCKS,
)

# 2. Generate random prompt tokens
seed(42)
prompt_token_ids = [[randint(0, 10000) for _ in range(INPUT_LEN)]]

# 3. Run prefill (max_tokens=1 to focus on prefill only)
sampling_params = SamplingParams(temperature=0.6, ignore_eos=True, max_tokens=1)
outputs = llm.generate(prompt_token_ids, sampling_params, use_tqdm=False)

# 4. Verify output
assert len(outputs) == 1
assert "token_ids" in outputs[0]
assert len(outputs[0]["token_ids"]) == 1

print("test_prefill: PASSED")
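The committed file runs as a top-level script (python tests/test_prefill.py) and prints "test_prefill: PASSED" on success. If it should also be collected by pytest, the same flow can be wrapped in a test function; a minimal sketch, assuming the same nanovllm API and local model path as above (the skipif guard and the test name are additions, not part of this commit):

import os
from random import randint, seed

import pytest

from nanovllm import LLM, SamplingParams

MODEL_PATH = os.path.expanduser("~/models/Qwen3-0.6B/")


@pytest.mark.skipif(not os.path.isdir(MODEL_PATH), reason="local model weights not available")
def test_prefill_cpu_offload():
    # Same configuration as the committed script: tiny GPU KV cache, CPU offload on.
    llm = LLM(
        MODEL_PATH,
        enforce_eager=True,
        max_model_len=8192,
        max_num_batched_tokens=8192,
        enable_cpu_offload=True,
        num_gpu_blocks=4,
    )
    # Deterministic 4096-token random prompt, single prompt in the batch.
    seed(42)
    prompt_token_ids = [[randint(0, 10000) for _ in range(4096)]]
    # max_tokens=1 keeps the run focused on the prefill phase.
    sampling_params = SamplingParams(temperature=0.6, ignore_eos=True, max_tokens=1)
    outputs = llm.generate(prompt_token_ids, sampling_params, use_tqdm=False)
    # One prompt in, one output with exactly one generated token.
    assert len(outputs) == 1
    assert "token_ids" in outputs[0]
    assert len(outputs[0]["token_ids"]) == 1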