[feat] Added num_gpu_blocks to limit the number of GPU blocks.

This commit is contained in:
Zijie Tian
2025-12-10 20:17:42 +08:00
parent 01f19ee4a6
commit 0a247ccb1b
7 changed files with 150 additions and 9 deletions

View File

@@ -36,7 +36,7 @@ def main():
path = os.path.expanduser("~/models/Qwen3-4B-Instruct-2507/")
llm = LLM(
path,
enforce_eager=True,
enforce_eager=False,
max_model_len=128 * 1024,
max_num_batched_tokens=128 * 1024,
enable_cpu_offload=True,