[feat] Added num_gpu_blocks to limit the number of GPU blocks.

2025-12-10 20:17:42 +08:00
parent 01f19ee4a6
commit 0a247ccb1b
7 changed files with 150 additions and 9 deletions
--- a/bench_offload.py
+++ b/bench_offload.py
@@ -36,7 +36,7 @@ def main():
    path = os.path.expanduser("~/models/Qwen3-4B-Instruct-2507/")
    llm = LLM(
        path,
-        enforce_eager=True,
+        enforce_eager=False,
        max_model_len=128 * 1024,
        max_num_batched_tokens=128 * 1024,
        enable_cpu_offload=True,