[WIP] Snapshot before fixing bench_offload.py.

2026-01-06 18:41:08 +08:00
parent c7ac39dfbd
commit 535f2037ab
7 changed files with 66 additions and 44 deletions
--- a/nanovllm/engine/model_runner.py
+++ b/nanovllm/engine/model_runner.py
@@ -37,7 +37,7 @@ class ModelRunner:
        self.sampler = GreedySampler()
        
        #> Disable warmup for debugging
-        # self.warmup_model()
+        self.warmup_model()
        
        self.allocate_kv_cache()
        if not self.enforce_eager:
@@ -62,7 +62,7 @@ class ModelRunner:
                self.shm.unlink()
        if not self.enforce_eager:
            del self.graphs, self.graph_pool
-        torch.cuda.synchronize()
+        # torch.cuda.synchronize()
        dist.destroy_process_group()

    def loop(self):