From ffafaeb1333c03f08f18ed4503040d485d6cd733 Mon Sep 17 00:00:00 2001 From: jinghuan-Chen <275923410@qq.com> Date: Wed, 18 Jun 2025 16:17:31 +0800 Subject: [PATCH] Release CUDA Graphs resource before exit. --- nanovllm/engine/model_runner.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/nanovllm/engine/model_runner.py b/nanovllm/engine/model_runner.py index 0c7b2ef..6f838a6 100644 --- a/nanovllm/engine/model_runner.py +++ b/nanovllm/engine/model_runner.py @@ -53,6 +53,9 @@ class ModelRunner: dist.barrier() if self.rank == 0: self.shm.unlink() + if not self.enforce_eager: + del self.graphs, self.graph_pool + torch.cuda.synchronize() dist.destroy_process_group() def loop(self):