[WIP] Need to refactor the nanovllm mechanism.

This commit is contained in:
Zijie Tian
2025-12-22 23:52:56 +08:00
parent 1907b625b6
commit 4dcef16c13
10 changed files with 223 additions and 1099 deletions

View File

@@ -125,6 +125,11 @@ class OffloadEngine:
dtype=torch.int64, device="cuda"
)
# Log memory allocation
gpu_mem_mb = self.gpu_memory_bytes() / (1024 * 1024)
cpu_mem_mb = self.cpu_memory_bytes() / (1024 * 1024)
logger.info(f" GPU memory: {gpu_mem_mb:.1f} MB, CPU memory: {cpu_mem_mb:.1f} MB")
# ========== Transfer streams for async operations ==========
self.transfer_streams = [torch.cuda.Stream() for _ in range(num_streams)]
self.compute_stream = torch.cuda.current_stream()