[feat] Added num_gpu_blocks limit gpu blocks.

This commit is contained in:
Zijie Tian
2025-12-10 20:17:42 +08:00
parent 01f19ee4a6
commit 0a247ccb1b
7 changed files with 150 additions and 9 deletions

View File

@@ -22,6 +22,7 @@ class Config:
cpu_memory_gb: float = 16.0 # CPU memory limit for KV cache
offload_policy: str = "lru" # "lru", "fifo", or full class path
num_transfer_streams: int = 4 # Number of CUDA streams for async transfers
num_gpu_blocks: int = -1 # User-specified GPU blocks count, -1 = auto (use max available)
# Computed fields for offload (set in __post_init__ or by ModelRunner)
num_gpu_kvcache_blocks: int = -1