[WIP] remove num_prefetch_blocks varible.

This commit is contained in:
Zijie Tian
2025-12-24 18:22:26 +08:00
parent b264de903d
commit 782437c486
10 changed files with 465 additions and 18 deletions

View File

@@ -53,7 +53,6 @@ class OffloadEngine:
head_dim: int,
dtype: torch.dtype = torch.float16,
num_streams: int = 4,
num_prefetch_blocks: int = 2,
):
self.num_layers = num_layers
self.num_gpu_blocks = num_gpu_blocks
@@ -82,8 +81,6 @@ class OffloadEngine:
self.decode_load_slots = list(range(1, num_gpu_blocks))
self.num_decode_load_slots = len(self.decode_load_slots)
# Keep num_prefetch_blocks for compatibility (used as chunk size for loading)
self.num_prefetch_blocks = num_prefetch_blocks
self.num_gpu_slots = num_gpu_blocks # alias
logger.info(f"Unified Ring Buffer: {self.num_ring_slots} slots total")