[refactor] Refactor the current GPU and CPU block allocation strategy.

This commit is contained in:
Zijie Tian
2025-12-10 21:23:31 +08:00
parent 0a247ccb1b
commit 190df5f70d
7 changed files with 906 additions and 162 deletions

View File

@@ -40,7 +40,6 @@ def main():
max_model_len=128 * 1024,
max_num_batched_tokens=128 * 1024,
enable_cpu_offload=True,
cpu_memory_gb=32.0,
)
# Warmup