[WIP] Before plan execution.

This commit is contained in:
Zijie Tian
2026-01-19 03:30:44 +08:00
parent e6e0dc5d7d
commit 9e6fdc0650
5 changed files with 377 additions and 10 deletions

View File

@@ -207,8 +207,10 @@ class Attention(nn.Module):
# Get prefilled CPU blocks (blocks from previous chunks)
cpu_block_table = kvcache_manager.get_prefilled_cpu_blocks(seq)
# Apply sparse policy if enabled (Quest returns all blocks for prefill since query=None)
# Apply sparse policy if enabled
sparse_policy = kvcache_manager.sparse_policy
# === Standard sparse policy (Quest, etc.) ===
if cpu_block_table and sparse_policy is not None:
num_chunks = getattr(context, 'num_chunks', current_chunk_idx + 1)
policy_ctx = PolicyContext(