[fix] Fixed kvcache offload problem.

This commit is contained in:
Zijie Tian
2025-12-12 01:35:30 +08:00
parent 60d24f7c12
commit 9b8165af5a
3 changed files with 96 additions and 36 deletions

View File

@@ -33,6 +33,14 @@ class Context:
# Used when batching decode offloads - we need to attend to all accumulated tokens
decode_start_pos_in_block: int = 0
# ========== Per-layer chunked attention state ==========
# Whether chunked decode/prefill is currently active (for hooks to check)
chunked_decode_active: bool = False
# CPU block IDs for the current chunk being processed
chunked_decode_chunk_ids: List[int] = field(default_factory=list)
# Current chunk index being processed
chunked_decode_current_chunk: int = 0
# Context instance built with no arguments, so every field starts at its
# declared default (e.g. chunked_decode_active=False, an empty
# chunked_decode_chunk_ids list, chunked_decode_current_chunk=0).
# NOTE(review): presumably the single shared context that accessor functions
# read and replace — those accessors are not visible in this hunk; confirm.
_CONTEXT = Context()