[feat] Needs to be optimized with async prefetch.

This commit is contained in:
Zijie Tian
2025-12-15 06:58:40 +08:00
parent 1081ab51ea
commit b8b6478506
9 changed files with 556 additions and 404 deletions

View File

@@ -32,6 +32,8 @@ class Context:
# Starting position within block where decode tokens began (for accumulated token tracking)
# Used when batching decode offloads - we need to attend to all accumulated tokens
decode_start_pos_in_block: int = 0
# Current chunk index for ring buffer pipeline (prefill only)
current_chunk_idx: int = 0
_CONTEXT = Context()
@@ -57,6 +59,7 @@ def set_context(
chunked_seq=None,
decode_pos_in_block=0,
decode_start_pos_in_block=0,
current_chunk_idx=0,
):
global _CONTEXT
_CONTEXT = Context(
@@ -75,6 +78,7 @@ def set_context(
chunked_seq=chunked_seq,
decode_pos_in_block=decode_pos_in_block,
decode_start_pos_in_block=decode_start_pos_in_block,
current_chunk_idx=current_chunk_idx,
)