[feat] Need to optimized with async prefetch.
This commit is contained in:
@@ -32,6 +32,8 @@ class Context:
|
||||
# Starting position within block where decode tokens began (for accumulated token tracking)
|
||||
# Used when batching decode offloads - we need to attend to all accumulated tokens
|
||||
decode_start_pos_in_block: int = 0
|
||||
# Current chunk index for ring buffer pipeline (prefill only)
|
||||
current_chunk_idx: int = 0
|
||||
|
||||
|
||||
_CONTEXT = Context()
|
||||
@@ -57,6 +59,7 @@ def set_context(
|
||||
chunked_seq=None,
|
||||
decode_pos_in_block=0,
|
||||
decode_start_pos_in_block=0,
|
||||
current_chunk_idx=0,
|
||||
):
|
||||
global _CONTEXT
|
||||
_CONTEXT = Context(
|
||||
@@ -75,6 +78,7 @@ def set_context(
|
||||
chunked_seq=chunked_seq,
|
||||
decode_pos_in_block=decode_pos_in_block,
|
||||
decode_start_pos_in_block=decode_start_pos_in_block,
|
||||
current_chunk_idx=current_chunk_idx,
|
||||
)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user