[fix] Fixed kvcache offload bugs.
This commit is contained in:
@@ -27,6 +27,8 @@ class Context:
|
||||
prev_kv_chunks: List[Tuple[torch.Tensor, torch.Tensor]] = field(default_factory=list)
|
||||
# Current sequence being processed (for chunked prefill to load KV)
|
||||
chunked_seq: Any = None
|
||||
# Position within block for decode (used for reading from the decode region)
|
||||
decode_pos_in_block: int = 0
|
||||
|
||||
|
||||
_CONTEXT = Context()
|
||||
@@ -50,6 +52,7 @@ def set_context(
|
||||
chunk_offset=0,
|
||||
offload_engine=None,
|
||||
chunked_seq=None,
|
||||
decode_pos_in_block=0,
|
||||
):
|
||||
global _CONTEXT
|
||||
_CONTEXT = Context(
|
||||
@@ -66,6 +69,7 @@ def set_context(
|
||||
chunk_offset=chunk_offset,
|
||||
offload_engine=offload_engine,
|
||||
chunked_seq=chunked_seq,
|
||||
decode_pos_in_block=decode_pos_in_block,
|
||||
)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user