warmup and allocate

This commit is contained in:
GeeeekExplorer
2025-06-27 01:51:57 +08:00
parent cfc4cb6710
commit 658520b788
4 changed files with 25 additions and 8 deletions

View File

@@ -63,7 +63,8 @@ class Attention(nn.Module):
context = get_context()
k_cache = self.k_cache
v_cache = self.v_cache
- store_kvcache(k, v, k_cache, v_cache, context.slot_mapping)
+ if k_cache.numel() and v_cache.numel():
+     store_kvcache(k, v, k_cache, v_cache, context.slot_mapping)
if context.is_prefill:
if context.block_tables is not None: # prefix cache
k, v = k_cache, v_cache