Same as vLLM

This commit is contained in:
GeeeekExplorer
2025-06-27 18:50:56 +08:00
parent 658520b788
commit 1caeec8dfa
3 changed files with 20 additions and 23 deletions

View File

@@ -61,8 +61,7 @@ class Attention(nn.Module):
k = k.view(-1, self.num_kv_heads, self.head_dim)
v = v.view(-1, self.num_kv_heads, self.head_dim)
context = get_context()
k_cache = self.k_cache
v_cache = self.v_cache
k_cache, v_cache = self.k_cache, self.v_cache
if k_cache.numel() and v_cache.numel():
store_kvcache(k, v, k_cache, v_cache, context.slot_mapping)
if context.is_prefill: