same as vllm
@@ -61,8 +61,7 @@ class Attention(nn.Module):
         k = k.view(-1, self.num_kv_heads, self.head_dim)
         v = v.view(-1, self.num_kv_heads, self.head_dim)
         context = get_context()
-        k_cache = self.k_cache
-        v_cache = self.v_cache
+        k_cache, v_cache = self.k_cache, self.v_cache
         if k_cache.numel() and v_cache.numel():
             store_kvcache(k, v, k_cache, v_cache, context.slot_mapping)
         if context.is_prefill:
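For context: the guarded store_kvcache call scatters each token's freshly computed key/value vectors into the preallocated KV cache at the slots given by context.slot_mapping, and the numel() check skips the write while the caches are still empty placeholders (e.g. before cache memory has been allocated). Below is a minimal pure-PyTorch sketch of that store, assuming caches shaped [num_slots, num_kv_heads, head_dim]; the real helper is typically a fused kernel, so the name and shapes here are illustrative assumptions, not this repo's implementation.

import torch

def store_kvcache_sketch(
    k: torch.Tensor,             # [num_tokens, num_kv_heads, head_dim]
    v: torch.Tensor,             # [num_tokens, num_kv_heads, head_dim]
    k_cache: torch.Tensor,       # assumed [num_slots, num_kv_heads, head_dim]
    v_cache: torch.Tensor,       # assumed [num_slots, num_kv_heads, head_dim]
    slot_mapping: torch.Tensor,  # [num_tokens]; cache slot index per token
) -> None:
    # Scatter each token's K/V into its assigned cache slot via advanced
    # indexing. A production kernel would also handle padding entries
    # (e.g. slots marked -1), omitted here for brevity.
    k_cache[slot_mapping] = k
    v_cache[slot_mapping] = v

The k_cache, v_cache = self.k_cache, self.v_cache change itself alters no behavior; it just collapses two attribute reads into one tuple assignment to match vLLM's phrasing, hence the commit title.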