warmup and allocate

2025-06-27 01:51:57 +08:00
parent cfc4cb6710
commit 658520b788
4 changed files with 25 additions and 8 deletions
--- a/nanovllm/layers/attention.py
+++ b/nanovllm/layers/attention.py
@@ -63,7 +63,8 @@ class Attention(nn.Module):
        context = get_context()
        k_cache = self.k_cache
        v_cache = self.v_cache
-        store_kvcache(k, v, k_cache, v_cache, context.slot_mapping)
+        if k_cache.numel() and v_cache.numel():
+            store_kvcache(k, v, k_cache, v_cache, context.slot_mapping)
        if context.is_prefill:
            if context.block_tables is not None:    # prefix cache
                k, v = k_cache, v_cache