[WIP] Need to change flashattention to debug.

2026-01-01 00:58:22 +08:00
parent 30462fe89a
commit 965c8aff12
3 changed files with 49 additions and 3 deletions
--- a/nanovllm/kvcache/offload_engine.py
+++ b/nanovllm/kvcache/offload_engine.py
@@ -1007,9 +1007,8 @@ class OffloadEngine:
        if not self._debug_mode or not self._debug_hooks:
            return

-        # GPU cache has no layer dimension
-        k = self.k_cache_gpu[slot_idx]
-        v = self.v_cache_gpu[slot_idx]
+        # Use get_kv_for_slot for consistency with attention.py
+        k, v = self.get_kv_for_slot(slot_idx)

        for hook in self._debug_hooks:
            try: