[WIP] Need to change FlashAttention for debugging.

This commit is contained in:
Zijie Tian
2026-01-01 00:58:22 +08:00
parent 30462fe89a
commit 965c8aff12
3 changed files with 49 additions and 3 deletions

View File

@@ -1007,9 +1007,8 @@ class OffloadEngine:
if not self._debug_mode or not self._debug_hooks:
return
# GPU cache has no layer dimension
k = self.k_cache_gpu[slot_idx]
v = self.v_cache_gpu[slot_idx]
# Use get_kv_for_slot for consistency with attention.py
k, v = self.get_kv_for_slot(slot_idx)
for hook in self._debug_hooks:
try: