[claudesquad] update from 'int-minference-1' on 08 Jan 26 23:22 CST

2026-01-08 23:22:38 +08:00
parent 0bfe1984ef
commit ea4e904de0
11 changed files with 853 additions and 533 deletions
--- a/nanovllm/kvcache/sparse/minference.py
+++ b/nanovllm/kvcache/sparse/minference.py
@@ -30,6 +30,7 @@ class MInferencePolicy(SparsePolicy):

    supports_prefill = True
    supports_decode = False  # MInference is a prefill-only sparse strategy
+    requires_block_selection = False  # MInference only affects attention computation, not KV load

    def __init__(
        self,