[claudesquad] update from 'int-minference-1' on 08 Jan 26 23:22 CST
This commit is contained in:
@@ -25,6 +25,7 @@ class FullAttentionPolicy(SparsePolicy):
|
||||
# Full attention supports both prefill and decode
|
||||
supports_prefill = True
|
||||
supports_decode = True
|
||||
requires_block_selection = False # Load all blocks, no selective loading
|
||||
|
||||
def select_blocks(
|
||||
self,
|
||||
|
||||
Reference in New Issue
Block a user