[WIP] Before integrating the xattn operator.
This commit is contained in:
@@ -72,6 +72,14 @@ def create_kvcache_manager(config: "Config") -> KVCacheManager:
|
||||
'topk_blocks': getattr(config, 'sparse_topk_blocks', 8),
|
||||
'threshold_blocks': getattr(config, 'sparse_threshold_blocks', 4),
|
||||
}
|
||||
elif sparse_policy_type == SparsePolicyType.XATTN_BSA:
|
||||
policy_kwargs = {
|
||||
'block_size': getattr(config, 'sparse_block_size', 128),
|
||||
'samples_per_chunk': getattr(config, 'sparse_samples_per_chunk', 128),
|
||||
'threshold': getattr(config, 'sparse_threshold', 0.9),
|
||||
'use_triton': getattr(config, 'sparse_use_triton', True),
|
||||
'stride': getattr(config, 'sparse_stride', 8),
|
||||
}
|
||||
|
||||
sparse_policy = create_sparse_policy(sparse_policy_type, **policy_kwargs)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user