✨ feat: add configurable stride and chunk_size for XAttention BSA
- Add sparse_chunk_size config option (default: 16384)
- Pass stride, chunk_size, use_triton through factory function
- Add --sparse-stride CLI option to test_ruler.py

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -79,6 +79,7 @@ def create_kvcache_manager(config: "Config") -> KVCacheManager:
|
||||
'threshold': getattr(config, 'sparse_threshold', 0.9),
|
||||
'use_triton': getattr(config, 'sparse_use_triton', True),
|
||||
'stride': getattr(config, 'sparse_stride', 8),
|
||||
'chunk_size': getattr(config, 'sparse_chunk_size', 16384),
|
||||
}
|
||||
|
||||
sparse_policy = create_sparse_policy(sparse_policy_type, **policy_kwargs)
|
||||
|
||||
Reference in New Issue
Block a user