[WIP] Fixing an attention compute error.

This commit is contained in:
Zijie Tian
2025-12-30 00:31:48 +08:00
parent bf4c63c7ec
commit 89f8020d38
12 changed files with 2175 additions and 103 deletions

View File

@@ -93,9 +93,9 @@ TEST_CASES = [
(1, 4, 256, 8, 128),
(1, 4, 512, 8, 128),
(1, 8, 512, 8, 128),
(1, 4, 1024, 8, 128),
(1, 4, 1024, 32, 128), # More heads
(1, 8, 256, 8, 64), # Smaller head dim
(1, 32, 1024, 8, 128),
(1, 32, 1024, 32, 128), # More heads
(1, 32, 256, 8, 64), # Smaller head dim
]
DTYPES = [torch.float16, torch.bfloat16]