From e436ec861fe0d4ab55eeb31b0da41ed9b914f161 Mon Sep 17 00:00:00 2001
From: Zijie Tian
Date: Wed, 28 Jan 2026 14:21:23 +0800
Subject: [PATCH] =?UTF-8?q?=E2=9A=99=EF=B8=8F=20config:=20update=20test=5F?=
 =?UTF-8?q?ruler.py=20defaults?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- max_new_tokens: 128 → 16 (sufficient for NIAH answers)
- block_size: 1024 → 4096 (better performance)

Co-Authored-By: Claude Opus 4.5
---
 tests/test_ruler.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/test_ruler.py b/tests/test_ruler.py
index 1c977c0..1451f6f 100644
--- a/tests/test_ruler.py
+++ b/tests/test_ruler.py
@@ -108,7 +108,7 @@ DEFAULT_MODEL = os.path.expanduser("~/models/Llama-3.1-8B-Instruct")
 # Note: max_model_len must be > max_input_len to leave room for output tokens
 # 64k benchmark has inputs up to 65536 tokens, so we need 65536 + 128 = 65664
 DEFAULT_MAX_MODEL_LEN = 65664
-DEFAULT_MAX_NEW_TOKENS = 128  # Larger for multi-value tasks
+DEFAULT_MAX_NEW_TOKENS = 16  # Sufficient for NIAH single-value answers
 
 # Task categories for evaluation
 NIAH_TASKS = ["niah_single_1", "niah_single_2", "niah_single_3",
@@ -323,7 +323,7 @@ def run_ruler_benchmark(
     max_new_tokens: int = DEFAULT_MAX_NEW_TOKENS,
     enable_cpu_offload: bool = False,
     num_gpu_blocks: int = 4,
-    block_size: int = 1024,
+    block_size: int = 4096,
     num_kv_buffers: int = 4,
     gpu_utilization: float = 0.9,
     enforce_eager: bool = True,
@@ -528,8 +528,8 @@ if __name__ == "__main__":
                         help="Enable CPU offload mode")
     parser.add_argument("--num-gpu-blocks", type=int, default=4,
                         help="Number of GPU blocks for CPU offload (default: 4)")
-    parser.add_argument("--block-size", type=int, default=1024,
-                        help="KV cache block size (default: 1024)")
+    parser.add_argument("--block-size", type=int, default=4096,
+                        help="KV cache block size (default: 4096)")
     parser.add_argument("--num-kv-buffers", type=int, default=4,
                         help="Number of KV buffers for ring buffer (default: 4)")
     parser.add_argument("--gpu-utilization", type=float, default=0.9,
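
For reference, a minimal usage sketch with the new defaults. Only flags
visible in the diff above are used; --block-size 4096 is passed explicitly
even though it is now the default, purely to document intent. The model is
assumed to resolve via the script's DEFAULT_MODEL path
(~/models/Llama-3.1-8B-Instruct), which may need adjusting locally:

    # Run the RULER benchmark; --block-size 4096 restates the new default,
    # so the flag could be omitted entirely.
    python tests/test_ruler.py --block-size 4096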