From f5b48402761a7835c52059a18ea96308355041b2 Mon Sep 17 00:00:00 2001
From: PeterDing
Date: Fri, 4 Jul 2025 14:29:12 +0800
Subject: [PATCH] fix(model_runner): correct position indexing to be 0-based

- Change position calculation from len(seq) to len(seq) - 1
---
 nanovllm/engine/model_runner.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nanovllm/engine/model_runner.py b/nanovllm/engine/model_runner.py
index d48a0eb..aaa29cf 100644
--- a/nanovllm/engine/model_runner.py
+++ b/nanovllm/engine/model_runner.py
@@ -167,7 +167,7 @@ class ModelRunner:
         context_lens = []
         for seq in seqs:
             input_ids.append(seq.last_token)
-            positions.append(len(seq))
+            positions.append(len(seq) - 1)
             context_lens.append(len(seq))
             slot_mapping.append(seq.block_table[-1] * self.block_size + seq.last_block_num_tokens - 1)
         input_ids = torch.tensor(input_ids, dtype=torch.int64, pin_memory=True).cuda(non_blocking=True)