Merge pull request #67 from PeterDing/fix/decoding-positions

fix(model_runner): correct position indexing to be 0-based
This commit is contained in:
Xingkai Yu
2025-08-31 18:05:45 +08:00
committed by GitHub

View File

@@ -167,7 +167,7 @@ class ModelRunner:
context_lens = []
for seq in seqs:
input_ids.append(seq.last_token)
-positions.append(len(seq))
+positions.append(len(seq) - 1)
context_lens.append(len(seq))
slot_mapping.append(seq.block_table[-1] * self.block_size + seq.last_block_num_tokens - 1)
input_ids = torch.tensor(input_ids, dtype=torch.int64, pin_memory=True).cuda(non_blocking=True)