fix(model_runner): correct position indexing to be 0-based

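- Change the position calculation from `len(seq)` to `len(seq) - 1`, so the position appended for each sequence's last token is that token's 0-based index rather than the sequence length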
This commit is contained in:
PeterDing
2025-07-04 14:29:12 +08:00
parent 38baf0bbe4
commit f5b4840276

View File

@@ -167,7 +167,7 @@ class ModelRunner:
context_lens = [] context_lens = []
for seq in seqs: for seq in seqs:
input_ids.append(seq.last_token) input_ids.append(seq.last_token)
positions.append(len(seq)) positions.append(len(seq) - 1)
context_lens.append(len(seq)) context_lens.append(len(seq))
slot_mapping.append(seq.block_table[-1] * self.block_size + seq.last_block_num_tokens - 1) slot_mapping.append(seq.block_table[-1] * self.block_size + seq.last_block_num_tokens - 1)
input_ids = torch.tensor(input_ids, dtype=torch.int64, pin_memory=True).cuda(non_blocking=True) input_ids = torch.tensor(input_ids, dtype=torch.int64, pin_memory=True).cuda(non_blocking=True)