Merge pull request #67 from PeterDing/fix/decoding-positions
fix(model_runner): correct position indexing to be 0-based
This commit is contained in:
@@ -167,7 +167,7 @@ class ModelRunner:
|
||||
context_lens = []
|
||||
for seq in seqs:
|
||||
input_ids.append(seq.last_token)
|
||||
- positions.append(len(seq))
|
||||
+ positions.append(len(seq) - 1)
|
||||
context_lens.append(len(seq))
|
||||
slot_mapping.append(seq.block_table[-1] * self.block_size + seq.last_block_num_tokens - 1)
|
||||
input_ids = torch.tensor(input_ids, dtype=torch.int64, pin_memory=True).cuda(non_blocking=True)
|
||||
|
||||
Reference in New Issue
Block a user