Fix typo: rename `preare_block_tables` to `prepare_block_tables` (definition and both call sites)
This commit is contained in:
@@ -44,7 +44,7 @@ class ModelRunner:
|
||||
module.v_cache = self.kv_cache[1, layer_id]
|
||||
layer_id += 1
|
||||
|
||||
def preare_block_tables(self, seqs: list[Sequence]):
|
||||
def prepare_block_tables(self, seqs: list[Sequence]):
|
||||
max_len = max(len(seq.block_table) for seq in seqs)
|
||||
block_tables = [
|
||||
seq.block_table + [-1] * (max_len - len(seq.block_table))
|
||||
@@ -84,7 +84,7 @@ class ModelRunner:
|
||||
assert len(input_ids) == cu_seqlens_q[-1]
|
||||
if cu_seqlens_k[-1] > cu_seqlens_q[-1]: # prefix cache
|
||||
context_lens = torch.tensor([len(seq) for seq in seqs], dtype=torch.int32, pin_memory=True).cuda(non_blocking=True)
|
||||
block_tables = self.preare_block_tables(seqs)
|
||||
block_tables = self.prepare_block_tables(seqs)
|
||||
input_ids = torch.tensor(input_ids, dtype=torch.int64, pin_memory=True).cuda(non_blocking=True)
|
||||
positions = torch.tensor(positions, dtype=torch.int64, pin_memory=True).cuda(non_blocking=True)
|
||||
cu_seqlens_q = torch.tensor(cu_seqlens_q, dtype=torch.int32, pin_memory=True).cuda(non_blocking=True)
|
||||
@@ -107,7 +107,7 @@ class ModelRunner:
|
||||
positions = torch.tensor(positions, dtype=torch.int64, pin_memory=True).cuda(non_blocking=True)
|
||||
slot_mapping = torch.tensor(slot_mapping, dtype=torch.int32, pin_memory=True).cuda(non_blocking=True)
|
||||
context_lens = torch.tensor(context_lens, dtype=torch.int32, pin_memory=True).cuda(non_blocking=True)
|
||||
block_tables = self.preare_block_tables(seqs)
|
||||
block_tables = self.prepare_block_tables(seqs)
|
||||
set_context(False, slot_mapping=slot_mapping, context_lens=context_lens, block_tables=block_tables)
|
||||
return input_ids, positions
|
||||
|
||||
|
||||
Reference in New Issue
Block a user