support tensor parallel
This commit is contained in:
@@ -14,8 +14,6 @@ class Scheduler:
|
||||
self.block_manager = BlockManager(config.num_kvcache_blocks, config.kvcache_block_size)
|
||||
self.waiting: deque[Sequence] = deque()
|
||||
self.running: deque[Sequence] = deque()
|
||||
self.num_finished = 0
|
||||
self.num_tokens = 0
|
||||
|
||||
def is_finished(self):
|
||||
return not self.waiting and not self.running
|
||||
@@ -67,11 +65,9 @@ class Scheduler:
|
||||
self.waiting.appendleft(seq)
|
||||
|
||||
def postprocess(self, seqs: list[Sequence], token_ids: list[int]) -> list[bool]:
|
||||
self.num_tokens += len(token_ids)
|
||||
for seq, token_id in zip(seqs, token_ids):
|
||||
seq.append_token(token_id)
|
||||
if (not seq.ignore_eos and token_id == self.eos) or seq.num_completion_tokens == seq.max_tokens:
|
||||
seq.status = SequenceStatus.FINISHED
|
||||
self.block_manager.deallocate(seq)
|
||||
self.running.remove(seq)
|
||||
self.num_finished += 1
|
||||
|
||||
Reference in New Issue
Block a user