fix
@@ -4,7 +4,7 @@ from transformers import AutoConfig, AutoTokenizer
 
 from nanovllm.config import Config
 from nanovllm.sampling_params import SamplingParams
-from nanovllm.engine.sequence import Sequence, SequenceStatus
+from nanovllm.engine.sequence import Sequence
 from nanovllm.engine.scheduler import Scheduler
 from nanovllm.engine.model_runner import ModelRunner
 
@@ -34,7 +34,7 @@ class LLMEngine:
         seqs, is_prefill = self.scheduler.schedule()
         token_ids = self.model_runner.run(seqs, is_prefill)
         self.scheduler.postprocess(seqs, token_ids)
-        outputs = [(seq.seq_id, seq[seq.num_prompt_tokens:]) for seq in seqs if seq.status == SequenceStatus.FINISHED]
+        outputs = [(seq.seq_id, seq.completion_token_ids) for seq in seqs if seq.is_finished]
         num_tokens = sum(len(seq) for seq in seqs) if is_prefill else -len(seqs)
         return outputs, num_tokens
 
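The new comprehension replaces inline slicing (seq[seq.num_prompt_tokens:]) and an explicit status comparison with two Sequence helpers, completion_token_ids and is_finished, which is why LLMEngine no longer needs to import SequenceStatus. A minimal sketch of what those properties could look like on the Sequence side, assuming Sequence keeps prompt and generated tokens together in a token_ids list and trimmed to only the parts this diff touches (the real nanovllm.engine.sequence module may differ):

    from enum import Enum, auto

    class SequenceStatus(Enum):
        WAITING = auto()
        RUNNING = auto()
        FINISHED = auto()

    class Sequence:
        def __init__(self, token_ids):
            self.status = SequenceStatus.WAITING
            self.token_ids = list(token_ids)          # prompt tokens, then generated tokens
            self.num_prompt_tokens = len(token_ids)

        def __len__(self):
            # Used by the engine's num_tokens accounting: sum(len(seq) for seq in seqs).
            return len(self.token_ids)

        @property
        def completion_token_ids(self):
            # What the old engine code computed inline as seq[seq.num_prompt_tokens:].
            return self.token_ids[self.num_prompt_tokens:]

        @property
        def is_finished(self):
            # What the old engine code checked as seq.status == SequenceStatus.FINISHED.
            return self.status == SequenceStatus.FINISHED

Moving both expressions behind Sequence properties keeps the status enum and the prompt/completion split as implementation details of the sequence, so the engine only deals with the sequence's public interface.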