[refactor] Delete unnesscessory test, and refacrtor the offload prefix cache.

This commit is contained in:
Zijie Tian
2026-01-05 20:31:42 +08:00
parent 247c5312d9
commit e554d5482b
20 changed files with 258 additions and 3630 deletions

View File

@@ -62,6 +62,8 @@ class LLMEngine:
token_ids = self.model_runner.call("run", seqs, is_prefill)
self.scheduler.postprocess(seqs, token_ids)
outputs = [(seq.seq_id, seq.completion_token_ids) for seq in seqs if seq.is_finished]
#> Calculate number of tokens processed
num_tokens = sum(len(seq) for seq in seqs) if is_prefill else -len(seqs)
return outputs, num_tokens