[feat] Added chunked prefill and kvcache offload mechanism.

This commit is contained in:
Zijie Tian
2025-12-10 03:47:37 +08:00
parent 204fe2b38f
commit 0b6f19242d
25 changed files with 4414 additions and 61 deletions

View File

@@ -31,7 +31,7 @@ class LLMEngine:
self.model_runner = ModelRunner(config, 0, self.events)
self.tokenizer = AutoTokenizer.from_pretrained(config.model, use_fast=True)
config.eos = self.tokenizer.eos_token_id
self.scheduler = Scheduler(config)
self.scheduler = Scheduler(config, self.model_runner.kvcache_manager)
atexit.register(self.exit)
def exit(self):