[feat] Added chunked prefill and kvcache offload mechanism.
This commit is contained in:
@@ -31,7 +31,7 @@ class LLMEngine:
|
||||
self.model_runner = ModelRunner(config, 0, self.events)
|
||||
self.tokenizer = AutoTokenizer.from_pretrained(config.model, use_fast=True)
|
||||
config.eos = self.tokenizer.eos_token_id
|
||||
- self.scheduler = Scheduler(config)
|
||||
+ self.scheduler = Scheduler(config, self.model_runner.kvcache_manager)
|
||||
atexit.register(self.exit)
|
||||
|
||||
def exit(self):
|
||||
|
||||
Reference in New Issue
Block a user