This commit is contained in:
GeeeekExplorer
2025-06-15 10:31:48 +08:00
parent c1fd4ea3c2
commit fc778a4da9
10 changed files with 19 additions and 22 deletions

View File

@@ -1,10 +1,11 @@
import os
from dataclasses import dataclass
from transformers import AutoConfig
# Config: dataclass of settings. Only the fields inside this diff hunk are
# visible here; further fields are declared in the part of the class the hunk
# skips.
@dataclass
class Config:
# NOTE(review): the next two lines look like the removed and added sides of
# the diff -- `model` appears to drop its '' default and become a required
# field. Confirm against the real file; only one of them should remain.
model: str = ''
model: str
# presumably an upper bound on tokens per batch -- TODO confirm with the code
# that reads it
max_num_batched_tokens: int = 32768
# presumably an upper bound on concurrently handled sequences -- TODO confirm
max_num_seqs: int = 512
# Requested maximum length; __post_init__ lowers it to the loaded model
# config's max_position_embeddings when that value is smaller.
max_model_len: int = 4096
@@ -17,5 +18,8 @@ class Config:
# NOTE(review): -1 looks like a "not set yet" placeholder; where the real
# value is assigned is not visible here -- confirm.
num_kvcache_blocks: int = -1
# Validate the settings, then load the model's Hugging Face config and clamp
# max_model_len to what that config allows.
# Raises AssertionError when a check fails. Note that `assert` statements are
# removed when Python runs with -O, so these checks are skipped in that mode.
def __post_init__(self):
# NOTE(review): this truthiness check and the isdir check below look like the
# removed and added sides of the diff. os.path.isdir('') is already False, so
# the isdir check also rejects an empty string.
assert self.model
# `model` must be the path of an existing directory.
assert os.path.isdir(self.model)
# The block size must be a multiple of 256 (the field is declared outside the
# visible hunk).
assert self.kvcache_block_size % 256 == 0
# Only 1 through 8 are accepted (the field is declared outside the visible
# hunk).
assert 1 <= self.tensor_parallel_size <= 8
# Load the configuration from the model directory and keep it on the
# instance.
self.hf_config = AutoConfig.from_pretrained(self.model)
# Never allow a max_model_len larger than the model's
# max_position_embeddings.
self.max_model_len = min(self.max_model_len, self.hf_config.max_position_embeddings)