from dataclasses import dataclass from transformers import AutoConfig @dataclass class Config: model: str = '' max_num_batched_tokens: int = 32768 max_num_seqs: int = 512 max_model_len: int = 4096 gpu_memory_utilization: float = 0.9 tensor_parallel_size: int = 1 enforce_eager: bool = False hf_config: AutoConfig | None = None eos: int = -1 kvcache_block_size: int = 256 num_kvcache_blocks: int = -1 def __post_init__(self): assert self.model assert self.kvcache_block_size % 256 == 0