nano-vllm/nanovllm/config.py

from dataclasses import dataclass

from transformers import AutoConfig


@dataclass
class Config:
    # Model and scheduling limits.
    model: str = ''                       # path or name of the model to load
    max_num_batched_tokens: int = 32768   # token budget per scheduling step
    max_num_seqs: int = 512               # maximum sequences batched together
    max_model_len: int = 4096             # maximum context length per sequence
    gpu_memory_utilization: float = 0.9   # fraction of GPU memory the engine may use
    enforce_eager: bool = False           # if True, skip CUDA graph capture

    # Placeholders overwritten by the engine after the model is loaded.
    hf_config: AutoConfig | None = None
    eos: int = -1

    # KV-cache paging.
    kvcache_block_size: int = 256         # tokens per KV-cache block
    num_kvcache_blocks: int = -1          # computed later from available memory

    def __post_init__(self):
        assert self.model
        assert self.kvcache_block_size % 256 == 0
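
A minimal usage sketch, assuming the file is importable as nanovllm.config; the model name below is a placeholder for illustration, not a value taken from the repo:

    from nanovllm.config import Config

    # Construct a config with a few overrides; unspecified fields keep their defaults.
    cfg = Config(
        model="Qwen/Qwen3-0.6B",          # hypothetical model identifier
        max_model_len=8192,
        gpu_memory_utilization=0.85,
    )

    # __post_init__ enforces a non-empty model and a block size that is a
    # multiple of 256; violating either raises AssertionError.
    try:
        Config(model="Qwen/Qwen3-0.6B", kvcache_block_size=100)
    except AssertionError:
        print("kvcache_block_size must be a multiple of 256")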