Files
nano-vllm/nanovllm/config.py
2025-06-15 01:31:24 +08:00

22 lines
552 B
Python

from dataclasses import dataclass
from transformers import AutoConfig
@dataclass
class Config:
model: str = ''
max_num_batched_tokens: int = 32768
max_num_seqs: int = 512
max_model_len: int = 4096
gpu_memory_utilization: float = 0.9
tensor_parallel_size: int = 1
enforce_eager: bool = False
hf_config: AutoConfig | None = None
eos: int = -1
kvcache_block_size: int = 256
num_kvcache_blocks: int = -1
def __post_init__(self):
assert self.model
assert self.kvcache_block_size % 256 == 0