Files
nano-vllm/nanovllm/config.py
GeeeekExplorer b98e1ca305 fix
2025-06-10 21:25:54 +08:00

20 lines
514 B
Python

from dataclasses import dataclass
from transformers import AutoConfig
@dataclass
class Config:
model: str = ''
max_num_batched_tokens: int = 32768
max_num_seqs: int = 512
max_model_len: int = 4096
gpu_memory_utilization: float = 0.95
enforce_eager: bool = False
hf_config: AutoConfig | None = None
eos: int = -1
kvcache_block_size: int = 256
num_kvcache_blocks: int = -1
def __post_init__(self):
assert self.model
assert self.kvcache_block_size == 256