warmup and allocate
@@ -6,7 +6,7 @@ from transformers import AutoConfig
 @dataclass
 class Config:
     model: str
-    max_num_batched_tokens: int = 32768
+    max_num_batched_tokens: int = 16384
     max_num_seqs: int = 512
     max_model_len: int = 4096
     gpu_memory_utilization: float = 0.9
@@ -23,3 +23,4 @@ class Config:
         assert 1 <= self.tensor_parallel_size <= 8
         self.hf_config = AutoConfig.from_pretrained(self.model)
         self.max_model_len = min(self.max_model_len, self.hf_config.max_position_embeddings)
+        assert self.max_num_batched_tokens >= self.max_model_len
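The added assertion guarantees that one sequence of max_model_len tokens always fits in a single batch; with the new defaults, a 16384-token budget covers at least four full 4096-token sequences. Below is a minimal sketch of how a warmup-and-allocate step might consume these fields: run one worst-case dummy forward pass, then size the KV cache from whatever memory remains under the gpu_memory_utilization budget. The function name warmup_and_allocate, the block size, and the exact memory accounting are assumptions for illustration; only the Config fields come from this commit.

# Hypothetical sketch, not this commit's implementation: only the Config
# fields (max_num_batched_tokens, max_num_seqs, max_model_len,
# gpu_memory_utilization, hf_config) come from the diff above.
import torch

def warmup_and_allocate(model, config, block_size=256, dtype=torch.float16):
    hf = config.hf_config
    head_dim = hf.hidden_size // hf.num_attention_heads

    # Warmup: one dummy forward pass at the configured limits, so the
    # peak activation footprint is known before the KV cache is sized.
    torch.cuda.empty_cache()
    torch.cuda.reset_peak_memory_stats()
    num_seqs = min(config.max_num_seqs,
                   config.max_num_batched_tokens // config.max_model_len)
    dummy_input = torch.zeros(num_seqs, config.max_model_len,
                              dtype=torch.int64, device="cuda")
    model(dummy_input)
    torch.cuda.synchronize()

    # Allocate: give the KV cache whatever fits under the
    # gpu_memory_utilization budget once the warmup peak is subtracted.
    _, total = torch.cuda.mem_get_info()
    peak = torch.cuda.max_memory_allocated()
    budget = int(total * config.gpu_memory_utilization) - peak
    # Bytes cached per token: K and V, for every layer and KV head.
    bytes_per_token = (2 * hf.num_hidden_layers * hf.num_key_value_heads
                       * head_dim * torch.finfo(dtype).bits // 8)
    num_blocks = max(budget // (bytes_per_token * block_size), 0)
    kv_cache = torch.empty(2, hf.num_hidden_layers, num_blocks, block_size,
                           hf.num_key_value_heads, head_dim,
                           dtype=dtype, device="cuda")
    return kv_cache

Note how the new assertion is what makes the warmup batch well-formed: it guarantees max_num_batched_tokens // max_model_len is at least 1, so the dummy pass always exercises at least one full-length sequence.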