Support tensor parallelism

2025-06-15 01:31:24 +08:00
parent b6136383c9
commit 53b3ef2e32
9 changed files with 102 additions and 31 deletions
--- a/nanovllm/config.py
+++ b/nanovllm/config.py
@@ -9,6 +9,7 @@ class Config:
    max_num_seqs: int = 512
    max_model_len: int = 4096
    gpu_memory_utilization: float = 0.9
+    tensor_parallel_size: int = 1
    enforce_eager: bool = False
    hf_config: AutoConfig | None = None
    eos: int = -1
@@ -17,4 +18,4 @@ class Config:

    def __post_init__(self):
        assert self.model
-        assert self.kvcache_block_size % 256 == 0
+        assert self.kvcache_block_size % 256 == 0