[claudesquad] update from 'add-llama-1' on 10 Jan 26 21:03 CST

This commit is contained in:
Zijie Tian
2026-01-10 21:03:45 +08:00
parent 6575099a06
commit 03a8c033cb
10 changed files with 858 additions and 7 deletions

View File

@@ -6,7 +6,7 @@ from multiprocessing.shared_memory import SharedMemory
from nanovllm.config import Config
from nanovllm.engine.sequence import Sequence
from nanovllm.models.qwen3 import Qwen3ForCausalLM
from nanovllm.models import get_model_class
from nanovllm.layers.sampler import GreedySampler
from nanovllm.utils.context import set_context, get_context, reset_context
from nanovllm.utils.loader import load_model
@@ -32,7 +32,8 @@ class ModelRunner:
default_dtype = torch.get_default_dtype()
torch.set_default_dtype(hf_config.torch_dtype)
torch.set_default_device("cuda")
self.model = Qwen3ForCausalLM(hf_config)
model_class = get_model_class(hf_config)
self.model = model_class(hf_config)
load_model(self.model, config.model)
self.sampler = GreedySampler()