Merge branch 'zijie/add-llama-1': Add multi-model support
- Add model registry system for dynamic model loading
- Implement LlamaForCausalLM with Llama3 RoPE scaling
- Register Qwen3ForCausalLM and Qwen2ForCausalLM
- Update ModelRunner to use get_model_class() for dynamic model selection

Tested: needle 32k test PASSED

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -6,7 +6,7 @@ from multiprocessing.shared_memory import SharedMemory
|
||||
|
||||
from nanovllm.config import Config, SparsePolicyType
|
||||
from nanovllm.engine.sequence import Sequence
|
||||
-from nanovllm.models.qwen3 import Qwen3ForCausalLM
|
||||
+from nanovllm.models import get_model_class
|
||||
from nanovllm.layers.sampler import GreedySampler
|
||||
from nanovllm.utils.context import set_context, get_context, reset_context
|
||||
from nanovllm.utils.loader import load_model
|
||||
@@ -32,7 +32,8 @@ class ModelRunner:
|
||||
default_dtype = torch.get_default_dtype()
|
||||
torch.set_default_dtype(hf_config.torch_dtype)
|
||||
torch.set_default_device("cuda")
|
||||
-self.model = Qwen3ForCausalLM(hf_config)
|
||||
+model_class = get_model_class(hf_config)
|
||||
+self.model = model_class(hf_config)
|
||||
load_model(self.model, config.model)
|
||||
self.sampler = GreedySampler()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user