Separate the Qwen2 implementation from the Qwen3 implementation. Qwen2: uses QKV bias, no QK norm. Qwen3: has an optional QK norm when there is no bias. Tested with Qwen2.5-7B-Instruct-1M; RULER niah_single_1 passed. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
12 lines
377 B
Python
12 lines
377 B
Python
"""Model registry and model implementations."""
|
|
|
|
from nanovllm.models.registry import register_model, get_model_class, MODEL_REGISTRY
|
|
|
|
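# Import each model module for its side effect: importing it triggers the
# model's registration. The imported names are intentionally not used here.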
|
|
from nanovllm.models import qwen2
|
|
from nanovllm.models import qwen3
|
|
from nanovllm.models import llama
|
|
from nanovllm.models import glm4
|
|
|
|
# Public API of this package: only the registry names imported from
# nanovllm.models.registry are exported. The model submodules (qwen2, qwen3,
# llama, glm4) are imported in this file but deliberately not listed here.
__all__ = ["register_model", "get_model_class", "MODEL_REGISTRY"]
|