KeyLM-75M-Instruct / modeling_keylm.py
Eclipse-Senpai's picture
main commit
a7de577 verified
raw
history blame contribute delete
861 Bytes
"""KeyLM model implementation.
KeyLM-75M uses a Qwen3-style decoder (GQA + RoPE + SwiGLU + per-head
QK-RMSNorm). Rather than vendor a full copy of the transformer, the classes
below specialise the upstream Qwen3 implementation and bind it to KeyLM75MConfig
so the model loads under its own name via `trust_remote_code=True`.
"""
try:
from transformers.models.qwen3.modeling_qwen3 import Qwen3ForCausalLM, Qwen3Model
except ImportError as exc: # pragma: no cover - guidance for old transformers
raise ImportError(
"KeyLM requires a transformers version that ships the Qwen3 model "
"(transformers>=4.51). Please upgrade transformers."
) from exc
from .configuration_keylm import KeyLM75MConfig
class KeyLM75MModel(Qwen3Model):
    """Qwen3 decoder backbone exposed under the KeyLM name.

    Nothing from ``Qwen3Model`` is overridden here; the class body only
    points ``config_class`` at ``KeyLM75MConfig``.
    """

    # Sole customisation: pair this model class with the KeyLM configuration.
    config_class = KeyLM75MConfig
class KeyLM75M(Qwen3ForCausalLM):
    """Causal language-model head variant exposed under the KeyLM name.

    Nothing from ``Qwen3ForCausalLM`` is overridden here; the class body only
    points ``config_class`` at ``KeyLM75MConfig``.
    """

    # Sole customisation: pair this model class with the KeyLM configuration.
    config_class = KeyLM75MConfig