File size: 861 Bytes

8dbce4f

"""KeyLM model implementation.

KeyLM-75M uses a Qwen3-style decoder (GQA + RoPE + SwiGLU + per-head
QK-RMSNorm). Rather than vendor a full copy of the transformer, the classes
below specialise the upstream Qwen3 implementation and bind it to
`KeyLM75MConfig` so the model loads under its own name via
`trust_remote_code=True`.
"""

try:
    from transformers.models.qwen3.modeling_qwen3 import Qwen3ForCausalLM, Qwen3Model
except ImportError as exc:  # pragma: no cover - guidance for old transformers
    raise ImportError(
        "KeyLM requires a transformers version that ships the Qwen3 model "
        "(transformers>=4.51). Please upgrade transformers."
    ) from exc

from .configuration_keylm import KeyLM75MConfig


class KeyLM75MModel(Qwen3Model):
    """``Qwen3Model`` subclass associated with ``KeyLM75MConfig``.

    The class body overrides nothing except ``config_class``; all layers and
    the forward pass are inherited from the upstream parent unchanged.
    """

    # Sole customisation: pair this model class with the KeyLM-75M config.
    config_class = KeyLM75MConfig


class KeyLM75M(Qwen3ForCausalLM):
    """``Qwen3ForCausalLM`` subclass associated with ``KeyLM75MConfig``.

    Only ``config_class`` is overridden; the language-model head, the forward
    pass and everything else are inherited from the upstream parent unchanged.
    """

    # NOTE(review): the inherited constructor presumably builds a plain
    # Qwen3Model backbone rather than KeyLM75MModel -- confirm whether that
    # distinction matters for loading/saving.
    # Sole customisation: pair this model class with the KeyLM-75M config.
    config_class = KeyLM75MConfig