"""KeyLM model implementation. KeyLM-75M uses a Qwen3-style decoder (GQA + RoPE + SwiGLU + per-head QK-RMSNorm). Rather than vendor a full copy of the transformer, the classes below specialise the upstream Qwen3 implementation and bind it to KeyLMConfig so the model loads under its own name via `trust_remote_code=True`. """ try: from transformers.models.qwen3.modeling_qwen3 import Qwen3ForCausalLM, Qwen3Model except ImportError as exc: # pragma: no cover - guidance for old transformers raise ImportError( "KeyLM requires a transformers version that ships the Qwen3 model " "(transformers>=4.51). Please upgrade transformers." ) from exc from .configuration_keylm import KeyLM75MConfig class KeyLM75MModel(Qwen3Model): config_class = KeyLM75MConfig class KeyLM75M(Qwen3ForCausalLM): config_class = KeyLM75MConfig