File size: 861 Bytes
8dbce4f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
"""KeyLM model implementation.

KeyLM-75M uses a Qwen3-style decoder (GQA + RoPE + SwiGLU + per-head
QK-RMSNorm). Rather than vendor a full copy of the transformer, the classes
below specialise the upstream Qwen3 implementation and bind it to
KeyLM75MConfig so the model loads under its own name via
`trust_remote_code=True`.
"""

try:
    from transformers.models.qwen3.modeling_qwen3 import Qwen3ForCausalLM, Qwen3Model
except ImportError as exc:  # pragma: no cover - guidance for old transformers
    raise ImportError(
        "KeyLM requires a transformers version that ships the Qwen3 model "
        "(transformers>=4.51). Please upgrade transformers."
    ) from exc

from .configuration_keylm import KeyLM75MConfig


class KeyLM75MModel(Qwen3Model):
    """Upstream ``Qwen3Model`` bound to ``KeyLM75MConfig``.

    Nothing from the parent class is overridden here; the subclass only
    replaces ``config_class`` so the model is associated with the KeyLM
    configuration rather than the stock Qwen3 one.
    """
    # The single customisation: point the inherited implementation at the
    # KeyLM configuration class.
    # NOTE(review): presumably this is what transformers' loading machinery
    # consults when resolving the config for this model — confirm upstream.
    config_class = KeyLM75MConfig


class KeyLM75M(Qwen3ForCausalLM):
    """Upstream ``Qwen3ForCausalLM`` bound to ``KeyLM75MConfig``.

    Nothing from the parent class is overridden here; the subclass only
    replaces ``config_class`` so the causal-LM model is associated with the
    KeyLM configuration rather than the stock Qwen3 one.
    """
    # The single customisation: point the inherited implementation at the
    # KeyLM configuration class.
    # NOTE(review): the parent's constructor is inherited unchanged, so any
    # inner model it builds is whatever upstream instantiates — verify
    # whether that matters for checkpoint key names.
    config_class = KeyLM75MConfig