File size: 459 Bytes
8dbce4f
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
"""KeyLM model configuration.

KeyLM-75M is a small language model trained from scratch. Its decoder block
follows the Qwen3 layout (grouped-query attention, RoPE, SwiGLU, and per-head
QK-RMSNorm), so the configuration inherits from Qwen3Config and overrides only
``model_type``, which gives the model its own identity on the Hub.
"""

from transformers.models.qwen3.configuration_qwen3 import Qwen3Config


class KeyLM75MConfig(Qwen3Config):
    """Configuration class for KeyLM-75M.

    Subclasses ``Qwen3Config`` without adding or changing any fields; the only
    override is the ``model_type`` class attribute, so every other setting and
    default comes from the parent class.
    """

    # Identifier string that distinguishes this configuration from the Qwen3
    # parent it inherits from.
    model_type = "keylm75m"