KeyLM-75M / configuration_keylm.py

Add KeyLM-75M base model (bf16, from-scratch, ~18B tokens)

8dbce4f verified 2 days ago

459 Bytes

	"""KeyLM model configuration.

	KeyLM-75M is a from-scratch small language model. Its decoder block follows a
	Qwen3-style layout (grouped-query attention, RoPE, SwiGLU, and per-head
	QK-RMSNorm), so the configuration inherits from Qwen3Config and overrides
	only ``model_type``, giving the model its own identity on the Hub.
	"""

	from transformers.models.qwen3.configuration_qwen3 import Qwen3Config


	class KeyLM75MConfig(Qwen3Config):
	    """Configuration class for the KeyLM-75M model.

	    Subclasses ``Qwen3Config`` without adding or changing any fields; the
	    only override is the ``model_type`` class attribute.
	    """

	    # Own model-type string, so the model is identified as KeyLM rather than
	    # by the model type inherited from the Qwen3 parent config.
	    model_type = "keylm75m"