Create configuration_gslm.py
configuration_gslm.py (ADDED): +50 -0
@@ -0,0 +1,50 @@
+from transformers import PretrainedConfig
+
+class GSLMConfig(PretrainedConfig):
+    model_type = "gslm"
+
+    def __init__(
+        self,
+        vocab_size=200,  # number of discrete units
+        n_layer=12,
+        n_head=12,
+        n_embd=768,
+        seq_len=4096,
+        dropout=0.1,
+        bias=False,  # use bias in linear/ln layers
+        pos_embed="sinusoidal",  # 'sinusoidal' | 'learned' | 'rope' | 'none'
+        use_rope=None,  # kept for AuriStream API parity; if set, overrides pos_embed
+        rope_theta=500000,
+        n_pred_steps=1,  # >1 enables auxiliary future heads like AuriStream
+        activation="gelu",  # 'gelu' | 'silu'
+        norm_type="layernorm",  # 'layernorm' (fairseq compat) | 'rmsnorm'
+        attn_impl="fused_qkv",  # 'fused_qkv' (AuriStream-like) | 'separate_qkv' (fairseq-like)
+        tie_word_embeddings=True,
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+        self.vocab_size = int(vocab_size)
+        self.n_layer = int(n_layer)
+        self.n_head = int(n_head)
+        self.n_embd = int(n_embd)
+        self.seq_len = int(seq_len)
+        self.dropout = float(dropout)
+        self.bias = bool(bias)
+
+        # Positional embedding config
+        if use_rope is not None:
+            self.pos_embed = "rope" if use_rope else "learned"
+        else:
+            self.pos_embed = pos_embed
+        self.rope_theta = float(rope_theta)
+
+        # Multi-step heads
+        self.n_pred_steps = int(n_pred_steps)
+
+        # Blocks
+        self.activation = activation
+        self.norm_type = norm_type
+        self.attn_impl = attn_impl
+
+        # HF compat
+        self.tie_word_embeddings = bool(tie_word_embeddings)
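
For reference, a minimal usage sketch, not part of this commit and assuming transformers is installed: it instantiates the config, checks that use_rope overrides pos_embed as the constructor logic above intends, and does a standard PretrainedConfig save/load round trip. The checkpoint path is hypothetical.

# Usage sketch; not part of this commit.
from configuration_gslm import GSLMConfig

# use_rope takes precedence over pos_embed, per the override in __init__
config = GSLMConfig(use_rope=True, pos_embed="sinusoidal")
assert config.pos_embed == "rope"

# Standard PretrainedConfig round trip; custom attributes are serialized
# to config.json and restored on load.
config.save_pretrained("./gslm-checkpoint")  # hypothetical local path
reloaded = GSLMConfig.from_pretrained("./gslm-checkpoint")
assert reloaded.model_type == "gslm"
assert reloaded.pos_embed == "rope"

Because use_rope is never stored on the instance, only the resolved pos_embed value is written to config.json, so reloading reconstructs the same positional-embedding setting without re-running the override.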