# rex1-base / configuration_rex.py
# DavidSeyserHF's picture
# Upload REX1 step 29000
# a61b335 verified
"""Hugging Face configuration for REX."""
from __future__ import annotations
from transformers import PretrainedConfig
class RexConfig(PretrainedConfig):
    """Configuration container for REX models (``model_type = "rex"``).

    Every constructor argument is stored on the instance under the same
    name. ``tie_embeddings`` is additionally forwarded to the base class as
    ``tie_word_embeddings``.

    NOTE(review): the argument descriptions below are inferred from the
    parameter names and defaults; the model code that consumes them is not
    part of this file, so confirm against the modeling module.

    Args:
        vocab_size: Presumably the tokenizer vocabulary size.
        max_seq_len: Presumably the maximum supported sequence length.
        d_model: Presumably the hidden (embedding) width.
        n_heads: Presumably the number of attention heads.
        n_layers: Presumably the number of transformer layers.
        recurrence_steps: Presumably how many times the layer stack is
            applied recurrently.
        ffn_dim: Presumably the feed-forward inner width.
        dropout: Presumably the dropout probability.
        norm_eps: Presumably the epsilon used by normalization layers.
        tie_embeddings: Mirrored into the base class's
            ``tie_word_embeddings`` setting.
        use_step_embeddings: Presumably toggles per-recurrence-step
            embeddings.
        initializer_range: Presumably the std-dev for weight initialization.
        tokenizer_name: Name of the tokenizer associated with the model
            (presumably a Hub identifier).
        **kwargs: Forwarded unchanged to ``PretrainedConfig.__init__``,
            except for ``tie_word_embeddings`` (see below).
    """

    model_type = "rex"

    def __init__(
        self,
        vocab_size: int = 50_257,
        max_seq_len: int = 2048,
        d_model: int = 1536,
        n_heads: int = 16,
        n_layers: int = 8,
        recurrence_steps: int = 2,
        ffn_dim: int = 3968,
        dropout: float = 0.0,
        norm_eps: float = 1e-5,
        tie_embeddings: bool = True,
        use_step_embeddings: bool = True,
        initializer_range: float = 0.02,
        tokenizer_name: str = "gpt2",
        **kwargs,
    ):
        # ``kwargs`` may already contain ``tie_word_embeddings`` (e.g. when the
        # config is re-created from a serialized dict, since the base class
        # stores that attribute). Passing it twice would raise
        # ``TypeError: got multiple values for keyword argument``; drop the
        # duplicate so ``tie_embeddings`` stays the single source of truth.
        kwargs.pop("tie_word_embeddings", None)
        super().__init__(tie_word_embeddings=tie_embeddings, **kwargs)

        self.vocab_size = vocab_size
        self.max_seq_len = max_seq_len
        self.d_model = d_model
        self.n_heads = n_heads
        self.n_layers = n_layers
        self.recurrence_steps = recurrence_steps
        self.ffn_dim = ffn_dim
        self.dropout = dropout
        self.norm_eps = norm_eps
        self.tie_embeddings = tie_embeddings
        self.use_step_embeddings = use_step_embeddings
        self.initializer_range = initializer_range
        self.tokenizer_name = tokenizer_name

    def to_core_dict(self) -> dict[str, object]:
        """Return the model hyperparameters as a plain dictionary.

        The result holds the twelve fields listed below, in this order.
        ``tokenizer_name`` and any attributes set by the base class are not
        included.

        Returns:
            A new ``dict`` mapping each field name to its current value on
            this instance.
        """
        core_fields = (
            "vocab_size",
            "max_seq_len",
            "d_model",
            "n_heads",
            "n_layers",
            "recurrence_steps",
            "ffn_dim",
            "dropout",
            "norm_eps",
            "tie_embeddings",
            "use_step_embeddings",
            "initializer_range",
        )
        return {name: getattr(self, name) for name in core_fields}