# UltraBase / configuration_ultrabase.py
# 56m's picture
# Upload 6 files
# 2144393 verified
# Raw
# History Blame Contribute Delete
# 1.06 kB
from transformers import PretrainedConfig
class UltraBaseConfig(PretrainedConfig):
    """Hyperparameter container for the ``ultrabase`` model type.

    The constructor records its architecture arguments as same-named instance
    attributes and hands the token-id / embedding-tying options, together with
    any extra keyword arguments, to ``PretrainedConfig.__init__``.

    NOTE(review): the parameter descriptions in ``__init__`` are inferred from
    the argument names only; the modeling code that consumes this config is
    the authority on what each value means.
    """

    model_type = "ultrabase"

    def __init__(
        self,
        vocab_size=49152,
        d_model=256,
        n_layers=16,
        n_heads=12,
        latent_dim=64,
        head_dim=16,
        bypass_rate=0.375,
        num_private_experts=6,
        num_shared_experts=1,
        d_ff=256,
        bos_token_id=0,
        eos_token_id=0,
        tie_word_embeddings=True,
        **kwargs
    ):
        """Build a configuration from explicit values or the defaults.

        Args:
            vocab_size: Stored as ``self.vocab_size``; presumably the number
                of token ids -- confirm against the tokenizer.
            d_model: Stored as ``self.d_model``; presumably the hidden width.
            n_layers: Stored as ``self.n_layers``; presumably the layer count.
            n_heads: Stored as ``self.n_heads``; presumably the number of
                attention heads.
            latent_dim: Stored as ``self.latent_dim``; meaning not visible
                here -- TODO confirm.
            head_dim: Stored as ``self.head_dim``; presumably the per-head
                width.
            bypass_rate: Stored as ``self.bypass_rate``; meaning not visible
                here -- TODO confirm.
            num_private_experts: Stored as ``self.num_private_experts``.
            num_shared_experts: Stored as ``self.num_shared_experts``.
            d_ff: Stored as ``self.d_ff``; presumably the feed-forward width.
            bos_token_id: Forwarded to ``PretrainedConfig.__init__``.
            eos_token_id: Forwarded to ``PretrainedConfig.__init__``; note
                that its default is the same id as ``bos_token_id``.
            tie_word_embeddings: Forwarded to ``PretrainedConfig.__init__``.
            **kwargs: Any further keyword arguments, forwarded unchanged to
                ``PretrainedConfig.__init__``.
        """
        # The base class receives the generic options first, exactly as
        # before; the model-specific fields are attached afterwards.
        super().__init__(
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            tie_word_embeddings=tie_word_embeddings,
            **kwargs
        )

        # Vocabulary and overall model size.
        self.vocab_size = vocab_size
        self.d_model = d_model
        self.n_layers = n_layers

        # Attention-related sizes.
        # NOTE(review): with the defaults, n_heads * head_dim is 192, not
        # d_model (256) -- confirm this is intended by the modeling code.
        self.n_heads = n_heads
        self.latent_dim = latent_dim
        self.head_dim = head_dim

        # Routing / expert settings and feed-forward size.
        self.bypass_rate = bypass_rate
        self.num_private_experts = num_private_experts
        self.num_shared_experts = num_shared_experts
        self.d_ff = d_ff