Dwarf-15M / configuration_dwarf.py
ThingsAI's picture
Upload folder using huggingface_hub
a6a57d5 verified
Raw
History Blame Contribute Delete
854 Bytes
from transformers import PretrainedConfig
class DwarfConfig(PretrainedConfig):
    """Configuration object for models registered as ``model_type = "dwarf"``.

    Every constructor argument is stored on the instance under its own name.
    Four of them are additionally copied, at construction time, to a second
    attribute name:

    * ``num_hidden_layers``   <- ``n_layers``
    * ``hidden_size``         <- ``d_model``
    * ``num_attention_heads`` <- ``n_heads``
    * ``num_key_value_heads`` <- ``n_kv_heads``

    Args:
        vocab_size: Stored as ``self.vocab_size``. Defaults to 8202.
        d_model: Stored as ``self.d_model`` and ``self.hidden_size``.
            Defaults to 320.
        n_layers: Stored as ``self.n_layers`` and ``self.num_hidden_layers``.
            Defaults to 12.
        n_heads: Stored as ``self.n_heads`` and ``self.num_attention_heads``.
            Defaults to 5.
        n_kv_heads: Stored as ``self.n_kv_heads`` and
            ``self.num_key_value_heads``. Defaults to 1.
        d_ff: Stored as ``self.d_ff``. Defaults to 864.
        max_seq_len: Stored as ``self.max_seq_len``. Defaults to 2048.
        rope_theta: Stored as ``self.rope_theta``. Defaults to 10000.0.
        norm_eps: Stored as ``self.norm_eps``. Defaults to 1e-5.
        head_dim: Stored as ``self.head_dim``. Defaults to 64.
        **kwargs: Forwarded unchanged to ``PretrainedConfig.__init__``.

    NOTE(review): the meaning of each hyperparameter is only implied by its
    name here -- confirm against the modeling code. No consistency check is
    performed between the values (e.g. ``n_heads * head_dim`` versus
    ``d_model``; the defaults give 5 * 64 == 320).

    NOTE(review): the second-name attributes are written once in
    ``__init__`` by ordinary assignment; nothing in this class re-syncs them
    if one of the source attributes is changed afterwards.
    """

    model_type = "dwarf"

    def __init__(
        self,
        vocab_size=8202,
        d_model=320,
        n_layers=12,
        n_heads=5,
        n_kv_heads=1,
        d_ff=864,
        max_seq_len=2048,
        rope_theta=10000.0,
        norm_eps=1e-5,
        head_dim=64,
        **kwargs,
    ):
        # Hyperparameters under the names used by this config's signature.
        self.vocab_size = vocab_size
        self.d_model = d_model
        self.n_layers = n_layers
        self.n_heads = n_heads
        self.n_kv_heads = n_kv_heads
        self.d_ff = d_ff
        self.max_seq_len = max_seq_len
        self.rope_theta = rope_theta
        self.norm_eps = norm_eps
        self.head_dim = head_dim

        # Same values again under a second set of attribute names.
        self.num_hidden_layers = n_layers
        self.hidden_size = d_model
        self.num_attention_heads = n_heads
        self.num_key_value_heads = n_kv_heads

        # Base-class initialisation runs last and receives only the extras.
        super().__init__(**kwargs)