# Seed-0.5B — configuration_seed.py
# Uploaded via huggingface_hub by merterbak (commit d68c7eb, verified)
from transformers import PretrainedConfig
class SeedConfig(PretrainedConfig):
    """Configuration class for Seed models.

    Stores the architecture hyperparameters under the model's native names
    (``n_embd``, ``n_layer``, ...) and additionally mirrors the ones most
    Hugging Face tooling expects (``hidden_size``, ``num_hidden_layers``,
    ``num_attention_heads``, ``num_key_value_heads``).

    Args:
        vocab_size: Size of the token vocabulary.
        n_embd: Embedding / hidden dimension.
        n_layer: Number of transformer layers.
        n_head: Number of attention heads.
        n_kv_head: Number of key/value heads (grouped-query attention).
        head_dim: Dimension of each attention head.
        mlp_hidden_dim: Hidden dimension of the feed-forward block.
        block_size: Maximum sequence length.
        bias: Whether linear layers use bias terms.
        dropout: Dropout probability.
        rope_theta: Base frequency for rotary position embeddings.
        rope_scaling_type: RoPE scaling strategy name ("none" disables it).
        rope_scaling_factor: RoPE scaling multiplier.
        rms_norm_eps: Epsilon used by RMSNorm layers.
        tie_word_embeddings: Whether input and output embeddings are tied.
        **kwargs: Forwarded to :class:`~transformers.PretrainedConfig`.
    """

    model_type = "seed"

    def __init__(
        self,
        vocab_size: int = 64000,
        n_embd: int = 1024,
        n_layer: int = 28,
        n_head: int = 16,
        n_kv_head: int = 8,
        head_dim: int = 128,
        mlp_hidden_dim: int = 3072,
        block_size: int = 4096,
        bias: bool = False,
        dropout: float = 0.0,
        rope_theta: float = 1000000.0,
        rope_scaling_type: str = "none",
        rope_scaling_factor: float = 1.0,
        rms_norm_eps: float = 1e-6,
        tie_word_embeddings: bool = True,
        **kwargs,
    ):
        # Record every hyperparameter under its native name.
        for attr, value in (
            ("vocab_size", vocab_size),
            ("n_embd", n_embd),
            ("n_layer", n_layer),
            ("n_head", n_head),
            ("n_kv_head", n_kv_head),
            ("head_dim", head_dim),
            ("mlp_hidden_dim", mlp_hidden_dim),
            ("block_size", block_size),
            ("bias", bias),
            ("dropout", dropout),
            ("rope_theta", rope_theta),
            ("rope_scaling_type", rope_scaling_type),
            ("rope_scaling_factor", rope_scaling_factor),
            ("rms_norm_eps", rms_norm_eps),
        ):
            setattr(self, attr, value)
        # Mirror the values under the conventional Hugging Face attribute
        # names so generic tooling (e.g. auto-sizing, generation utils)
        # can read the config.
        self.hidden_size = n_embd
        self.num_hidden_layers = n_layer
        self.num_attention_heads = n_head
        self.num_key_value_heads = n_kv_head
        # Call the base constructor last so remaining kwargs (and
        # tie_word_embeddings) are handled by PretrainedConfig, matching
        # the standard HF config pattern.
        super().__init__(tie_word_embeddings=tie_word_embeddings, **kwargs)