ThomasTheMaker's picture
Upload folder using huggingface_hub
feba2ad verified
raw
history blame
662 Bytes
"""
Model Config
Specifies the hyperparameters for the Pico model architecture.
"""
from dataclasses import dataclass
from typing import Optional
from ._constants import BATCH_SIZE, MAX_SEQ_LEN, VOCAB_SIZE
@dataclass
class ModelConfig:
    """Hyperparameter bundle for the Pico model architecture.

    Every field carries a default, so ``ModelConfig()`` can be constructed
    with no arguments and individual values overridden by keyword. The
    vocabulary size, batch size and maximum sequence length take their
    defaults from the constants imported from ``._constants``.
    """

    # Architecture tag; presumably used elsewhere to select the model
    # implementation -- confirm against the consumer of this config.
    model_type: str = "pico_decoder"

    # ---- Pico Decoder default hyperparameters ----

    # Overall size of the network (by name: hidden width and layer count;
    # verify against the model code).
    d_model: int = 768
    n_layers: int = 12

    # Defaults shared with the rest of the package via ``._constants``.
    vocab_size: int = VOCAB_SIZE
    batch_size: int = BATCH_SIZE
    max_seq_len: int = MAX_SEQ_LEN

    # Attention head counts. The key/value head count is annotated Optional,
    # so None is an accepted value; how None is interpreted is decided by
    # the code that reads this config (not visible here).
    attention_n_heads: int = 12
    attention_n_kv_heads: Optional[int] = 4

    # Hidden size of the activation block (by name -- confirm usage).
    activation_hidden_dim: int = 3072

    # Epsilon handed to the normalization layers (by name -- confirm usage).
    norm_eps: float = 1e-6

    # Theta for the positional embedding (by name -- confirm usage).
    position_emb_theta: float = 10_000.0