Upload folder using huggingface_hub

feba2ad verified 6 months ago

662 Bytes

	"""
	Model Config

	Specifies the hyperparameters that define the Pico model architecture.
	"""

	from dataclasses import dataclass
	from typing import Optional

	from ._constants import BATCH_SIZE, MAX_SEQ_LEN, VOCAB_SIZE


	@dataclass
	class ModelConfig:
	    """Hyperparameters describing the Pico model architecture.

	    Every field has a default, so ``ModelConfig()`` yields the default
	    ``pico_decoder`` configuration; pass keyword arguments to override
	    individual values.

	    NOTE(review): the per-field comments are inferred from the field names
	    only. How each value is consumed is decided by model code that is not
	    part of this file, so treat them as assumptions to confirm.
	    """

	    # Name of the architecture this configuration describes.
	    model_type: str = "pico_decoder"

	    # Pico Decoder default hyperparameters

	    # presumably the hidden (embedding) width of the model -- confirm
	    d_model: int = 768
	    # presumably the number of stacked decoder layers -- confirm
	    n_layers: int = 12

	    # These three defaults are taken from the package's ``_constants`` module.
	    vocab_size: int = VOCAB_SIZE
	    batch_size: int = BATCH_SIZE
	    max_seq_len: int = MAX_SEQ_LEN

	    # presumably the number of attention (query) heads; with the defaults,
	    # d_model / attention_n_heads = 768 / 12 = 64
	    attention_n_heads: int = 12
	    # presumably the number of key/value heads. Annotated Optional, so None
	    # is accepted; how None is interpreted is up to the consuming code.
	    attention_n_kv_heads: Optional[int] = 4

	    # presumably the feed-forward hidden width; the default is 4 * d_model
	    # (4 * 768 = 3072)
	    activation_hidden_dim: int = 3072

	    # presumably the epsilon added inside the normalization layers -- confirm
	    norm_eps: float = 1e-6

	    # presumably the base (theta) of the positional embedding -- confirm
	    position_emb_theta: float = 10000.0