from dataclasses import dataclass, field, asdict
@dataclass
class ModelConfig:
    """Architecture hyperparameters for the transformer model."""

    vocab_size: int = 8192        # tokenizer vocabulary size
    n_layer: int = 8              # number of transformer blocks
    n_head: int = 8               # attention heads per block
    n_embd: int = 512             # embedding / hidden dimension
    block_size: int = 1024        # maximum sequence length (context window)
    rope_base: float = 10000.0    # presumably the RoPE frequency base — confirm in attention code
    mlp_mult: int = 4             # presumably MLP hidden = mlp_mult * n_embd — confirm in model code
    dropout: float = 0.0          # dropout probability (0.0 disables)
    tie_embeddings: bool = True   # presumably ties input embedding and output head weights

    @property
    def head_dim(self) -> int:
        """Per-head dimension: n_embd split evenly across n_head.

        Raises:
            ValueError: if n_embd is not divisible by n_head.
        """
        # Raise instead of assert: asserts are stripped under `python -O`,
        # so an indivisible config would silently pass validation there.
        if self.n_embd % self.n_head != 0:
            raise ValueError(
                f"n_embd ({self.n_embd}) must be divisible by n_head ({self.n_head})"
            )
        return self.n_embd // self.n_head
@dataclass
class TrainConfig:
    """Training-run hyperparameters and bookkeeping settings.

    Field order and defaults are part of the public interface
    (positional construction) and must not be reordered.
    """

    # --- paths ---
    out_dir: str = "checkpoints"              # where checkpoints are written
    data_dir: str = "data"                    # training data location
    tokenizer_path: str = "data/tokenizer.json"

    # --- batch / schedule ---
    batch_size: int = 32                      # micro-batch size per step
    grad_accum: int = 4                       # gradient accumulation steps
    max_steps: int = 20000                    # total optimizer steps
    eval_interval: int = 500                  # steps between evaluations
    eval_iters: int = 100                     # batches averaged per evaluation
    log_interval: int = 20                    # steps between log lines
    save_interval: int = 2000                 # steps between checkpoint saves

    # --- optimizer ---
    lr: float = 6e-4                          # peak learning rate
    min_lr: float = 6e-5                      # floor for LR decay
    warmup_steps: int = 200                   # linear warmup duration
    weight_decay: float = 0.1
    beta1: float = 0.9                        # AdamW-style betas — confirm against optimizer setup
    beta2: float = 0.95
    grad_clip: float = 1.0                    # gradient-norm clip threshold

    # --- runtime ---
    dtype: str = "bfloat16"                   # compute dtype name
    compile: bool = True                      # presumably toggles torch.compile — verify in trainer
    seed: int = 1337
    device: str = "cuda"

    def to_dict(self):
        """Return all fields as a plain dict (e.g. for checkpoint metadata)."""
        return asdict(self)