from dataclasses import dataclass
from pathlib import Path


@dataclass
class SmallGPTConfig:
    """Default settings container with two derived filesystem locations.

    Every field has a default, so ``SmallGPTConfig()`` is a valid instance.
    Fields can be overridden positionally (in declaration order) or by
    keyword, as with any dataclass.

    NOTE(review): the field names suggest model hyperparameters
    (``d_model``, ``n_heads``, ``n_layers``, ``dropout``), training settings
    (``batch_size``, ``learning_rate``, ``bootstrap_steps``) and runtime
    settings (``cpu_threads``, ``seed``), but nothing in this file shows how
    they are consumed -- confirm against the code that reads this config.
    """

    block_size: int = 48
    batch_size: int = 20
    d_model: int = 96
    n_heads: int = 4
    n_layers: int = 3
    dropout: float = 0.1
    learning_rate: float = 2.5e-3
    bootstrap_steps: int = 80
    cpu_threads: int = 4
    seed: int = 42

    @property
    def root_dir(self) -> Path:
        """Return the parent of the directory that contains this file.

        The location is computed from the resolved (absolute) path of this
        module on each access; it does not depend on any field value.
        """
        this_file = Path(__file__).resolve()
        # parents[0] is the containing folder; parents[1] is one level up.
        return this_file.parents[1]

    @property
    def checkpoint_path(self) -> Path:
        """Return ``<root_dir>/artifacts/small_gpt_checkpoint.pt``.

        Only the path is built here; nothing is created or checked on disk.
        """
        artifacts_dir = self.root_dir / "artifacts"
        return artifacts_dir / "small_gpt_checkpoint.pt"