"""HawkGPT 0.3 — Config: fast learning, math-focused, efficient.

Module-level constants only: filesystem locations, data settings, model
dimensions, optimisation hyperparameters and checkpoint policy. All paths
are built from the directory that contains this file.
"""
import os

# --- Paths (all rooted at the directory containing this file) ---
PROJECT_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_DIR = os.path.join(PROJECT_DIR, "data")
CHECKPOINT_DIR = os.path.join(PROJECT_DIR, "checkpoints")
LOG_DIR = os.path.join(PROJECT_DIR, "logs")
TOKENIZER_PATH = os.path.join(DATA_DIR, "tokenizer.json")
DATA_TEXT_PATH = os.path.join(DATA_DIR, "training_corpus.txt")

# --- Data ---
MAX_SEQ_LEN = 256
TRAIN_SPLIT = 0.9  # presumably the fraction of data used for training — confirm in the data loader
VOCAB_SIZE = 32000

# --- Model — ~30M params ---
# NOTE(review): for a textbook transformer block (4*d^2 attention + 2*d*ff
# feed-forward weights) these dimensions give roughly 25M block parameters
# plus ~16M for a 32000 x 512 token embedding. Confirm the "~30M" figure
# against the actual model definition.
EMBED_DIM = 512
NUM_HEADS = 8  # 512 / 8 = 64 dimensions per head
NUM_LAYERS = 8
FF_DIM = 2048
DROPOUT = 0.05  # Low dropout — we want to memorize math

# --- Training ---
BATCH_SIZE = 16
GRAD_ACCUM_STEPS = 4  # Effective batch = 64 (BATCH_SIZE * GRAD_ACCUM_STEPS)
LEARNING_RATE = 5e-4  # Aggressive LR for fast convergence
WEIGHT_DECAY = 0.01
WARMUP_STEPS = 200
MAX_EPOCHS = 30
PATIENCE = 10  # presumably early-stopping patience in epochs — confirm in the training loop
MIXED_PRECISION = False  # float32 — stable for 30M model
MAX_GRAD_NORM = 1.0

# --- Checkpointing ---
SAVE_EVERY_N_EPOCHS = 1  # Save checkpoint every N epochs
KEEP_TOP_N_CHECKPOINTS = 3