"""HawkGPT 0.3 — Config: fast learning, math-focused, efficient.

Module-level constants only: filesystem locations (all resolved relative to
the directory containing this file), data settings, model dimensions,
optimisation hyperparameters and checkpoint retention.
"""

import os

# --- Paths -------------------------------------------------------------------
# Everything is anchored on this file's directory, so the paths do not depend
# on the current working directory of the process importing the config.
PROJECT_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_DIR = os.path.join(PROJECT_DIR, "data")
CHECKPOINT_DIR = os.path.join(PROJECT_DIR, "checkpoints")
LOG_DIR = os.path.join(PROJECT_DIR, "logs")
TOKENIZER_PATH = os.path.join(DATA_DIR, "tokenizer.json")
DATA_TEXT_PATH = os.path.join(DATA_DIR, "training_corpus.txt")

# --- Data --------------------------------------------------------------------
MAX_SEQ_LEN = 256
TRAIN_SPLIT = 0.9
VOCAB_SIZE = 32000

# --- Model — ~30M params -----------------------------------------------------
EMBED_DIM = 512
NUM_HEADS = 8  # EMBED_DIM / NUM_HEADS = 64
NUM_LAYERS = 8
FF_DIM = 2048
DROPOUT = 0.05  # Low dropout — we want to memorize math

# --- Training ----------------------------------------------------------------
BATCH_SIZE = 16
GRAD_ACCUM_STEPS = 4  # Effective batch = 64 (BATCH_SIZE * GRAD_ACCUM_STEPS)
LEARNING_RATE = 5e-4  # Aggressive LR for fast convergence
WEIGHT_DECAY = 0.01
WARMUP_STEPS = 200
MAX_EPOCHS = 30
PATIENCE = 10
MIXED_PRECISION = False  # float32 — stable for 30M model
MAX_GRAD_NORM = 1.0

# --- Checkpointing -----------------------------------------------------------
SAVE_EVERY_N_EPOCHS = 1  # Save checkpoint every N epochs
KEEP_TOP_N_CHECKPOINTS = 3