"""HawkGPT 0.2 model configuration.

Central place for filesystem locations and hyperparameters of the larger
model variant aimed at math and dialogue. Every value is a plain
module-level constant; nothing here touches the filesystem.
"""

import os

# ---------------------------------------------------------------------------
# Filesystem layout (all paths are absolute, rooted at this file's directory)
# ---------------------------------------------------------------------------
PROJECT_DIR = os.path.dirname(os.path.abspath(__file__))

# Sub-directories that live directly under the project root.
DATA_DIR, CHECKPOINT_DIR, LOG_DIR = (
    os.path.join(PROJECT_DIR, subdir)
    for subdir in ("data", "checkpoints", "logs")
)

# Files that live inside DATA_DIR.
TOKENIZER_PATH, DATA_TEXT_PATH = (
    os.path.join(DATA_DIR, filename)
    for filename in ("tokenizer.json", "training_corpus.txt")
)

# ---------------------------------------------------------------------------
# Data / tokenization
# ---------------------------------------------------------------------------
MAX_SEQ_LEN = 128
TRAIN_SPLIT = 0.9  # presumably the fraction of data used for training -- confirm
VOCAB_SIZE = 32_000

# ---------------------------------------------------------------------------
# Model
# ---------------------------------------------------------------------------
# The previous comment here labelled this configuration "12M params".
# NOTE(review): VOCAB_SIZE * EMBED_DIM is already 16,384,000, so if the model
# has a vocab-by-embed token table that figure looks stale -- confirm against
# the model definition before quoting a parameter count.
EMBED_DIM = 512
NUM_HEADS = 8  # EMBED_DIM is evenly divisible by this (512 / 8 = 64)
NUM_LAYERS = 8
FF_DIM = 2048
DROPOUT = 0.1

# ---------------------------------------------------------------------------
# Training / optimization
# ---------------------------------------------------------------------------
BATCH_SIZE = 16
LEARNING_RATE = 3e-4
WEIGHT_DECAY = 0.01
WARMUP_STEPS = 200
MAX_EPOCHS = 50
PATIENCE = 15  # presumably early-stopping patience -- verify against trainer
MIXED_PRECISION = False
MAX_GRAD_NORM = 1.0