"""HawkGPT 0.2 — Model config. Larger model for math + dialogue.

Module-level constants only: filesystem locations resolved relative to this
file, followed by data, model, and training hyperparameters.
"""
import os

# --- Paths -----------------------------------------------------------------
# Everything is anchored at the directory containing this file, so the
# resolved paths do not depend on the current working directory.
PROJECT_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_DIR = os.path.join(PROJECT_DIR, "data")
CHECKPOINT_DIR = os.path.join(PROJECT_DIR, "checkpoints")
LOG_DIR = os.path.join(PROJECT_DIR, "logs")
TOKENIZER_PATH = os.path.join(DATA_DIR, "tokenizer.json")
DATA_TEXT_PATH = os.path.join(DATA_DIR, "training_corpus.txt")

# --- Data ------------------------------------------------------------------
MAX_SEQ_LEN = 128
TRAIN_SPLIT = 0.9  # presumably the training fraction of the corpus; verify against the loader
VOCAB_SIZE = 32000

# --- Model -----------------------------------------------------------------
# NOTE(review): this section was previously labelled "12M params", which does
# not match the dimensions below. Assuming a standard Transformer block layout
# (4 * EMBED_DIM^2 attention weights + 2 * EMBED_DIM * FF_DIM feed-forward
# weights per layer), the blocks alone come to roughly 25M parameters, plus
# about 16M for a VOCAB_SIZE x EMBED_DIM token embedding. Confirm against the
# model definition before quoting a figure.
EMBED_DIM = 512
NUM_HEADS = 8  # 512 / 8 = 64 dims per head if heads split EMBED_DIM evenly
NUM_LAYERS = 8
FF_DIM = 2048
DROPOUT = 0.1

# --- Training --------------------------------------------------------------
BATCH_SIZE = 16
LEARNING_RATE = 3e-4
WEIGHT_DECAY = 0.01
WARMUP_STEPS = 200
MAX_EPOCHS = 50
PATIENCE = 15  # presumably early-stopping patience; unit (epochs/evals) not shown here
MIXED_PRECISION = False
MAX_GRAD_NORM = 1.0