"""HawkGPT 0.4 — Config: optimized for speed.

Module-level constants for filesystem locations, tokenizer/sequence
limits, model architecture, and training hyperparameters. All paths are
absolute strings rooted at the directory containing this file.
"""
import os

# --- Filesystem layout -------------------------------------------------
# Everything is resolved relative to this file's directory, so the
# project can be launched from any working directory.
PROJECT_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_DIR = os.path.join(PROJECT_DIR, "data")
CHECKPOINT_DIR = os.path.join(PROJECT_DIR, "checkpoints")
LOG_DIR = os.path.join(PROJECT_DIR, "logs")

TOKENIZER_PATH = os.path.join(DATA_DIR, "tokenizer.json")
DATA_TEXT_PATH = os.path.join(DATA_DIR, "training_corpus.txt")

# Pre-tokenized files
DATA_INPUTS_PATH = os.path.join(DATA_DIR, "inputs.npy")
DATA_TARGETS_PATH = os.path.join(DATA_DIR, "targets.npy")

# --- Tokenizer / sequence limits ---------------------------------------
MAX_SEQ_LEN = 256
VOCAB_SIZE = 32000

# --- Model — 27M params (author's figure), optimized arch --------------
EMBED_DIM = 512
NUM_HEADS = 8
NUM_KV_HEADS = 2  # GQA: 8 query heads, 2 KV heads
NUM_LAYERS = 8
FF_DIM = 2048
DROPOUT = 0.0  # No dropout — faster, better memorization

# --- Training hyperparameters ------------------------------------------
BATCH_SIZE = 32  # GQA freed VRAM, can double batch
LEARNING_RATE = 3e-4  # Conservative LR — parallel block + 8 layers
WEIGHT_DECAY = 0.01
WARMUP_STEPS = 500
MAX_EPOCHS = 30
PATIENCE = 10
MIXED_PRECISION = False  # float32 — more stable, no NaN
MAX_GRAD_NORM = 1.0