"""HawkGPT 0.4 — Config: optimized for speed.

Module-level constants only: filesystem locations derived from this file's
directory, followed by model and training hyperparameters.
"""

import os

# ---------------------------------------------------------------------------
# Paths — everything is anchored at the directory containing this file, so the
# values do not depend on the current working directory.
# ---------------------------------------------------------------------------
PROJECT_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_DIR = os.path.join(PROJECT_DIR, "data")
CHECKPOINT_DIR = os.path.join(PROJECT_DIR, "checkpoints")
LOG_DIR = os.path.join(PROJECT_DIR, "logs")

TOKENIZER_PATH = os.path.join(DATA_DIR, "tokenizer.json")
DATA_TEXT_PATH = os.path.join(DATA_DIR, "training_corpus.txt")

# Pre-tokenized files
DATA_INPUTS_PATH = os.path.join(DATA_DIR, "inputs.npy")
DATA_TARGETS_PATH = os.path.join(DATA_DIR, "targets.npy")

# ---------------------------------------------------------------------------
# Sequence / vocabulary
# ---------------------------------------------------------------------------
MAX_SEQ_LEN = 256
VOCAB_SIZE = 32000

# ---------------------------------------------------------------------------
# Model — 27M params, optimized arch
# ---------------------------------------------------------------------------
EMBED_DIM = 512
NUM_HEADS = 8
NUM_KV_HEADS = 2  # GQA: 8 query heads, 2 KV heads
NUM_LAYERS = 8
FF_DIM = 2048
DROPOUT = 0.0  # No dropout — faster, better memorization

# ---------------------------------------------------------------------------
# Training
# NOTE(review): the units of WARMUP_STEPS / PATIENCE and how MAX_GRAD_NORM is
# applied are defined by the training loop, which is not part of this module —
# confirm there before changing them.
# ---------------------------------------------------------------------------
BATCH_SIZE = 32  # GQA freed VRAM, can double batch
LEARNING_RATE = 3e-4  # Conservative LR — parallel block + 8 layers
WEIGHT_DECAY = 0.01
WARMUP_STEPS = 500
MAX_EPOCHS = 30
PATIENCE = 10
MIXED_PRECISION = False  # float32 — more stable, no NaNs
MAX_GRAD_NORM = 1.0