"""HawkGPT 0.5 — Config: massive data, mixed precision, turbo training.""" import os PROJECT_DIR = os.path.dirname(os.path.abspath(__file__)) DATA_DIR = os.path.join(PROJECT_DIR, "data") CHECKPOINT_DIR = os.path.join(PROJECT_DIR, "checkpoints") LOG_DIR = os.path.join(PROJECT_DIR, "logs") TOKENIZER_PATH = os.path.join(DATA_DIR, "tokenizer.json") DATA_TEXT_PATH = os.path.join(DATA_DIR, "training_corpus.txt") DATA_INPUTS_PATH = os.path.join(DATA_DIR, "inputs.npy") DATA_TARGETS_PATH = os.path.join(DATA_DIR, "targets.npy") MAX_SEQ_LEN = 256 VOCAB_SIZE = 32000 # Model — proven arch from v0.4 (GQA + RMSNorm + ALiBi) EMBED_DIM = 512 NUM_HEADS = 8 NUM_KV_HEADS = 2 NUM_LAYERS = 8 FF_DIM = 2048 DROPOUT = 0.0 # Training — mixed precision for 2x speed on RTX 4070 tensor cores BATCH_SIZE = 96 LEARNING_RATE = 6e-4 WEIGHT_DECAY = 0.01 WARMUP_STEPS = 1000 # more data = longer warmup MAX_EPOCHS = 30 PATIENCE = 10 MIXED_PRECISION = True # float16 — tensor cores go brrr MAX_GRAD_NORM = 1.0 EMA_DECAY = 0.999 # Exponential Moving Average — free quality boost