HawkGPT-v0.5 / config.py
HawkLabofficial's picture
Upload config.py with huggingface_hub
f5fb5e4 verified
Raw
History Blame Contribute Delete
1.09 kB
"""HawkGPT 0.5 — Config: massive data, mixed precision, turbo training."""
import os
# Filesystem layout — every path below is anchored at the directory that
# contains this config file.
PROJECT_DIR: str = os.path.split(os.path.abspath(__file__))[0]

# Working directories directly under the project root.
DATA_DIR: str = os.path.join(PROJECT_DIR, "data")
CHECKPOINT_DIR: str = os.path.join(PROJECT_DIR, "checkpoints")
LOG_DIR: str = os.path.join(PROJECT_DIR, "logs")

# Individual files kept inside DATA_DIR.
TOKENIZER_PATH: str = os.path.join(DATA_DIR, "tokenizer.json")
DATA_TEXT_PATH: str = os.path.join(DATA_DIR, "training_corpus.txt")
DATA_INPUTS_PATH: str = os.path.join(DATA_DIR, "inputs.npy")
DATA_TARGETS_PATH: str = os.path.join(DATA_DIR, "targets.npy")
# Sequence and vocabulary dimensions.
MAX_SEQ_LEN: int = 256
VOCAB_SIZE: int = 32_000

# Model architecture — the design proven in v0.4 (GQA + RMSNorm + ALiBi).
EMBED_DIM: int = 512
NUM_LAYERS: int = 8
NUM_HEADS: int = 8
# Fewer than NUM_HEADS; presumably the shared key/value head count used by
# GQA — confirm against the model code.
NUM_KV_HEADS: int = 2
FF_DIM: int = 2_048  # equals 4 * EMBED_DIM
DROPOUT: float = 0.0
# Training — mixed precision is enabled for roughly 2x speed on the
# RTX 4070's tensor cores.
BATCH_SIZE: int = 96
MAX_EPOCHS: int = 30
PATIENCE: int = 10  # presumably early-stopping patience — confirm in the training loop

# Optimiser and schedule settings.
LEARNING_RATE: float = 6e-4
WEIGHT_DECAY: float = 0.01
WARMUP_STEPS: int = 1_000  # longer warmup to go with the larger dataset
MAX_GRAD_NORM: float = 1.0  # presumably the gradient-clipping threshold — confirm in the training loop

# Precision and weight averaging.
MIXED_PRECISION: bool = True  # float16, to take advantage of tensor cores
EMA_DECAY: float = 0.999  # Exponential Moving Average decay; intended as a low-cost quality boost