HawkGPT-v0.2 / config.py
HawkLabofficial's picture
Upload config.py with huggingface_hub
94aec8f verified
Raw
History Blame Contribute Delete
712 Bytes
"""HawkGPT 0.2 — Model config. Larger model for math + dialogue."""
import os
# ---------------------------------------------------------------------------
# Filesystem layout
# ---------------------------------------------------------------------------
# Every path is anchored at the directory containing this file, so the values
# do not depend on the process's current working directory.
PROJECT_DIR: str = os.path.dirname(os.path.abspath(__file__))

DATA_DIR: str = os.path.join(PROJECT_DIR, "data")
CHECKPOINT_DIR: str = os.path.join(PROJECT_DIR, "checkpoints")
LOG_DIR: str = os.path.join(PROJECT_DIR, "logs")

# Files that live inside DATA_DIR.
TOKENIZER_PATH: str = os.path.join(DATA_DIR, "tokenizer.json")
DATA_TEXT_PATH: str = os.path.join(DATA_DIR, "training_corpus.txt")

# ---------------------------------------------------------------------------
# Data / tokenisation
# ---------------------------------------------------------------------------
# NOTE(review): the consumers of these values are not visible from this file;
# the descriptions below are read off the names and should be confirmed.
MAX_SEQ_LEN: int = 128       # presumably the sequence length, in tokens
TRAIN_SPLIT: float = 0.9     # presumably the fraction of data used for training
VOCAB_SIZE: int = 32000

# ---------------------------------------------------------------------------
# Model architecture
# ---------------------------------------------------------------------------
# NOTE(review): this section used to be labelled "12M params". With the sizes
# below, VOCAB_SIZE * EMBED_DIM alone is 32000 * 512 = 16,384,000, so if the
# model has an embedding table of that shape the label is stale — verify the
# real parameter count against the model code.
EMBED_DIM: int = 512
NUM_HEADS: int = 8           # EMBED_DIM / NUM_HEADS = 64
NUM_LAYERS: int = 8
FF_DIM: int = 2048           # 4 * EMBED_DIM
DROPOUT: float = 0.1

# ---------------------------------------------------------------------------
# Optimisation / training loop
# ---------------------------------------------------------------------------
BATCH_SIZE: int = 16
LEARNING_RATE: float = 3e-4
WEIGHT_DECAY: float = 0.01
WARMUP_STEPS: int = 200
MAX_EPOCHS: int = 50
PATIENCE: int = 15           # presumably early-stopping patience — TODO confirm
MIXED_PRECISION: bool = False
MAX_GRAD_NORM: float = 1.0