HawkGPT-v0.1 / config.py
HawkLabofficial's picture
Upload config.py with huggingface_hub
d832056 verified
Raw
History Blame Contribute Delete
742 Bytes
"""GPT-2 style model configuration for RTX 4070 (12GB VRAM)."""
import os
# ---------------------------------------------------------------------------
# Filesystem layout: every path is anchored at the directory of this file.
# ---------------------------------------------------------------------------
PROJECT_DIR = os.path.dirname(os.path.abspath(__file__))

# Direct children of the project directory.
DATA_DIR, CHECKPOINT_DIR, LOG_DIR = (
    os.path.join(PROJECT_DIR, sub) for sub in ("data", "checkpoints", "logs")
)

# Files that live inside DATA_DIR.
TOKENIZER_PATH, DATA_TEXT_PATH = (
    os.path.join(DATA_DIR, fname)
    for fname in ("tokenizer.json", "russian_dialogues.txt")
)

# ---------------------------------------------------------------------------
# Data / tokenization
# ---------------------------------------------------------------------------
MAX_SEQ_LEN: int = 64
TRAIN_SPLIT: float = 0.9  # presumably the train fraction of the data; confirm in the loader
VOCAB_SIZE: int = 16000

# ---------------------------------------------------------------------------
# Model
# ---------------------------------------------------------------------------
EMBED_DIM: int = 256
NUM_HEADS: int = 4
NUM_LAYERS: int = 6
FF_DIM: int = 1024  # numerically 4 x EMBED_DIM
DROPOUT: float = 0.15

# ---------------------------------------------------------------------------
# Optimization / training loop
# ---------------------------------------------------------------------------
BATCH_SIZE: int = 32
LEARNING_RATE: float = 5e-4
WEIGHT_DECAY: float = 0.01
WARMUP_STEPS: int = 50
MAX_EPOCHS: int = 100
PATIENCE: int = 20  # presumably early-stopping patience; confirm in the trainer
# Kept off on purpose: float32 is more stable for small models.
MIXED_PRECISION: bool = False
MAX_GRAD_NORM: float = 1.0