# Source: Hugging Face repo "lumia-tiny", file train_tiny.yaml
# Uploaded by samcheng0 with huggingface_hub
# (commit e17bdac, verified; 773 bytes as uploaded)
---
# PCT-V3 Training Config — QLoRA + BPE tokenizer
# Script: scripts/model_tiny.py
#
# Layout: four top-level sections (data, training, qlora, cft). Each
# section's keys are nested two spaces under it, so the two `enabled`
# flags stay scoped to their own section instead of colliding as
# duplicate top-level keys.

# Dataset selection and sequence length.
data:
  hf_repo: "AI-MO/NuminaMath-CoT"
  hf_split: "train"
  # Presumably the number of examples held out for evaluation — confirm
  # against scripts/model_tiny.py.
  hf_num_eval: 50
  max_seq_length: 2048

# Optimizer, schedule, batching, logging and checkpoint settings.
training:
  output_dir: "outputs/tiny-qlora"
  run_name: "pct-v3-qlora"
  per_device_train_batch_size: 8
  per_device_eval_batch_size: 8
  gradient_accumulation_steps: 2
  max_grad_norm: 1.0
  # NOTE(review): both an epoch count and a step cap are set. If the script
  # forwards these to a Hugging Face Trainer, a positive max_steps takes
  # precedence over num_train_epochs — confirm which one is intended.
  num_train_epochs: 1
  max_steps: 50000
  learning_rate: 5.0e-4
  lr_scheduler_type: "cosine"
  warmup_ratio: 0.1
  weight_decay: 0.1
  use_cpu: false
  compile: false
  logging_steps: 5
  save_steps: 500
  seed: 42
  # Presumably the Hub repository that outputs are pushed to — confirm.
  hf_repo_id: "samcheng0/lumia-tiny"

# QLoRA adapter settings (enable flag, r, alpha, dropout).
qlora:
  enabled: true
  r: 8
  alpha: 16
  dropout: 0.0

# NOTE(review): "cft" is not defined in this file; the keys suggest resuming
# from a saved checkpoint with embeddings reset — confirm against the script.
cft:
  enabled: true
  resume_checkpoint: "checkpoint.pt"
  reset_embeddings: true