---
# PCT-V3 Training Config — QLoRA + BPE tokenizer
# Script: scripts/model_tiny.py
#
# NOTE(review): section nesting below was reconstructed from key order
# (data / training / qlora / cft as top-level mappings). Confirm against
# the keys that scripts/model_tiny.py actually reads.

# Dataset source and sequence-length settings.
data:
  hf_repo: "AI-MO/NuminaMath-CoT"
  hf_split: "train"
  # presumably the number of examples held out for evaluation — TODO confirm
  hf_num_eval: 50
  max_seq_length: 2048

# Optimisation loop, logging, and checkpointing settings.
training:
  output_dir: "outputs/tiny-qlora"
  run_name: "pct-v3-qlora"
  per_device_train_batch_size: 8
  per_device_eval_batch_size: 8
  gradient_accumulation_steps: 2
  max_grad_norm: 1.0
  # NOTE(review): both num_train_epochs and max_steps are set. If these are
  # forwarded to Hugging Face TrainingArguments, a positive max_steps
  # overrides num_train_epochs — confirm which limit is intended.
  num_train_epochs: 1
  max_steps: 50000
  # Written with a decimal point and signed exponent so YAML 1.1 loaders
  # (e.g. PyYAML) resolve it as a float rather than a string.
  learning_rate: 5.0e-4
  lr_scheduler_type: "cosine"
  warmup_ratio: 0.1
  weight_decay: 0.1
  use_cpu: false
  compile: false
  logging_steps: 5
  save_steps: 500
  seed: 42
  # presumably the Hugging Face Hub repo that checkpoints are pushed to —
  # verify against the script
  hf_repo_id: "samcheng0/lumia-tiny"

# QLoRA adapter settings (rank, alpha, dropout).
qlora:
  enabled: true
  r: 8
  alpha: 16
  dropout: 0.0

# "cft" stage settings; the acronym is not defined in this file.
cft:
  enabled: true
  # Relative path; resolution base depends on the consuming script.
  resume_checkpoint: "checkpoint.pt"
  reset_embeddings: true