#!/usr/bin/env bash
#
# Launch an open_lm training run with torchrun: module `open_lm.main`,
# model `open_lm_1b`, 8 worker processes on this node, JSONL train/val data,
# run name `open_lm_alpaca`.
#
# Usage:    bash <this-script>
# Requires: `torchrun` on PATH and the `open_lm` package importable by it.
#
# The meaning of each individual flag is defined by open_lm's own argument
# parser; this script only fixes the values used for this run.
set -euo pipefail

# Expose GPUs 0-7 to the job. Eight devices are listed here to match
# --nproc-per-node 8 in the torchrun invocation below; change both together.
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7

# Directory that holds both the training and validation JSONL files.
readonly DATA_DIR=/home/ubuntu/model_sft/open_lm_wozai/open_lm_wozai/open_lm

# Arguments forwarded to open_lm.main. They are kept in an array so that no
# line-continuation backslashes are needed (a stray trailing backslash or
# trailing whitespace after one silently breaks the command) and so that
# individual entries can be commented.
train_args=(
  --model open_lm_1b
  --train-data "${DATA_DIR}/train_data.jsonl"
  --val-data "${DATA_DIR}/val_data.jsonl"
  --workers 1
  --dataset-resampled
  --precision amp_bfloat16
  --grad-checkpointing
  --log-every-n-steps 20
  --grad-clip-norm 1
  # All three data-format selectors are set to jsonl to match the input files.
  --data-key jsonl
  --val-data-key jsonl
  --dataset-type jsonl
  --lr 1e-5
  --fsdp --fsdp-amp
  --warmup 400
  --wd 0.1
  --beta2 0.95
  --epochs 5
  --report-to tensorboard
  --name open_lm_alpaca
)

torchrun --nproc-per-node 8 -m open_lm.main "${train_args[@]}"