File size: 1,061 Bytes
Start training (first run):
python train.py ^
--config 150M ^
--data_dir tokenizer/data ^
--batch_size 2 ^
--grad_accum 16 ^
--grad_checkpoint ^
--dtype bf16 ^
--max_steps 5000 ^
--run_dir runs/sllm_150m ^
--log_every 10 ^
--save_every 500 ^
--val_every 500 ^
--val_steps 20 ^
--warmup_steps 200
Resume from where you stopped:
python train.py --resume --data_dir tokenizer/data --batch_size 2 --grad_accum 16 --grad_checkpoint --dtype bf16 --extra_steps 5000 --run_dir runs/sllm_150m --log_every 10 --save_every 500 --val_every 500 --val_steps 20 --warmup_steps 200
Plot while training (in a second terminal):
conda activate pytorch
cd c:\geetesh\aimldl\projects\sllm
python plot_training.py --run_dir runs/sllm_150m --live --interval 30
Fine-tune for chat (prepare data, train, then chat):
python finetune/prepare_data.py
python finetune/sft_train.py --base_ckpt runs/sllm_150m/ckpt_0011500.pt --run_dir runs/sllm_150m_chat --max_steps 2500 --batch_size 4 --grad_accum 8 --grad_checkpoint
python finetune/chat.py --run_dir runs/sllm_150m_chat