#!/usr/bin/env zsh
# Pipeline driver: SFT training -> DPO training -> checkpoint merging -> inference.
# Only the inference stage is currently enabled; the earlier stages are kept
# below as commented-out commands so they can be re-enabled.
#
# Run from the directory that contains infer.sh (it is invoked by relative
# path). Per-job output goes to logs/.

source ~/.zshrc
conda activate llama-factory

# Every job below redirects into logs/; without the directory the redirections
# fail before the job even starts.
mkdir -p logs

# --- Stage 1: full-parameter SFT (disabled) ---
# echo "[$(date)] SFT Training Start"
# CUDA_VISIBLE_DEVICES=0,1,2,3 FORCE_TORCHRUN=1 llamafactory-cli train qwen2.5_full_sft.yaml > logs/train_sft.log 2>&1 && \
# echo "[$(date)] SFT Training End"

# --- Stage 2: LoRA DPO (disabled) ---
# echo "[$(date)] DPO Training Start"
# CUDA_VISIBLE_DEVICES=0,1,2,3 FORCE_TORCHRUN=1 llamafactory-cli train qwen2.5_lora_dpo.yaml > logs/train_dpo.log 2>&1 && \
# echo "[$(date)] DPO Training End"

# --- Stage 3: merge checkpoints (disabled) ---
# echo "[$(date)] Merging Checkpoints"
# CUDA_VISIBLE_DEVICES=0 zsh run_merge_fix.sh /data/wyt/codes/DocDPO/sft/checkpoints_multilang/ted_base_balanced_en_zhdefr_320/sft 600 800 > logs/merge.log 2>&1 && \
# echo "[$(date)] Merging Checkpoints End"

# NOTE(review): this environment switch appears to be commented out in the
# original, so inference runs in the llama-factory environment -- confirm that
# is intended.
# conda activate optima-vllm

# --- Stage 4: inference ---

# Directory holding the merged DPO checkpoints (checkpoint-<step> subdirs).
ckpt_root=/data/wyt/codes/DocDPO/sft/checkpoints_multilang/ted_base_balanced_en_zhdefr_320/dpo/merged

#######################################
# Start one infer.sh job in the background, pinned to a single GPU.
# Globals:   ckpt_root (read)
# Arguments: $1 - GPU index (used for CUDA_VISIBLE_DEVICES and passed through
#                 as infer.sh's first argument)
#            $2 - checkpoint step (selects checkpoint-<step>)
#            $3 - language pair, e.g. en-de
# Outputs:   stdout and stderr of the job go to logs/infer_<step>_<pair>.log
#######################################
launch_infer() {
  local gpu=$1 step=$2 pair=$3
  # The literal "true" is infer.sh's second positional argument; its meaning is
  # defined by infer.sh (not visible here) and is passed through unchanged.
  CUDA_VISIBLE_DEVICES="$gpu" zsh infer.sh "$gpu" true "$step" \
    "${ckpt_root}/checkpoint-${step}" "$pair" \
    > "logs/infer_${step}_${pair}.log" 2>&1 &
}

echo "[$(date)] Inference Start"

launch_infer 0 600 en-de
launch_infer 1 600 en-fr
launch_infer 2 800 en-fr
launch_infer 3 1000 en-fr

# Block until every background job has exited; otherwise the "End" line below
# is printed immediately and the script returns while inference is still
# running. Exit statuses are not inspected here -- check the per-job logs.
wait

echo "[$(date)] Inference End"