File size: 2,045 Bytes
91daf98 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 |
# set it to your data path
data_path=data/sl_data
# by default set it to CADFusion/exp
exp_path=exp/model_ckpt
# by default set it to CADFusion/data
vf_path=data/vf_data
train_data=$data_path/train.json
eval_data=$data_path/val.json
# This script requires your SL run named as xxxx0, because for each VF stage, the final digit increments
# to show the number of VF rounds finished.
# e.g. SL name: CAD-0
# base_name: CAD- (remove the last digit, the script autofills it)
# VF run 1: CAD-1 (automatically)
# VF run 2: CAD-2 (automatically)
# ...
base_name=model_name_you_trained_for_SL_with_last_digit_removed
run_name=${base_name}0
./scripts/generate_samples.sh $run_name test "--full --device-map auto"
./scripts/generate_samples.sh $run_name train "--sample-len 1000 --device-map auto"
./scripts/make_dpo_data.sh $run_name --score-only
./scripts/make_dpo_data.sh $run_name-train "--gpu 0"
for LOOP in 1 2 3 4 5
do
echo "Starting VF round $LOOP"
run_name=$base_name$LOOP
dpo_training_path=$vf_path/$base_name$((LOOP-1))-train.json
dpo_run_name=$base_name$LOOP-dpo
dpo_save_path=$exp_path/$dpo_run_name
sft_run_name=$base_name$LOOP
python src/train/dpo.py --run-name $dpo_run_name --pretrained-path $exp_path/$base_name$((LOOP-1)) --data-path $dpo_training_path --output-path $dpo_save_path
python src/train/llama_finetune.py --num-epochs 1 --run-name $sft_run_name --data-path $train_data --eval-data-path $eval_data --eval-freq 3000 --pretrained-path $dpo_save_path --expdir $exp_path
./scripts/generate_samples.sh $dpo_run_name test "--full --device-map auto"
./scripts/generate_samples.sh $run_name test "--full --device-map auto"
./scripts/generate_samples.sh $run_name train "--sample-len 1000 --device-map auto"
./scripts/make_dpo_data.sh $dpo_run_name --score-only
./scripts/make_dpo_data.sh $run_name "--score-only --gpu 0"
./scripts/make_dpo_data.sh $run_name-train "--gpu 0"
done
|