#!/bin/bash
#
# Runs every evaluation script listed in `scripts` against each checkpoint of
# one SFT run, then invokes ./Evaluation/create_table.py after each checkpoint.

# Evaluation entry points, looked up under ./Evaluation/ and executed in this
# order for every checkpoint.
scripts=(
  "evaluate_musr_object_placements_raw_vs_finetuned.py"
  "evaluate_musr_murder_mystery_raw_vs_finetuned.py"
  "evaluate_musr_team_allocation_raw_vs_finetuned.py"
  "evaluate_medqa_raw_vs_finetuned.py"
  "evaluate_gsm8k_raw_vs_finetuned.py"
  "evaluate_aime_raw_vs_finetuned.py"
  "evaluate_aimo_raw_vs_finetuned.py"
  "evaluate_art_raw_vs_finetuned.py"
  "evaluate_copa_raw_vs_finetuned_guess_effect.py"
  "evaluate_goEmotion_raw_vs_finetuned.py"
)
# Directory handed to every evaluation script via --output_path.
OUTPUT_DIR="/home/moein_salimi/users/Parsa/AbductiveReasoning/SFT/Evaluation/14B"

# Directory handed to create_table.py via --root.
# NOTE(review): looks like the relative form of OUTPUT_DIR, which would only
# match when the script is started from .../AbductiveReasoning -- confirm.
ROOT_DIR="./SFT/Evaluation/14B"

# Parent directory that holds the run's output; also passed to create_table.py
# via --base_result_dir.
BASE_RESULTS_DIR="/home/moein_salimi/users/Parsa/AbductiveReasoning/SFT/results_sft_14b"

# Path passed to every evaluation script via --raw_path.
RAW_MODEL_PATH="/home/moein_salimi/PLLMS/unsloth-Qwen2.5-14B-Instruct-bnb-4bit"

# Run identifier: names the run's directory under BASE_RESULTS_DIR and is
# forwarded to the Python scripts via --run.
RUN_NAME="SFT_dt12.11.19:13_e6_unsloth_Qwen2.5_14B_Instruct_bnb_4bit_bnb_4bit_lr5e-06_t0.0_r64_b4_SFT_Implementation"

# The two places where the run's "checkpoint" directory is looked for:
# a "Training_"-prefixed directory first, then the plain run-name directory.
TRAINING_DIR="$BASE_RESULTS_DIR/Training_${RUN_NAME}"
FINAL_DIR="$BASE_RESULTS_DIR/${RUN_NAME}"
# Locate the run's checkpoint directory. The first candidate that contains a
# "checkpoint" subdirectory wins: TRAINING_DIR is preferred over FINAL_DIR.
# Sets CHECKPOINT_DIR (the "checkpoint" directory itself) and TRAINING_BASE
# (its parent run directory).
CHECKPOINT_DIR=""
TRAINING_BASE=""
for candidate in "$TRAINING_DIR" "$FINAL_DIR"; do
  if [[ -d "$candidate/checkpoint" ]]; then
    CHECKPOINT_DIR="$candidate/checkpoint"
    TRAINING_BASE="$candidate"
    break
  fi
done

# Nothing to evaluate without checkpoints: report on stderr and abort.
if [[ -z "$CHECKPOINT_DIR" ]]; then
  {
    echo "ERROR: Could not find checkpoint directory."
    echo "Tried:"
    echo " $TRAINING_DIR/checkpoint"
    echo " $FINAL_DIR/checkpoint"
  } >&2
  exit 1
fi

echo "Using checkpoint directory: $CHECKPOINT_DIR"
echo
# Flags shared by every evaluation invocation. This is a single string that is
# word-split where it is used, so no individual value may contain whitespace.
COMMON_ARGS="--cuda_device 0 --evaluate_checkpoints 1"

# Per-script value for --batch_size, keyed by script file name. A script with
# no entry here falls back to 256 at the call site.
declare -A BATCH_SIZES=(
  ["evaluate_neulr_deductive_raw_vs_finetuned.py"]=8
  ["evaluate_neulr_inductive_raw_vs_finetuned.py"]=8
  ["evaluate_neulr_abductive_raw_vs_finetuned.py"]=8
  ["evaluate_medqa_raw_vs_finetuned.py"]=16
  ["evaluate_musr_murder_mystery_raw_vs_finetuned.py"]=8
  ["evaluate_musr_object_placements_raw_vs_finetuned.py"]=2
  ["evaluate_musr_team_allocation_raw_vs_finetuned.py"]=16
  ["evaluate_gsm8k_raw_vs_finetuned.py"]=16
  ["evaluate_aime_raw_vs_finetuned.py"]=8
  ["evaluate_aimo_raw_vs_finetuned.py"]=8
  ["evaluate_art_raw_vs_finetuned.py"]=64
  ["evaluate_copa_raw_vs_finetuned_guess_effect.py"]=64
  ["evaluate_goEmotion_raw_vs_finetuned.py"]=16
)

# Put TRAINING_BASE into the environment of child processes.
# NOTE(review): presumably read by the Python scripts -- confirm.
export TRAINING_BASE
#######################################
# Print the names of the checkpoint-* subdirectories of a directory, one per
# line, ordered numerically by the field after the first dash.
# Uses a glob instead of parsing `ls`, so names are never word-split or
# glob-expanded; the trailing slash restricts matches to directories.
# Arguments: $1 - directory to scan
# Outputs:   directory names (not full paths) on stdout; nothing if none match
#######################################
_list_checkpoints() {
  local dir=$1 path name
  for path in "$dir"/checkpoint-*/; do
    # An unmatched glob stays literal; this test filters it out.
    [[ -d "$path" ]] || continue
    name=${path%/}
    printf '%s\n' "${name##*/}"
  done | sort -t- -k2,2n
}

# Split the shared flag string into separate words once, without subjecting
# them to pathname expansion.
read -r -a common_args <<< "${COMMON_ARGS:-}"

mapfile -t ckpt_names < <(_list_checkpoints "$CHECKPOINT_DIR")
if (( ${#ckpt_names[@]} == 0 )); then
  echo "WARNING: no checkpoint-* directories found in $CHECKPOINT_DIR" >&2
fi

for ckpt_name in "${ckpt_names[@]}"; do
  ckpt="$CHECKPOINT_DIR/$ckpt_name"

  echo "====================================="
  echo "Using checkpoint: $ckpt"
  echo "====================================="

  for script in "${scripts[@]}"; do
    # Scripts without an entry in BATCH_SIZES run with a batch size of 256.
    batch_size="${BATCH_SIZES[$script]:-256}"

    echo "Running $script with checkpoint $ckpt (batch_size=$batch_size) ..."
    # A failing evaluation is reported on stderr but does not stop the run,
    # so the remaining scripts and checkpoints are still processed.
    if python3 ./Evaluation/"$script" \
      "${common_args[@]}" \
      --batch_size "$batch_size" \
      --checkpoint_path "$ckpt" \
      --run "$RUN_NAME" \
      --raw_path "$RAW_MODEL_PATH" \
      --output_path "$OUTPUT_DIR"; then
      echo "Finished $script"
    else
      echo "WARNING: $script exited with status $? for checkpoint $ckpt" >&2
    fi
    echo "-------------------------------------"
  done

  # create_table.py is run once per checkpoint, after that checkpoint's
  # evaluations.
  # NOTE(review): --out_csv contains a double slash and an .xlsx extension;
  # kept exactly as it was -- confirm the intended output path and format.
  python3 ./Evaluation/create_table.py \
    --root "$ROOT_DIR" \
    --out_csv "./SFT/Evaluation//metrics_summary.xlsx" \
    --run "$RUN_NAME" \
    --base_model_name "qwen2.5-14B" \
    --base_result_dir "$BASE_RESULTS_DIR" \
    --train_data "UniADILR"
done
|