#!/bin/bash
# Run every evaluation script against each saved SFT checkpoint of the
# Qwen2.5-14B run, then rebuild the metrics summary spreadsheet.
# Evaluation scripts to run for every checkpoint, in execution order.
# The NeuLR evaluations are currently disabled; re-enable them by
# uncommenting the corresponding appends below.
scripts=()
# scripts+=("evaluate_neulr_deductive_raw_vs_finetuned.py")
# scripts+=("evaluate_neulr_inductive_raw_vs_finetuned.py")
# scripts+=("evaluate_neulr_abductive_raw_vs_finetuned.py")
scripts+=("evaluate_musr_object_placements_raw_vs_finetuned.py")
scripts+=("evaluate_musr_murder_mystery_raw_vs_finetuned.py")
scripts+=("evaluate_musr_team_allocation_raw_vs_finetuned.py")
scripts+=("evaluate_medqa_raw_vs_finetuned.py")
scripts+=("evaluate_gsm8k_raw_vs_finetuned.py")
scripts+=("evaluate_aime_raw_vs_finetuned.py")
scripts+=("evaluate_aimo_raw_vs_finetuned.py")
scripts+=("evaluate_art_raw_vs_finetuned.py")
scripts+=("evaluate_copa_raw_vs_finetuned_guess_effect.py")
scripts+=("evaluate_goEmotion_raw_vs_finetuned.py")
# --- Run configuration -------------------------------------------------
# Absolute directory where per-script evaluation results are written.
OUTPUT_DIR="/home/moein_salimi/users/Parsa/AbductiveReasoning/SFT/Evaluation/14B"
# Same results location relative to the repo root (consumed by create_table.py).
ROOT_DIR="./SFT/Evaluation/14B"
# Root of the SFT training outputs for the 14B model.
BASE_RESULTS_DIR="/home/moein_salimi/users/Parsa/AbductiveReasoning/SFT/results_sft_14b"
# Base (non-finetuned) model used as the comparison baseline.
# RAW_MODEL_PATH="/home/moein_salimi/PLLMS/unsloth-Qwen2.5-3B-Instruct-unsloth-bnb-4bit"
RAW_MODEL_PATH="/home/moein_salimi/PLLMS/unsloth-Qwen2.5-14B-Instruct-bnb-4bit"
# Training run whose checkpoints will be evaluated.
# RUN_NAME="dt11.18.17:40_e20_unsloth_Qwen2.5_3B_Instruct_unsloth_bnb_4bit_bnb_4bit_lr1e-05_t0.7_ε0.2_r64_b16"
RUN_NAME="SFT_dt12.11.19:13_e6_unsloth_Qwen2.5_14B_Instruct_bnb_4bit_bnb_4bit_lr5e-06_t0.0_r64_b4_SFT_Implementation"
# Candidate locations for the run directory (directory layout differs
# between newer "Training_<run>" runs and older "<run>" runs).
TRAINING_DIR="${BASE_RESULTS_DIR}/Training_${RUN_NAME}"
FINAL_DIR="${BASE_RESULTS_DIR}/${RUN_NAME}"
# Resolve the checkpoint directory: newer runs keep checkpoints under
# "Training_<run>/checkpoint", older runs directly under "<run>/checkpoint".
# Sets CHECKPOINT_DIR (where checkpoint-* dirs live) and TRAINING_BASE
# (the run directory itself); exits with status 1 when neither exists.
if [ -d "$TRAINING_DIR/checkpoint" ]; then
CHECKPOINT_DIR="$TRAINING_DIR/checkpoint"
TRAINING_BASE="$TRAINING_DIR"
elif [ -d "$FINAL_DIR/checkpoint" ]; then
CHECKPOINT_DIR="$FINAL_DIR/checkpoint"
TRAINING_BASE="$FINAL_DIR"
else
# Diagnostics go to stderr so they are not swallowed when stdout is
# redirected to a log file or piped into another tool.
echo "ERROR: Could not find checkpoint directory." >&2
echo "Tried:" >&2
echo " $TRAINING_DIR/checkpoint" >&2
echo " $FINAL_DIR/checkpoint" >&2
exit 1
fi
echo "Using checkpoint directory: $CHECKPOINT_DIR"
echo
# Flags passed to every evaluation script. Kept as a single string and
# deliberately expanded unquoted at the call site so it word-splits
# into individual flags.
COMMON_ARGS="--cuda_device 0 --evaluate_checkpoints 1"

# Per-script batch sizes, tuned for 14B-model GPU memory headroom.
# Any script not listed here falls back to a default of 256 at the call
# site.
declare -A BATCH_SIZES
BATCH_SIZES["evaluate_neulr_deductive_raw_vs_finetuned.py"]=8
BATCH_SIZES["evaluate_neulr_inductive_raw_vs_finetuned.py"]=8
BATCH_SIZES["evaluate_neulr_abductive_raw_vs_finetuned.py"]=8
BATCH_SIZES["evaluate_medqa_raw_vs_finetuned.py"]=16
BATCH_SIZES["evaluate_musr_murder_mystery_raw_vs_finetuned.py"]=8
# Note that each batch has 4 questions!
BATCH_SIZES["evaluate_musr_object_placements_raw_vs_finetuned.py"]=2
BATCH_SIZES["evaluate_musr_team_allocation_raw_vs_finetuned.py"]=16
BATCH_SIZES["evaluate_gsm8k_raw_vs_finetuned.py"]=16
BATCH_SIZES["evaluate_aime_raw_vs_finetuned.py"]=8
BATCH_SIZES["evaluate_aimo_raw_vs_finetuned.py"]=8
BATCH_SIZES["evaluate_art_raw_vs_finetuned.py"]=64
BATCH_SIZES["evaluate_copa_raw_vs_finetuned_guess_effect.py"]=64
BATCH_SIZES["evaluate_goEmotion_raw_vs_finetuned.py"]=16
export TRAINING_BASE

# Collect checkpoint directories with a glob instead of parsing `ls`
# output (ShellCheck SC2012), then sort numerically on the step number
# so checkpoint-100 comes after checkpoint-20 (a plain lexical glob
# sort would not).
shopt -s nullglob
ckpt_dirs=("$CHECKPOINT_DIR"/checkpoint-*)
shopt -u nullglob
if [ "${#ckpt_dirs[@]}" -gt 0 ]; then
mapfile -t ckpt_dirs < <(printf '%s\n' "${ckpt_dirs[@]}" | sort -t- -k2,2n)
fi

for ckpt in "${ckpt_dirs[@]}"; do
[ -d "$ckpt" ] || continue
echo "====================================="
echo "Using checkpoint: $ckpt"
echo "====================================="
for script in "${scripts[@]}"; do
# Fall back to a default batch size of 256 for unlisted scripts.
batch_size="${BATCH_SIZES[$script]:-256}"
echo "Running $script with checkpoint $ckpt (batch_size=$batch_size) ..."
# shellcheck disable=SC2086 -- COMMON_ARGS is a flag string that must word-split
python3 ./Evaluation/"$script" \
    $COMMON_ARGS \
    --batch_size "$batch_size" \
    --checkpoint_path "$ckpt" \
    --run "$RUN_NAME" \
    --raw_path "$RAW_MODEL_PATH" \
    --output_path "$OUTPUT_DIR"
echo "Finished $script"
echo "-------------------------------------"
done
# Rebuild the summary spreadsheet after every checkpoint so an
# interrupted run still leaves an up-to-date table behind.
python3 ./Evaluation/create_table.py \
    --root "$ROOT_DIR" \
    --out_csv "./SFT/Evaluation//metrics_summary.xlsx" \
    --run "$RUN_NAME" \
    --base_model_name "qwen2.5-14B" \
    --base_result_dir "$BASE_RESULTS_DIR" \
    --train_data "UniADILR"
done