#!/usr/bin/env bash
#
# Checkpoint evaluation driver.
#
# Runs eval_tools.py over the *.pt checkpoints found under
# ./checkpoints/MATE_custom and ./checkpoints/MASC_custom, reports the
# best-scoring checkpoint of each task, and finally runs a combined MABSA
# evaluation.
#
# Environment:
#   CUDA_VISIBLE_DEVICES  GPU(s) exposed to the evaluation runs. Defaults to
#                         "0" when unset or empty. eval_tools.py is invoked
#                         with "--device cuda:0", which presumably means the
#                         first device listed here - confirm against
#                         eval_tools.py.

# Catch typos in variable names. `-e` and `pipefail` are deliberately left off:
# a failing evaluation of a single checkpoint, or a grep that finds no metric,
# must not abort the whole run.
set -u

# Respect a caller-supplied device selection; fall back to the first GPU.
export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-0}"
# -----------------------------------------------------------------------------
# MATE: evaluate every checkpoint and report the one with the highest F1.
# -----------------------------------------------------------------------------
CHECKPOINT_DIR="./checkpoints/MATE_custom"
TEST_DATA="./finetune_dataset/custom/test"

# Metrics of the best MATE checkpoint seen so far. Slots are addressed through
# the index constants declared below; the MODEL slot stays "None" until a
# checkpoint with a parsable F1 has been evaluated.
best_stats_values=(0 0 0 0 0 0 "None")
declare -r COR=0 LABEL=1 PRED=2 ACC=3 REC=4 F1=5 MODEL=6

#######################################
# Run eval_tools.py (task MATE) on every *.pt file in CHECKPOINT_DIR, print
# the metrics parsed from each run, and remember the checkpoint with the
# highest F1.
# Globals:   CHECKPOINT_DIR, TEST_DATA (read); best_stats_values (written)
# Arguments: none
# Outputs:   per-checkpoint metrics on stdout; diagnostics on stderr
# Returns:   0
#######################################
evaluate_mate_checkpoints() {
  # At least one digit and at most one decimal point, so awk always gets a
  # real number to compare.
  local -r number_re='^([0-9]+\.?[0-9]*|\.[0-9]+)$'
  local model output status
  local correct label prediction accuracy recall f1

  for model in "${CHECKPOINT_DIR}"/*.pt; do
    # An unmatched glob stays literal; skip it and anything that is not a
    # regular file.
    [[ -f "$model" ]] || continue

    # stderr is captured together with stdout, so metrics are found no matter
    # which stream the tool writes them to.
    status=0
    output=$(python eval_tools.py \
      --MATE_model "${model}" \
      --test_ds "${TEST_DATA}" \
      --task MATE \
      --gcn_layers 4 \
      --device cuda:0 2>&1) || status=$?

    # Surface failures instead of only showing a row of N/A values.
    if ((status != 0)); then
      printf 'Warning: eval_tools.py exited with status %d for %s\n' \
        "$status" "$model" >&2
      printf '%s\n' "$output" >&2
    fi

    # Each metric is taken from a "<Name>:<value>" token. When the token
    # occurs several times only the last occurrence is used, which keeps every
    # value on a single line.
    correct=$(grep -o 'Correct:[0-9]*' <<<"$output" | tail -n 1 | cut -d: -f2)
    label=$(grep -o 'Label:[0-9]*' <<<"$output" | tail -n 1 | cut -d: -f2)
    prediction=$(grep -o 'Prediction:[0-9]*' <<<"$output" | tail -n 1 | cut -d: -f2)
    accuracy=$(grep -o 'Accuracy:[0-9.]*' <<<"$output" | tail -n 1 | cut -d: -f2)
    recall=$(grep -o 'Recall:[0-9.]*' <<<"$output" | tail -n 1 | cut -d: -f2)
    f1=$(grep -o 'F1:[0-9.]*' <<<"$output" | tail -n 1 | cut -d: -f2)

    printf '\nModel: %s\n' "${model##*/}"
    printf '%s\n' \
      "Correct : ${correct:-N/A}" \
      "Label : ${label:-N/A}" \
      "Prediction : ${prediction:-N/A}" \
      "Accuracy : ${accuracy:-N/A}" \
      "Recall : ${recall:-N/A}" \
      "F1 : ${f1:-N/A}"

    # Checkpoints without a usable F1 cannot compete for "best".
    if [[ ! "$f1" =~ $number_re ]]; then
      continue
    fi

    # The first scored checkpoint always replaces the "None" placeholder (even
    # with F1 = 0); after that only a strictly higher F1 wins. "+ 0" forces a
    # numeric comparison in awk.
    if [[ "${best_stats_values[$MODEL]}" == "None" ]] ||
      awk -v cand="$f1" -v best="${best_stats_values[$F1]}" \
        'BEGIN { exit !(cand + 0 > best + 0) }'; then
      best_stats_values[$COR]=${correct:-0}
      best_stats_values[$LABEL]=${label:-0}
      best_stats_values[$PRED]=${prediction:-0}
      best_stats_values[$ACC]=${accuracy:-0}
      best_stats_values[$REC]=${recall:-0}
      best_stats_values[$F1]=$f1
      best_stats_values[$MODEL]=${model##*/}
    fi
  done

  return 0
}

evaluate_mate_checkpoints

printf '\n%s\n' "========== MATE Best Results =========="
printf '%s\n' \
  "Best Model: ${best_stats_values[$MODEL]}" \
  "F1 : ${best_stats_values[$F1]}" \
  "Accuracy: ${best_stats_values[$ACC]}" \
  "Recall : ${best_stats_values[$REC]}"
# -----------------------------------------------------------------------------
# MASC: evaluate every checkpoint and report the one with the highest macro F1.
# -----------------------------------------------------------------------------
CHECKPOINT_DIR="./checkpoints/MASC_custom"
TEST_DATA="./finetune_dataset/custom/test"

# Metrics of the best MASC checkpoint seen so far. Slots are addressed through
# the MASC_* index constants; the MASC_MODEL slot stays "None" until a
# checkpoint with a parsable Macro_f1 has been evaluated.
masc_best_stats=(0 0 0 0 0 "None")
declare -r MASC_COR=0 MASC_LABEL=1 MASC_PRED=2 MASC_ACC=3 MASC_F1=4 MASC_MODEL=5

#######################################
# Run eval_tools.py (task MASC) on every *.pt file in CHECKPOINT_DIR, print
# the metrics parsed from each run, and remember the checkpoint with the
# highest Macro_f1.
# Globals:   CHECKPOINT_DIR, TEST_DATA (read); masc_best_stats (written)
# Arguments: none
# Outputs:   per-checkpoint metrics on stdout; diagnostics on stderr
# Returns:   0
#######################################
evaluate_masc_checkpoints() {
  # At least one digit and at most one decimal point, so awk always gets a
  # real number to compare.
  local -r number_re='^([0-9]+\.?[0-9]*|\.[0-9]+)$'
  local model output status
  local correct label prediction accuracy f1

  for model in "${CHECKPOINT_DIR}"/*.pt; do
    # An unmatched glob stays literal; skip it and anything that is not a
    # regular file.
    [[ -f "$model" ]] || continue

    # stderr is captured together with stdout, so metrics are found no matter
    # which stream the tool writes them to.
    status=0
    output=$(python eval_tools.py \
      --MASC_model "${model}" \
      --test_ds "${TEST_DATA}" \
      --task MASC \
      --gcn_layers 4 \
      --device cuda:0 2>&1) || status=$?

    # Surface failures instead of only showing a row of N/A values.
    if ((status != 0)); then
      printf 'Warning: eval_tools.py exited with status %d for %s\n' \
        "$status" "$model" >&2
      printf '%s\n' "$output" >&2
    fi

    # Each metric is taken from a "<Name>:<value>" token. When the token
    # occurs several times only the last occurrence is used, which keeps every
    # value on a single line.
    correct=$(grep -o 'Correct:[0-9]*' <<<"$output" | tail -n 1 | cut -d: -f2)
    label=$(grep -o 'Label:[0-9]*' <<<"$output" | tail -n 1 | cut -d: -f2)
    prediction=$(grep -o 'Prediction:[0-9]*' <<<"$output" | tail -n 1 | cut -d: -f2)
    accuracy=$(grep -o 'Accuracy:[0-9.]*' <<<"$output" | tail -n 1 | cut -d: -f2)
    f1=$(grep -o 'Macro_f1:[0-9.]*' <<<"$output" | tail -n 1 | cut -d: -f2)

    printf '\nModel: %s\n' "${model##*/}"
    printf '%s\n' \
      "Correct : ${correct:-N/A}" \
      "Label : ${label:-N/A}" \
      "Prediction : ${prediction:-N/A}" \
      "Accuracy : ${accuracy:-N/A}" \
      "Macro_f1 : ${f1:-N/A}"

    # Checkpoints without a usable score cannot compete for "best".
    if [[ ! "$f1" =~ $number_re ]]; then
      continue
    fi

    # The first scored checkpoint always replaces the "None" placeholder (even
    # with a score of 0); after that only a strictly higher score wins.
    # "+ 0" forces a numeric comparison in awk.
    if [[ "${masc_best_stats[$MASC_MODEL]}" == "None" ]] ||
      awk -v cand="$f1" -v best="${masc_best_stats[$MASC_F1]}" \
        'BEGIN { exit !(cand + 0 > best + 0) }'; then
      masc_best_stats[$MASC_COR]=${correct:-0}
      masc_best_stats[$MASC_LABEL]=${label:-0}
      masc_best_stats[$MASC_PRED]=${prediction:-0}
      masc_best_stats[$MASC_ACC]=${accuracy:-0}
      masc_best_stats[$MASC_F1]=$f1
      masc_best_stats[$MASC_MODEL]=${model##*/}
    fi
  done

  return 0
}

evaluate_masc_checkpoints

printf '\n%s\n' "========== MASC Best Results =========="
printf '%s\n' \
  "Best Model: ${masc_best_stats[$MASC_MODEL]}" \
  "Macro F1: ${masc_best_stats[$MASC_F1]}" \
  "Accuracy: ${masc_best_stats[$MASC_ACC]}"
# -----------------------------------------------------------------------------
# MABSA: combined evaluation using the best_f1:<score>.pt checkpoint of each
# task.
# -----------------------------------------------------------------------------

#######################################
# Print the path of the "best_f1:<score>.pt" file with the highest numeric
# <score> in a directory. Prints nothing when no such file exists.
# Arguments: $1 - directory to search
# Outputs:   the winning path on stdout
# Returns:   0
#######################################
find_best_f1_checkpoint() {
  local dir=$1
  local candidate score
  local best_path="" best_score=""

  # Globbing instead of parsing `ls`: safe for unusual file names, and the
  # score is read from the file name rather than from the whole path.
  for candidate in "$dir"/best_f1:*.pt; do
    [[ -f "$candidate" ]] || continue

    score=${candidate##*/best_f1:}
    score=${score%.pt}

    # "+ 0" makes awk compare numerically (a non-numeric score counts as 0).
    # ">=" lets the later name in glob order win a tie.
    if [[ -z "$best_path" ]] ||
      awk -v cand="$score" -v best="$best_score" \
        'BEGIN { exit !(cand + 0 >= best + 0) }'; then
      best_path=$candidate
      best_score=$score
    fi
  done

  if [[ -n "$best_path" ]]; then
    printf '%s\n' "$best_path"
  fi
  return 0
}

#######################################
# Run the MABSA evaluation with the best MATE and MASC checkpoints, or explain
# which one is missing.
# Globals:   BEST_MATE, BEST_MASC (written)
# Arguments: $1 - MATE checkpoint directory (default ./checkpoints/MATE_custom)
#            $2 - MASC checkpoint directory (default ./checkpoints/MASC_custom)
# Outputs:   progress on stdout; eval_tools.py output is passed through
# Returns:   0 when skipped, otherwise the exit status of eval_tools.py
#######################################
run_mabsa_evaluation() {
  local mate_dir=${1:-./checkpoints/MATE_custom}
  local masc_dir=${2:-./checkpoints/MASC_custom}

  BEST_MATE=$(find_best_f1_checkpoint "$mate_dir")
  BEST_MASC=$(find_best_f1_checkpoint "$masc_dir")

  printf '\n%s\n' "========== MABSA Evaluation =========="

  if [[ -z "$BEST_MATE" || -z "$BEST_MASC" ]]; then
    echo "Skipped: Need both MATE and MASC best models"
    # Plain `if` statements: a trailing `[ ... ] && echo` would leak a
    # non-zero status out of the skip path.
    if [[ -z "$BEST_MATE" ]]; then
      echo " - Missing MATE model in ${mate_dir}/"
    fi
    if [[ -z "$BEST_MASC" ]]; then
      echo " - Missing MASC model in ${masc_dir}/"
    fi
    return 0
  fi

  echo "Using MATE: ${BEST_MATE##*/}"
  echo "Using MASC: ${BEST_MASC##*/}"

  # Last command on purpose: its exit status becomes the function's status.
  python eval_tools.py \
    --MATE_model "$BEST_MATE" \
    --MASC_model "$BEST_MASC" \
    --test_ds ./finetune_dataset/custom/test \
    --task MABSA \
    --gcn_layers 4 \
    --device cuda:0
}

run_mabsa_evaluation