#!/bin/bash
# ─────────────────────────────────────────────────────────
# evaluate.sh — Run evaluation on test set
# Usage: bash scripts/evaluate.sh [RUN_ID]
#
# If RUN_ID is given, it's passed via --run_id and drives the
# checkpoint path + results dir. Otherwise we fall back to the
# last run recorded in checkpoints/run_id.txt.
#
# Exit status: 1 if no RUN_ID can be resolved; otherwise the
# status of the first failing command (set -e), or 0.
# ─────────────────────────────────────────────────────────

# -e: abort on the first failing command; -u: treat unset variables as
# errors; pipefail: a pipeline fails if any stage of it fails.
set -euo pipefail

# Run from the project root (parent of this script's directory) so the
# relative config/checkpoint/results paths below resolve regardless of
# the caller's working directory.
PROJ_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$PROJ_DIR"

# ── Config ────────────────────────────────────────────────
MODEL_CONFIG="configs/model_config.yaml"
TRAIN_CONFIG="configs/train_config.yaml"
OUTPUT_DIR="results/"
CHEXBERT_PATH=""   # optional: path to chexbert.pth for ClinicalF1

# ── Resolve run_id ────────────────────────────────────────
# Precedence: first positional argument, then checkpoints/run_id.txt.
RUN_ID="${1:-}"
if [[ -z "$RUN_ID" && -f "checkpoints/run_id.txt" ]]; then
  # Strip all whitespace (including the trailing newline) from the file.
  RUN_ID="$(tr -d '[:space:]' < checkpoints/run_id.txt)"
fi
if [[ -z "$RUN_ID" ]]; then
  # Diagnostics go to stderr so they are not mixed into captured stdout.
  echo "Could not resolve RUN_ID. Pass it as arg 1, or run training first." >&2
  exit 1
fi

CHECKPOINT="checkpoints/${RUN_ID}/stage2_instruct/stage2_final.pt"

# OUTPUT_DIR carries a trailing slash; trim it for display only so the
# messages do not show "results//<run_id>". The evaluator still receives
# OUTPUT_DIR unchanged.
RESULTS_PATH="${OUTPUT_DIR%/}/${RUN_ID}"

# Warn (but do not abort) when the derived checkpoint is missing: the
# path is still handed to the evaluator, which decides how to handle it.
if [[ ! -f "$CHECKPOINT" ]]; then
  echo "Warning: checkpoint not found at $CHECKPOINT" >&2
fi

echo "=========================================="
echo " CXR VLM Evaluation"
echo " Run id : $RUN_ID"
echo " Checkpoint: $CHECKPOINT"
echo " Output : $RESULTS_PATH"
echo "=========================================="

python -m evaluation.evaluate \
  --model_config "$MODEL_CONFIG" \
  --train_config "$TRAIN_CONFIG" \
  --checkpoint "$CHECKPOINT" \
  --run_id "$RUN_ID" \
  --task all \
  --split test \
  --output_dir "$OUTPUT_DIR" \
  --chexbert_path "$CHEXBERT_PATH" \
  --batch_size 8 \
  --max_new_tokens 300

echo "Evaluation complete! Results saved to $RESULTS_PATH"