File size: 2,161 Bytes

28b13fc

#!/bin/bash
# ─────────────────────────────────────────────────────────
# evaluate.sh — Run evaluation on test set
# Usage: bash scripts/evaluate.sh [RUN_ID]
#
# RUN_ID is forwarded to the evaluator via --run_id and drives the
# checkpoint path + results dir. If no RUN_ID is given, we fall back to
# the last run recorded in checkpoints/run_id.txt.
# ─────────────────────────────────────────────────────────

# Strict mode: abort on a failing command (-e), on expansion of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# Resolve the project root as the parent of the directory that contains
# this script, then work from there so the relative paths used by the rest
# of the script do not depend on the caller's working directory.
# `--` keeps a path that starts with '-' from being parsed as an option.
PROJ_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)"
cd -- "$PROJ_DIR"

# ── Config ────────────────────────────────────────────────
# Optional: path to chexbert.pth for ClinicalF1 (empty by default).
CHEXBERT_PATH=''

# Model and training configuration files.
MODEL_CONFIG='configs/model_config.yaml'
TRAIN_CONFIG='configs/train_config.yaml'

# Base directory for evaluation output.
OUTPUT_DIR='results/'

# ── Resolve run_id ────────────────────────────────────────
# Precedence: the first positional argument, then the id recorded in
# checkpoints/run_id.txt. All whitespace (including the trailing newline)
# is stripped from the recorded id.
RUN_ID="${1:-}"
if [[ -z "$RUN_ID" && -f "checkpoints/run_id.txt" ]]; then
    # Redirect into tr directly; no extra `cat` process or pipeline needed.
    RUN_ID="$(tr -d '[:space:]' < checkpoints/run_id.txt)"
fi
if [[ -z "$RUN_ID" ]]; then
    # Diagnostics go to stderr so they are not mixed into captured stdout.
    echo "Could not resolve RUN_ID. Pass it as arg 1, or run training first." >&2
    exit 1
fi

# Checkpoint file evaluated for the selected run.
CHECKPOINT="checkpoints/${RUN_ID}/stage2_instruct/stage2_final.pt"

# Per-run results path, used for display only. One trailing slash is
# stripped from OUTPUT_DIR before joining so the messages show
# "results/<id>" instead of "results//<id>". OUTPUT_DIR itself is passed
# to the evaluator unchanged.
results_path="${OUTPUT_DIR%/}/${RUN_ID}"

# Non-fatal early hint: how the evaluator treats a missing checkpoint is not
# visible from this script, so the problem is surfaced without aborting.
if [[ ! -f "$CHECKPOINT" ]]; then
    echo "Warning: checkpoint not found: $CHECKPOINT" >&2
fi

echo "=========================================="
echo " CXR VLM Evaluation"
echo " Run id   : $RUN_ID"
echo " Checkpoint: $CHECKPOINT"
echo " Output    : $results_path"
echo "=========================================="

# Arguments are collected in an array so every value stays a single word
# regardless of its content, without fragile backslash continuations.
eval_args=(
    --model_config   "$MODEL_CONFIG"
    --train_config   "$TRAIN_CONFIG"
    --checkpoint     "$CHECKPOINT"
    --run_id         "$RUN_ID"
    --task           all
    --split          test
    --output_dir     "$OUTPUT_DIR"
    --chexbert_path  "$CHEXBERT_PATH"
    --batch_size     8
    --max_new_tokens 300
)

python -m evaluation.evaluate "${eval_args[@]}"

echo "Evaluation complete! Results saved to $results_path"