#!/usr/bin/env bash
# Evaluate Arcspan checkpoint on CyNER test set.
#
# Usage: bash scripts/eval_cyner.sh [CHECKPOINT] [DEVICE]
#   CHECKPOINT  checkpoint passed to `opf eval --checkpoint`
#               (default: <root>/checkpoints/cyner_v1)
#   DEVICE      device passed to `opf eval --device` (default: cuda)
#
# Runs `opf eval` twice on the same test file -- once with Viterbi decoding
# and once with argmax decoding -- and writes the outputs to <root>/results,
# where <root> is the parent of the directory containing this script.
set -euo pipefail

# Resolve the root relative to this script so it works from any cwd.
ROOT="$(cd -- "$(dirname -- "$0")/.." && pwd)"
CHECKPOINT="${1:-$ROOT/checkpoints/cyner_v1}"
DEVICE="${2:-cuda}"
# Single definition of the test set path, shared by the banner and both runs.
TEST_DATA="$ROOT/data/processed/cyner_test.jsonl"
RESULTS_DIR="$ROOT/results"

echo "=== Arcspan CyNER Evaluation ==="
echo " Checkpoint: $CHECKPOINT"
echo " Test data: $TEST_DATA"
echo ""

# Fail fast, before creating any output, when a prerequisite is missing.
if ! command -v opf >/dev/null 2>&1; then
  echo "error: 'opf' command not found on PATH" >&2
  exit 1
fi
if [[ ! -f "$TEST_DATA" ]]; then
  echo "error: test data not found: $TEST_DATA" >&2
  exit 1
fi

mkdir -p -- "$RESULTS_DIR"

# 1. Viterbi decode (our primary mode)
# Writes predictions, metrics, and timings for this run.
echo "--- Viterbi Decode ---"
opf eval "$TEST_DATA" \
  --checkpoint "$CHECKPOINT" \
  --device "$DEVICE" \
  --decode-mode viterbi \
  --per-class --label-counts \
  --predictions-out "$RESULTS_DIR/cyner_test_preds_viterbi.jsonl" \
  --metrics-out "$RESULTS_DIR/cyner_test_metrics_viterbi.json" \
  --timings-out "$RESULTS_DIR/cyner_test_timings_viterbi.json"
echo ""

# 2. Argmax decode (ablation comparison)
# Only the metrics file is written for this run.
echo "--- Argmax Decode ---"
opf eval "$TEST_DATA" \
  --checkpoint "$CHECKPOINT" \
  --device "$DEVICE" \
  --decode-mode argmax \
  --per-class --label-counts \
  --metrics-out "$RESULTS_DIR/cyner_test_metrics_argmax.json"
echo ""

echo "=== Done. Results in $RESULTS_DIR ==="