#!/usr/bin/env bash
# Evaluate an Arcspan checkpoint on the CyNER test set.
#
# Usage: bash scripts/eval_cyner.sh [CHECKPOINT] [DEVICE]
#   CHECKPOINT  path passed to `opf eval --checkpoint`
#               (default: <repo root>/checkpoints/cyner_v1)
#   DEVICE      value passed to `opf eval --device` (default: cuda)
#
# Runs `opf eval` twice on the same test file: once with Viterbi decoding
# (writing predictions, metrics and timings) and once with argmax decoding
# (writing metrics only). All output files land in <repo root>/results.
set -euo pipefail

# The repo root is the parent of the directory that holds this script.
ROOT="$(cd -- "$(dirname -- "$0")/.." && pwd)"
CHECKPOINT="${1:-$ROOT/checkpoints/cyner_v1}"
DEVICE="${2:-cuda}"
TEST_DATA="$ROOT/data/processed/cyner_test.jsonl"
RESULTS_DIR="$ROOT/results"

# Fail early with a clear message instead of part-way through the run.
# 127 mirrors the status the shell itself uses for "command not found".
if ! command -v opf >/dev/null 2>&1; then
  printf 'error: opf not found on PATH\n' >&2
  exit 127
fi
if [[ ! -e "$TEST_DATA" ]]; then
  printf 'error: test data not found: %s\n' "$TEST_DATA" >&2
  exit 1
fi

#######################################
# Run `opf eval` on the test set with the flags shared by every decode mode.
# Globals:   TEST_DATA, CHECKPOINT, DEVICE (read)
# Arguments: $1 - value for --decode-mode
#            $2... - extra flags appended verbatim (output destinations)
# Returns:   exit status of `opf eval`
#######################################
run_eval() {
  local mode=$1
  shift
  opf eval "$TEST_DATA" \
    --checkpoint "$CHECKPOINT" \
    --device "$DEVICE" \
    --decode-mode "$mode" \
    --per-class --label-counts \
    "$@"
}

echo "=== Arcspan CyNER Evaluation ==="
echo " Checkpoint: $CHECKPOINT"
echo " Test data: $TEST_DATA"
echo ""

mkdir -p "$RESULTS_DIR"

# 1. Viterbi decode (our primary mode): keep predictions, metrics and timings.
echo "--- Viterbi Decode ---"
run_eval viterbi \
  --predictions-out "$RESULTS_DIR/cyner_test_preds_viterbi.jsonl" \
  --metrics-out "$RESULTS_DIR/cyner_test_metrics_viterbi.json" \
  --timings-out "$RESULTS_DIR/cyner_test_timings_viterbi.json"
echo ""

# 2. Argmax decode (ablation comparison): only the metrics are kept.
echo "--- Argmax Decode ---"
run_eval argmax \
  --metrics-out "$RESULTS_DIR/cyner_test_metrics_argmax.json"
echo ""

echo "=== Done. Results in $RESULTS_DIR ==="