File size: 1,232 Bytes
#!/usr/bin/env bash
# Evaluate an Arcspan checkpoint on the CyNER test set.
#
# Runs `opf eval` twice on <root>/data/processed/cyner_test.jsonl:
#   1. Viterbi decode (primary mode)  -> predictions, metrics and timings files
#   2. Argmax decode (ablation)       -> metrics file only
# All output files are written to <root>/results, where <root> is the parent
# of the directory containing this script.
#
# Usage: bash scripts/eval_cyner.sh [CHECKPOINT] [DEVICE]
#   CHECKPOINT  value for `opf eval --checkpoint`
#               (default: <root>/checkpoints/cyner_v1)
#   DEVICE      value for `opf eval --device` (default: cuda)
set -euo pipefail

# Print an error message to stderr and abort.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Resolve <root>. CDPATH is cleared for this one `cd`: with CDPATH exported, a
# relative path such as "scripts/.." may be resolved against a CDPATH entry and
# `cd` then prints the directory to stdout, corrupting the captured value.
# `--` guards against a script path that begins with a dash.
ROOT="$(CDPATH='' cd -- "$(dirname -- "$0")/.." && pwd)"
CHECKPOINT="${1:-$ROOT/checkpoints/cyner_v1}"
DEVICE="${2:-cuda}"
TEST_DATA="$ROOT/data/processed/cyner_test.jsonl"
RESULTS_DIR="$ROOT/results"

# Fail early with a clear message rather than midway through the run.
command -v opf >/dev/null 2>&1 || die "'opf' command not found on PATH"
[[ -f "$TEST_DATA" ]] || die "test data not found: $TEST_DATA"

echo "=== Arcspan CyNER Evaluation ==="
printf ' Checkpoint: %s\n' "$CHECKPOINT"
printf ' Test data: %s\n' "$TEST_DATA"
echo ""

mkdir -p "$RESULTS_DIR"

# Leading arguments shared by both decode runs.
eval_args=(
  "$TEST_DATA"
  --checkpoint "$CHECKPOINT"
  --device "$DEVICE"
)

# 1. Viterbi decode (our primary mode)
echo "--- Viterbi Decode ---"
opf eval "${eval_args[@]}" \
  --decode-mode viterbi \
  --per-class --label-counts \
  --predictions-out "$RESULTS_DIR/cyner_test_preds_viterbi.jsonl" \
  --metrics-out "$RESULTS_DIR/cyner_test_metrics_viterbi.json" \
  --timings-out "$RESULTS_DIR/cyner_test_timings_viterbi.json"
echo ""

# 2. Argmax decode (ablation comparison)
echo "--- Argmax Decode ---"
opf eval "${eval_args[@]}" \
  --decode-mode argmax \
  --per-class --label-counts \
  --metrics-out "$RESULTS_DIR/cyner_test_metrics_argmax.json"
echo ""

echo "=== Done. Results in $RESULTS_DIR ==="
|