File size: 1,232 Bytes
3dac39e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#!/usr/bin/env bash
# Evaluate Arcspan checkpoint on CyNER test set.
#
# Usage: bash scripts/eval_cyner.sh [CHECKPOINT] [DEVICE]
#   CHECKPOINT  value passed to `opf eval --checkpoint`
#               (default: <root>/checkpoints/cyner_v1)
#   DEVICE      value passed to `opf eval --device` (default: cuda)
#
# <root> is the parent of the directory containing this script. `opf eval` is
# run twice on <root>/data/processed/cyner_test.jsonl: once with
# --decode-mode viterbi and once with --decode-mode argmax. All outputs are
# written under <root>/results. Because of `set -e`, a failure of the first
# run aborts the script before the second run starts.
set -euo pipefail

# Print an error message to stderr and abort.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# CDPATH is cleared for this cd: with CDPATH set, `cd` on a relative path can
# print the resolved directory to stdout, which would be captured into ROOT
# ahead of the `pwd` output. `--` protects against paths starting with '-'.
ROOT="$(CDPATH='' cd -- "$(dirname -- "$0")/.." && pwd)"
CHECKPOINT="${1:-$ROOT/checkpoints/cyner_v1}"
DEVICE="${2:-cuda}"
TEST_DATA="$ROOT/data/processed/cyner_test.jsonl"
RESULTS_DIR="$ROOT/results"

# Fail early with a clear message instead of part-way through the run.
command -v opf >/dev/null 2>&1 || die "'opf' command not found on PATH"
[[ -e "$TEST_DATA" ]] || die "test data not found: $TEST_DATA"

echo "=== Arcspan CyNER Evaluation ==="
echo "  Checkpoint: $CHECKPOINT"
echo "  Test data:  $TEST_DATA"
echo ""

mkdir -p "$RESULTS_DIR"

# 1. Viterbi decode (our primary mode): saves predictions, metrics and timings.
echo "--- Viterbi Decode ---"
opf eval "$TEST_DATA" \
  --checkpoint "$CHECKPOINT" \
  --device "$DEVICE" \
  --decode-mode viterbi \
  --per-class --label-counts \
  --predictions-out "$RESULTS_DIR/cyner_test_preds_viterbi.jsonl" \
  --metrics-out "$RESULTS_DIR/cyner_test_metrics_viterbi.json" \
  --timings-out "$RESULTS_DIR/cyner_test_timings_viterbi.json"

echo ""

# 2. Argmax decode (ablation comparison): only the metrics file is saved.
echo "--- Argmax Decode ---"
opf eval "$TEST_DATA" \
  --checkpoint "$CHECKPOINT" \
  --device "$DEVICE" \
  --decode-mode argmax \
  --per-class --label-counts \
  --metrics-out "$RESULTS_DIR/cyner_test_metrics_argmax.json"

echo ""
echo "=== Done. Results in $RESULTS_DIR ==="