File size: 2,466 Bytes

3dac39e

#!/bin/bash
# Round 6: Training with LR schedule, focal loss, LLRD + CLEANED DATA
# 6a = 5-class with cosine LR + focal loss + LLRD on cleaned training data
# Data fixes: +1590 IOC spans, -3844 ExploitDB titles, -64 bare extension labels
# GPU: RTX PRO 6000 96GB
# Strict mode: abort on a failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Every relative path used below (data/, checkpoints/, scripts/, *.log) is
# resolved from the project checkout.
cd ~/alkyline

# PyTorch CUDA allocator configuration, exported so child processes inherit it.
PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
export PYTORCH_CUDA_ALLOC_CONF

# Passed as the second argument to scripts/early_stop_monitor.sh
# (presumably the number of non-improving epochs tolerated — confirm there).
PATIENCE=3

# Directories holding the label-space definitions and the processed datasets.
LABELS=data/label_spaces
DATA=data/processed

echo "===== Round 6a: CLEANED 5-class (cosine + focal + LLRD) ====="
# Launch training in the background and mirror its combined stdout/stderr to
# both the console and train_r6a.log.
#
# The output goes through a process substitution instead of `| tee ... &`:
# for a backgrounded pipeline, $! is the PID of the LAST stage (tee), so the
# early-stop monitor and `wait` would act on tee rather than on the trainer.
# With `> >(tee ...)` the backgrounded command is `opf train` itself, so
# TRAIN_PID really identifies the training process.
# NOTE: tee runs detached from this shell; it exits once the trainer closes
# its output, normally within moments of `wait` returning.
opf train "$DATA/enriched_5class_train_cleaned.jsonl" \
  --validation-dataset "$DATA/enriched_5class_valid_cleaned.jsonl" \
  --label-space-json "$LABELS/cyner_5class.json" \
  --output-dir checkpoints/r6a_enriched_5class \
  --overwrite-output \
  --epochs 15 --batch-size 4 --grad-accum-steps 2 \
  --learning-rate 5e-5 \
  --warmup-fraction 0.1 --lr-schedule cosine \
  --loss-fn focal --focal-gamma 2.0 \
  --llrd-factor 0.9 \
  --device cuda > >(tee train_r6a.log) 2>&1 &
TRAIN_PID=$!

# The monitor receives the log path, the patience value and the trainer PID.
bash scripts/early_stop_monitor.sh train_r6a.log "$PATIENCE" "$TRAIN_PID" &
MONITOR_PID=$!

# A non-zero status is expected when the monitor stops training early, so it
# must not abort the script under `set -e`; record and report it instead of
# silently discarding it.
TRAIN_STATUS=0
wait "$TRAIN_PID" 2>/dev/null || TRAIN_STATUS=$?

# Training is over either way: stop the monitor (it may already have exited)
# and reap it so no stray background job is left behind.
kill "$MONITOR_PID" 2>/dev/null || true
wait "$MONITOR_PID" 2>/dev/null || true

if (( TRAIN_STATUS != 0 )); then
  echo "Training exited with status $TRAIN_STATUS (expected if stopped early by the monitor)" >&2
fi

# Find best epoch checkpoint

#######################################
# Print the epoch with the lowest validation loss found in a training log.
#
# Only lines starting with "epoch" are considered. Each such line is split on
# runs of space, ':', '/' and '=', the epoch is taken from the second field,
# and the loss from the field following a "val_loss" token
# (e.g. "epoch 3/15 ... val_loss=0.1234").
# Losses are compared numerically inside awk, so exponent notation such as
# 9.5e-05 is ordered correctly (`sort -n` would read it as 9.5).
# On ties the earliest matching line wins.
#
# Arguments: $1 - path to the training log
# Outputs:   the best epoch on stdout (nothing if none was found)
# Returns:   0 if an epoch was found; non-zero if the log is unreadable or
#            contains no usable val_loss entry
#######################################
best_epoch_from_log() {
  local log_file=$1
  [[ -r "$log_file" ]] || return 1
  awk -F'[ :/=]+' '
    /^epoch/ {
      for (i = 1; i < NF; i++) {
        # Accept only values that start like a number; skips "nan", "-", etc.
        if ($i == "val_loss" && $(i + 1) ~ /^[-+]?[.]?[0-9]/) {
          loss = $(i + 1) + 0
          if (!found || loss < best_loss) {
            best_loss = loss
            best_epoch = $2
            found = 1
          }
        }
      }
    }
    END {
      if (!found) exit 1
      print best_epoch
    }
  ' "$log_file"
}

# A log without any val_loss line must not abort the script under `set -e`;
# fall back to an empty value so the final checkpoint directory is used.
BEST_EPOCH=$(best_epoch_from_log train_r6a.log) || BEST_EPOCH=""
echo "Best epoch: $BEST_EPOCH"

# Default to the final checkpoint directory. If it holds no model.safetensors
# and a best epoch is known, use that epoch's checkpoint directory instead.
CKPT=checkpoints/r6a_enriched_5class
if [[ ! -f "$CKPT/model.safetensors" && -n "$BEST_EPOCH" ]]; then
  echo "Training killed early — using epoch $BEST_EPOCH checkpoint"
  CKPT="checkpoints/r6a_enriched_5class/epoch_${BEST_EPOCH}"
fi

# Run `opf eval` on one dataset with the selected checkpoint, mirroring the
# combined stdout/stderr to the console and to a log file.
# Globals:   DATA (read), CKPT (read)
# Arguments: $1 - label shown in the banner
#            $2 - dataset file name inside $DATA
#            $3 - log file to write
run_eval() {
  local banner_label=$1
  local dataset_file=$2
  local log_path=$3

  echo "===== Eval R6a on ${banner_label} ====="
  opf eval "$DATA/${dataset_file}" \
    --checkpoint "$CKPT" \
    --decode-mode viterbi --per-class --label-counts \
    --device cuda 2>&1 | tee "$log_path"
}

run_eval "enriched test" enriched_5class_test.jsonl eval_r6a_enriched.log

run_eval "CyNER test" cyner_test.jsonl eval_r6a_cyner.log

run_eval "SecureBERT2 test" securebert2_test.jsonl eval_r6a_sb2.log

echo "===== Viterbi Grid Search ====="
# Arguments for the grid-search script: the selected checkpoint, evaluated on
# the cleaned validation split.
grid_search_args=(
  --checkpoint "$CKPT"
  --eval-data "$DATA/enriched_5class_valid_cleaned.jsonl"
  --device cuda
)
# Mirror combined stdout/stderr to the console and to viterbi_r6a.log.
python3 scripts/viterbi_grid_search.py "${grid_search_args[@]}" 2>&1 \
  | tee viterbi_r6a.log

echo "===== ROUND 6 DONE ====="