#!/bin/bash
# Round 6: Training with LR schedule, focal loss, LLRD + CLEANED DATA
#   6a = 5-class with cosine LR + focal loss + LLRD on cleaned training data
#   Data fixes: +1590 IOC spans, -3844 ExploitDB titles, -64 bare extension labels
#   GPU: RTX PRO 6000 96GB
#
# Flow:
#   1. Train in the background while scripts/early_stop_monitor.sh watches
#      the training log.
#   2. Pick the checkpoint to evaluate: the final model if it was written,
#      otherwise the per-epoch checkpoint with the lowest logged val_loss.
#   3. Evaluate on the enriched, CyNER and SecureBERT2 test sets.
#   4. Run the Viterbi grid search on the cleaned validation set.
set -euo pipefail

cd ~/alkyline
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

PATIENCE=3
DATA=data/processed
LABELS=data/label_spaces
RUN_DIR=checkpoints/r6a_enriched_5class
TRAIN_LOG=train_r6a.log

# Print an error message to stderr and abort the script.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Print the epoch number whose logged val_loss is smallest, or nothing when
# the log is missing or has no matching lines.
# Arguments: $1 - path to the training log
# Lines starting with "epoch" are split on space, ':', '/' and '='; the field
# following the literal "val_loss" is the loss and field 2 is the epoch.
# The "^epoch" filter lives inside awk (not a separate grep) so that an empty
# match does not produce a non-zero pipeline status under `set -o pipefail`.
best_epoch_from_log() {
  local log=$1
  [[ -f "$log" ]] || return 0
  awk -F'[ :/=]' '
    /^epoch/ {
      for (i = 1; i <= NF; i++) {
        if ($i == "val_loss") print $(i + 1) " " $2
      }
    }
  ' "$log" | sort -n | head -n 1 | awk '{print $2}'
}

# Evaluate the selected checkpoint ($CKPT) on one dataset, mirroring the
# combined stdout/stderr to a log file.
# Arguments: $1 - label used in the banner
#            $2 - dataset path
#            $3 - log file path
run_eval() {
  local label=$1 dataset=$2 log=$3
  echo "===== Eval R6a on ${label} test ====="
  opf eval "$dataset" \
    --checkpoint "$CKPT" \
    --decode-mode viterbi --per-class --label-counts \
    --device cuda 2>&1 | tee "$log"
}

echo "===== Round 6a: CLEANED 5-class (cosine + focal + LLRD) ====="

# Start from an empty log so the monitor never reads lines left over from a
# previous run before tee has truncated the file.
: >"$TRAIN_LOG"

# The trainer itself is the background job: its output reaches tee through a
# process substitution rather than a pipeline. With `cmd | tee log &`, $! is
# the PID of tee (the last pipeline stage), so the monitor and `wait` would
# act on tee instead of the training process.
# tee may still be flushing for an instant after the trainer exits.
opf train "$DATA/enriched_5class_train_cleaned.jsonl" \
  --validation-dataset "$DATA/enriched_5class_valid_cleaned.jsonl" \
  --label-space-json "$LABELS/cyner_5class.json" \
  --output-dir "$RUN_DIR" \
  --overwrite-output \
  --epochs 15 --batch-size 4 --grad-accum-steps 2 \
  --learning-rate 5e-5 \
  --warmup-fraction 0.1 --lr-schedule cosine \
  --loss-fn focal --focal-gamma 2.0 \
  --llrd-factor 0.9 \
  --device cuda > >(tee "$TRAIN_LOG") 2>&1 &
TRAIN_PID=$!

# The monitor receives the log path, the patience value and the trainer PID.
# NOTE(review): presumably it signals that PID once val_loss stops improving;
# confirm against scripts/early_stop_monitor.sh.
bash scripts/early_stop_monitor.sh "$TRAIN_LOG" "$PATIENCE" "$TRAIN_PID" &
MONITOR_PID=$!

# A non-zero status is tolerated on purpose (an early stop terminates the
# trainer), but it is reported instead of being discarded.
train_status=0
wait "$TRAIN_PID" 2>/dev/null || train_status=$?
kill "$MONITOR_PID" 2>/dev/null || true
if (( train_status != 0 )); then
  printf 'note: training exited with status %d\n' "$train_status" >&2
fi

# Find best epoch checkpoint
BEST_EPOCH=$(best_epoch_from_log "$TRAIN_LOG")
echo "Best epoch: $BEST_EPOCH"

CKPT=$RUN_DIR
if [[ ! -f "$CKPT/model.safetensors" ]]; then
  # No final model was written; fall back to the best per-epoch checkpoint.
  [[ -n "$BEST_EPOCH" ]] \
    || die "no $CKPT/model.safetensors and no val_loss entries in $TRAIN_LOG; nothing to evaluate"
  echo "Training killed early — using epoch $BEST_EPOCH checkpoint"
  CKPT="$RUN_DIR/epoch_${BEST_EPOCH}"
fi

run_eval "enriched" "$DATA/enriched_5class_test.jsonl" eval_r6a_enriched.log
run_eval "CyNER" "$DATA/cyner_test.jsonl" eval_r6a_cyner.log
run_eval "SecureBERT2" "$DATA/securebert2_test.jsonl" eval_r6a_sb2.log

echo "===== Viterbi Grid Search ====="
python3 scripts/viterbi_grid_search.py \
  --checkpoint "$CKPT" \
  --eval-data "$DATA/enriched_5class_valid_cleaned.jsonl" \
  --device cuda 2>&1 | tee viterbi_r6a.log

echo "===== ROUND 6 DONE ====="