#!/bin/bash
#
# Train NFQA Model with Pre-Split Datasets
#
# This script trains the NFQA classification model using manually split
# train/validation/test datasets for balanced training.
#
# Usage:
#   bash run_training_manual.sh
#
# Or with custom parameters:
#   bash run_training_manual.sh --epochs 15 --batch-size 32
#
# Every path used below is relative, so run the script from the directory
# that contains train_nfqa_model.py.
#
# Extra command-line arguments are appended after the defaults defined here.
# NOTE(review): overriding a default this way presumably relies on the
# training script's argument parser keeping the last occurrence of a
# repeated option - confirm against train_nfqa_model.py.
#
# Exit status: 0 when training succeeds; 1 when an input file is missing or
# the training command fails.

set -e  # Exit on error

# Default paths
TRAIN_FILE="../output/train_balanced.jsonl"
VAL_FILE="../output/val_balanced.jsonl"
TEST_FILE="../output/test_balanced.jsonl"
OUTPUT_DIR="../output/training/nfqa_model_balanced"

# Default training parameters
MODEL_NAME="xlm-roberta-base"
EPOCHS=6
BATCH_SIZE=16
LEARNING_RATE=2e-5
MAX_LENGTH=128
WARMUP_RATIO=0.1
WEIGHT_DECAY=0.1
DROPOUT=0.2

# Horizontal rule shared by every banner printed below.
readonly RULE="================================================================================"

#######################################
# Verify that an input file exists, reporting on stderr when it does not.
# Arguments: $1 - label used in the error message (e.g. "Training")
#            $2 - path to check
# Returns:   0 if the path is a regular file, 1 otherwise
#######################################
require_file() {
  local label=$1
  local path=$2

  if [[ ! -f "$path" ]]; then
    echo "❌ Error: $label file not found: $path" >&2
    return 1
  fi
}

cat <<EOF
$RULE
NFQA Model Training - Manual Split Mode
$RULE

Training Configuration:
 Train file: $TRAIN_FILE
 Validation file: $VAL_FILE
 Test file: $TEST_FILE
 Output directory: $OUTPUT_DIR
 Model: $MODEL_NAME
 Epochs: $EPOCHS
 Batch size: $BATCH_SIZE
 Learning rate: $LEARNING_RATE
 Max length: $MAX_LENGTH
 Warmup ratio: $WARMUP_RATIO
 Weight decay: $WEIGHT_DECAY
 Dropout: $DROPOUT

$RULE

EOF

# Check if required files exist. The helper is called inside `if` / `||`
# so that its non-zero return does not trip `set -e` before we can react.
if ! require_file "Training" "$TRAIN_FILE"; then
  {
    echo ""
    echo "Please run the data splitting script first:"
    echo " cd ../cleaning"
    echo " python split_train_test_val.py --input ../output/webfaq_nfqa_combined_highquality.jsonl"
  } >&2
  exit 1
fi
require_file "Validation" "$VAL_FILE" || exit 1
require_file "Test" "$TEST_FILE" || exit 1

# Create output directory
mkdir -p "$OUTPUT_DIR"

# Run training.
# The command is the `if` condition on purpose: with `set -e` active, a
# failing stand-alone command would abort the script immediately and the
# failure report in the `else` branch would never be printed.
if python train_nfqa_model.py \
  --train "$TRAIN_FILE" \
  --val "$VAL_FILE" \
  --test "$TEST_FILE" \
  --output-dir "$OUTPUT_DIR" \
  --model-name "$MODEL_NAME" \
  --epochs "$EPOCHS" \
  --batch-size "$BATCH_SIZE" \
  --learning-rate "$LEARNING_RATE" \
  --max-length "$MAX_LENGTH" \
  --warmup-ratio "$WARMUP_RATIO" \
  --weight-decay "$WEIGHT_DECAY" \
  --dropout "$DROPOUT" \
  "$@"; then  # "$@" passes any additional arguments from the command line
  cat <<EOF

$RULE
✅ Training completed successfully!
$RULE

Model saved to: $OUTPUT_DIR

Generated files:
 - best_model/ (best checkpoint based on validation F1)
 - final_model/ (final epoch checkpoint)
 - training_history.json (training metrics)
 - training_curves.png (loss/accuracy/F1 plots)
 - test_results.json (final test metrics)
 - classification_report.txt (per-category performance)
 - confusion_matrix.png (confusion matrix visualization)

Next steps:
 1. Review training curves: $OUTPUT_DIR/training_curves.png
 2. Check test results: $OUTPUT_DIR/test_results.json
 3. Analyze confusion matrix: $OUTPUT_DIR/confusion_matrix.png
 4. Deploy model from: $OUTPUT_DIR/best_model/

EOF
else
  cat >&2 <<EOF

$RULE
❌ Training failed!
$RULE

Please check the error messages above and try again.
EOF
  exit 1
fi