#!/bin/bash
# ═══════════════════════════════════════════════════════════════
# RAE Training Launcher
# ═══════════════════════════════════════════════════════════════
#
# Two paths:
#   A) AutoTrain — simple, no-code, standard SFT on RAE data
#   B) Custom    — RAE multi-objective loss, full control
#
# Usage:
#   ./scripts/run_training.sh autotrain   # Path A
#   ./scripts/run_training.sh custom      # Path B
#   ./scripts/run_training.sh             # Defaults to custom
#
# Environment:
#   HF_USERNAME, HF_TOKEN — must both be non-empty for the autotrain path.
#
# Exit status:
#   1 on an unknown mode or missing HF credentials; otherwise the status of
#   the first failing command (the script runs under `set -euo pipefail`).
# ═══════════════════════════════════════════════════════════════

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
readonly SCRIPT_DIR PROJECT_DIR

# Path (relative to PROJECT_DIR) whose absence triggers dataset generation.
readonly TRAIN_DATA="data/rae_training_data/train.jsonl"

#######################################
# Print each argument as its own line on stderr, then exit 1.
# Arguments: $@ - message lines
#######################################
die() {
  printf '%s\n' "$@" >&2
  exit 1
}

#######################################
# Reject anything other than the two supported modes.
# Called before any side effects so a typo cannot trigger dataset generation.
# Arguments: $1 - requested mode
#######################################
validate_mode() {
  case "$1" in
    autotrain | custom) ;;
    *)
      die "Unknown mode: $1" \
        "Usage: ./scripts/run_training.sh [autotrain|custom]"
      ;;
  esac
}

#######################################
# Abort unless both Hugging Face credential variables are non-empty.
# Globals: HF_USERNAME, HF_TOKEN (read)
#######################################
require_hf_credentials() {
  if [[ -z "${HF_USERNAME:-}" || -z "${HF_TOKEN:-}" ]]; then
    die "Set HF_USERNAME and HF_TOKEN environment variables:" \
      "  export HF_USERNAME=your_username" \
      "  export HF_TOKEN=your_write_token"
  fi
}

#######################################
# Run the dataset generator when the training file is absent.
# Globals: TRAIN_DATA (read)
#######################################
ensure_training_data() {
  if [[ -f "$TRAIN_DATA" ]]; then
    return 0
  fi
  echo "⚠ Training data not found. Generating..."
  bash scripts/generate_dataset.sh
  # Only a warning: this script cannot see where the generator writes, so a
  # hard failure here could reject a run that would otherwise have worked.
  if [[ ! -f "$TRAIN_DATA" ]]; then
    printf '⚠ %s is still missing after generation; training may fail.\n' \
      "$TRAIN_DATA" >&2
  fi
}

#######################################
# Print the CUDA device name reported by torch, "CPU only" when CUDA is
# unavailable, or "Not detected" when the probe itself fails.
#######################################
report_gpu() {
  python -c "import torch; print(f'GPU: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else \"CPU only\"}')" 2>/dev/null \
    || echo "GPU: Not detected"
}

#######################################
# Best-effort `pip install -q` of the given packages.
# pip's own stderr stays hidden and a failure never aborts the run, but it is
# reported so a later "command not found" / ImportError has a visible cause.
# Arguments: $@ - package names
#######################################
install_deps() {
  if ! pip install -q "$@" 2>/dev/null; then
    printf '⚠ pip install failed for: %s (continuing with the current environment)\n' \
      "$*" >&2
  fi
}

#######################################
# Path A: standard SFT through AutoTrain.
#######################################
run_autotrain() {
  echo ""
  echo "▶ Path A: AutoTrain (Standard SFT on RAE-structured data)"
  echo "  The handwriting effect comes from DATA STRUCTURE, not custom loss."
  echo ""
  install_deps autotrain-advanced
  autotrain --config configs/autotrain_rae_sft.yaml
}

#######################################
# Path B: custom trainer driven by the RAE training config.
#######################################
run_custom() {
  echo ""
  echo "▶ Path B: Custom RAE Trainer (Multi-Objective Loss)"
  echo "  Phase-weighted CE + coherence + compression loss."
  echo ""
  install_deps transformers accelerate peft bitsandbytes trl datasets wandb
  python src/train_rae.py configs/rae_training_config.json
}

#######################################
# Entry point.
# Arguments: $1 - mode: "autotrain" or "custom" (default "custom")
#######################################
main() {
  local mode="${1:-custom}"

  # All relative paths below are resolved against the project root.
  cd "$PROJECT_DIR"

  echo "═══════════════════════════════════════════════════════"
  echo "  RAE TRAINING METHODOLOGY"
  echo "  'The hand is slow so the mind can be fast later.'"
  echo "  Mode: $mode"
  echo "═══════════════════════════════════════════════════════"

  # Fail fast: check the cheap preconditions before generating data.
  validate_mode "$mode"
  if [[ "$mode" == "autotrain" ]]; then
    require_hf_credentials
  fi

  ensure_training_data
  report_gpu

  case "$mode" in
    autotrain) run_autotrain ;;
    custom) run_custom ;;
  esac

  echo ""
  echo "═══════════════════════════════════════════════════════"
  echo "  TRAINING COMPLETE"
  echo "  Run evaluation: python evaluation/eval_rae_model.py"
  echo "═══════════════════════════════════════════════════════"
}

main "$@"