#!/bin/bash
# Listing metadata from the page this script was copied from: file size 3,546 bytes; f05d627 (presumably the revision id).
# ═══════════════════════════════════════════════════════════════
# RAE Training Launcher
# ═══════════════════════════════════════════════════════════════
#
# Two paths:
#   A) AutoTrain — simple, no-code, standard SFT on RAE data
#   B) Custom — RAE multi-objective loss, full control
#
# Usage:
#   ./scripts/run_training.sh autotrain   # Path A
#   ./scripts/run_training.sh custom      # Path B
#   ./scripts/run_training.sh             # Defaults to custom
# ═══════════════════════════════════════════════════════════════
# Abort on command failure, on use of an unset variable, and on a failure in
# any stage of a pipeline.
set -euo pipefail

# Resolve the project root from this script's own location (the parent of the
# directory that holds the script) and run from there, so the relative paths
# used further down do not depend on the caller's working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
cd "$PROJECT_DIR"

# Training path selected by the first argument; "custom" when omitted or empty.
MODE="${1:-custom}"

#######################################
# Check that a mode name is one this launcher can dispatch.
# Arguments: $1 - mode name
# Outputs:   an error and usage line on stderr when the mode is not recognised
# Returns:   0 for "autotrain" or "custom", 1 otherwise
#######################################
validate_mode() {
  case "$1" in
    autotrain | custom) return 0 ;;
  esac
  printf 'Unknown mode: %s\n' "$1" >&2
  printf 'Usage: ./scripts/run_training.sh [autotrain|custom]\n' >&2
  return 1
}

# Reject a bad mode right away, before any later step (dataset generation,
# GPU probe, package installs) has a chance to run on its behalf.
validate_mode "$MODE" || exit 1

echo "═══════════════════════════════════════════════════════"
echo " RAE TRAINING METHODOLOGY"
echo " 'The hand is slow so the mind can be fast later.'"
echo " Mode: $MODE"
echo "═══════════════════════════════════════════════════════"
#######################################
# Make sure the training split exists, generating it when it is missing.
# Arguments: $1 - path of the training file
#                 (default: data/rae_training_data/train.jsonl)
#            $2 - generator script, run with bash when the file is missing
#                 (default: scripts/generate_dataset.sh)
# Outputs:   warning and error messages on stderr
# Returns:   0 when the file exists afterwards; the generator's exit status
#            if it fails; 1 if it succeeds without producing the file
#######################################
ensure_training_data() {
  local train_file="${1:-data/rae_training_data/train.jsonl}"
  local generator="${2:-scripts/generate_dataset.sh}"

  if [[ -f "$train_file" ]]; then
    return 0
  fi

  printf '⚠ Training data not found. Generating...\n' >&2
  bash "$generator" || return

  # A generator that exits 0 without writing the file would otherwise only
  # surface later, as a failure inside the training job.
  if [[ ! -f "$train_file" ]]; then
    printf 'Dataset generation finished but %s is still missing.\n' "$train_file" >&2
    return 1
  fi
}

# Validate data exists
ensure_training_data
# Check GPU: ask torch for the name of CUDA device 0, or print "CPU only" when
# CUDA is unavailable. The probe's stderr is discarded; if the probe itself
# fails, a fixed notice is printed instead of aborting the launcher.
gpu_probe="import torch; print(f'GPU: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else \"CPU only\"}')"
if ! python -c "$gpu_probe" 2>/dev/null; then
  echo "GPU: Not detected"
fi
# Dispatch on the selected mode. Each arm installs its Python packages on a
# best-effort basis and then starts the matching trainer with its config file.
case "$MODE" in
  autotrain)
    echo ""
    echo "▶ Path A: AutoTrain (Standard SFT on RAE-structured data)"
    echo " The handwriting effect comes from DATA STRUCTURE, not custom loss."
    echo ""
    # Ensure HF credentials: both variables must be set and non-empty.
    if [[ -z "${HF_USERNAME:-}" || -z "${HF_TOKEN:-}" ]]; then
      {
        echo "Set HF_USERNAME and HF_TOKEN environment variables:"
        echo " export HF_USERNAME=your_username"
        echo " export HF_TOKEN=your_write_token"
      } >&2
      exit 1
    fi
    # Best-effort install: pip's stderr and exit status are deliberately
    # ignored (presumably so a pre-provisioned or offline host is not blocked).
    pip install -q autotrain-advanced 2>/dev/null || true
    # Because the install above may have failed silently, confirm the CLI is
    # actually on PATH and explain what is missing if it is not.
    if ! command -v autotrain >/dev/null 2>&1; then
      echo "autotrain command not found; install it with: pip install autotrain-advanced" >&2
      exit 127
    fi
    autotrain --config configs/autotrain_rae_sft.yaml
    ;;
  custom)
    echo ""
    echo "▶ Path B: Custom RAE Trainer (Multi-Objective Loss)"
    echo " Phase-weighted CE + coherence + compression loss."
    echo ""
    # Best-effort install, same policy as the autotrain arm; a missing package
    # will surface when the trainer below tries to import it.
    pip install -q transformers accelerate peft bitsandbytes trl datasets wandb 2>/dev/null || true
    python src/train_rae.py configs/rae_training_config.json
    ;;
  *)
    {
      echo "Unknown mode: $MODE"
      echo "Usage: ./scripts/run_training.sh [autotrain|custom]"
    } >&2
    exit 1
    ;;
esac
#######################################
# Print the closing banner with the suggested evaluation command.
# Arguments: none
# Outputs:   a blank line followed by the four banner lines, on stdout
#######################################
print_completion_banner() {
  local rule="═══════════════════════════════════════════════════════"
  printf '\n%s\n%s\n%s\n%s\n' \
    "$rule" \
    " TRAINING COMPLETE" \
    " Run evaluation: python evaluation/eval_rae_model.py" \
    "$rule"
}

print_completion_banner