#!/bin/bash
# Download all training data sources.
#
# Run: bash scripts/download_datasets.sh
#
# All paths below are relative to the current working directory.
# NOTE(review): the usage line suggests this is meant to be launched from the
# project root — confirm.
#
# What it does:
#   1. Creates the data/raw/* directories for every dataset.
#   2. Clones the JFLEG repository and copies its test split (*.src and
#      *.ref* files) into data/raw/jfleg/.
#   3. Prints instructions for the datasets that must be fetched manually
#      (W&I+LOCNESS and GYAFC).
#
# Exits non-zero if git is unavailable, the clone fails, or the JFLEG test
# files cannot be copied.

set -euo pipefail

readonly JFLEG_URL="https://github.com/keisks/jfleg.git"
readonly JFLEG_REPO_DIR="data/raw/jfleg_repo"
readonly JFLEG_OUT_DIR="data/raw/jfleg"

#######################################
# Copy the JFLEG test split out of the local clone.
# Globals:   JFLEG_REPO_DIR (read), JFLEG_OUT_DIR (read)
# Arguments: none
# Outputs:   an error message on stderr when no files are found
# Returns:   0 on success, non-zero if nothing matched or cp failed
#######################################
copy_jfleg_test_split() {
  local -a files=()

  # nullglob makes unmatched patterns expand to nothing, so an empty or
  # incomplete clone is detected here instead of being silently ignored.
  shopt -s nullglob
  files=("$JFLEG_REPO_DIR"/test/*.src "$JFLEG_REPO_DIR"/test/*.ref*)
  shopt -u nullglob

  if (( ${#files[@]} == 0 )); then
    printf 'error: no *.src / *.ref* files found in %s/test\n' \
      "$JFLEG_REPO_DIR" >&2
    return 1
  fi

  cp -- "${files[@]}" "$JFLEG_OUT_DIR"/
}

mkdir -p data/raw/wi_locness "$JFLEG_OUT_DIR" data/raw/gyafc data/raw/custom_dyslexia

echo "=== Downloading JFLEG (JHU Fluency-Extended GUG) ==="
if [[ ! -d "$JFLEG_REPO_DIR" ]]; then
  if ! command -v git >/dev/null; then
    printf 'error: git is required to download JFLEG\n' >&2
    exit 1
  fi
  git clone "$JFLEG_URL" "$JFLEG_REPO_DIR"
  copy_jfleg_test_split
  echo " ✓ JFLEG downloaded"
elif ! compgen -G "$JFLEG_OUT_DIR/*.src" >/dev/null; then
  # The clone is present but the output directory has no source files, i.e.
  # an earlier run did not finish the copy step. Redo it rather than
  # reporting the dataset as already available.
  copy_jfleg_test_split
  echo " ✓ JFLEG downloaded"
else
  echo " ✓ JFLEG already exists"
fi

echo ""
echo "=== Manual Downloads Required ==="
echo ""
echo "W&I+LOCNESS (35k pairs, gold standard GEC):"
echo " → Register at: https://www.cl.cam.ac.uk/research/nl/bea2019st/"
echo " → Place files in: data/raw/wi_locness/"
echo ""
echo "GYAFC (105k pairs, formality transfer):"
echo " → Request access at: https://github.com/raosudha89/GYAFC-corpus"
echo " → Place files in: data/raw/gyafc/"
echo ""
echo "=== Dataset download complete ==="
echo "Check manually downloaded datasets before proceeding."