File size: 1,101 Bytes
3df5819
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
#!/bin/bash
# Download all training data sources.
#
# Usage (run from the project root -- every path used by this script is
# relative to the current working directory):
#   bash scripts/download_datasets.sh

# Strict mode: abort on any failing command (-e), on expansion of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# Create one raw-data directory per dataset; -p makes re-runs a no-op.
mkdir -p data/raw/wi_locness data/raw/jfleg data/raw/gyafc data/raw/custom_dyslexia

echo "=== Downloading JFLEG (JHU Fluency-Extended GUG) ==="
# Clone the JFLEG repository once, then copy its test split (source sentences
# and reference corrections) into data/raw/jfleg/.
jfleg_repo="data/raw/jfleg_repo"
jfleg_dest="data/raw/jfleg"

if [ ! -d "$jfleg_repo" ]; then
    if ! command -v git >/dev/null 2>&1; then
        echo "  ✗ git is required to download JFLEG" >&2
        exit 1
    fi
    if ! git clone https://github.com/keisks/jfleg.git "$jfleg_repo"; then
        echo "  ✗ Failed to clone JFLEG into $jfleg_repo" >&2
        exit 1
    fi
    jfleg_status="  ✓ JFLEG downloaded"
else
    jfleg_status="  ✓ JFLEG already exists"
fi

# Collect the files to copy. nullglob makes unmatched patterns expand to
# nothing, so a missing or incomplete checkout is detected here instead of
# being silently ignored.
shopt -s nullglob
jfleg_files=("$jfleg_repo"/test/*.src "$jfleg_repo"/test/*.ref*)
shopt -u nullglob

if [ "${#jfleg_files[@]}" -eq 0 ]; then
    echo "  ✗ No JFLEG test files found in $jfleg_repo/test;" \
        "remove $jfleg_repo and re-run this script" >&2
    exit 1
fi

# Copy on every run (not only right after cloning) so that a previous run
# which cloned but never populated the dataset directory is repaired.
mkdir -p "$jfleg_dest"
if ! cp -- "${jfleg_files[@]}" "$jfleg_dest"/; then
    echo "  ✗ Failed to copy JFLEG files into $jfleg_dest" >&2
    exit 1
fi
echo "$jfleg_status"

# Print the instructions for the datasets that cannot be fetched
# automatically. The quoted delimiter keeps the text literal (no expansion).
cat <<'INSTRUCTIONS'

=== Manual Downloads Required ===

W&I+LOCNESS (35k pairs, gold standard GEC):
  → Register at: https://www.cl.cam.ac.uk/research/nl/bea2019st/
  → Place files in: data/raw/wi_locness/

GYAFC (105k pairs, formality transfer):
  → Request access at: https://github.com/raosudha89/GYAFC-corpus
  → Place files in: data/raw/gyafc/

=== Dataset download complete ===
Check manually downloaded datasets before proceeding.
INSTRUCTIONS