File size: 871 Bytes
0e15c6f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
#!/bin/bash
# Download The Well dataset locally for faster training.
# HF streaming works but is slow (~3-4s/batch); local data is ~10x faster.
#
# Usage:
#   ./download_data.sh                              # downloads default dataset
#   ./download_data.sh active_matter                # specify dataset
#   ./download_data.sh turbulent_radiative_layer_2D train  # specific split

# Abort on errors, unset variables, and failed pipeline stages so that a
# failed download is never reported as success.
set -euo pipefail

# Positional arguments (both optional):
#   $1 - dataset name; defaults to turbulent_radiative_layer_2D
#   $2 - split to download; empty means all splits
# Environment:
#   WELL_DATA_PATH - destination directory; overrides the built-in default.
DATASET="${1:-turbulent_radiative_layer_2D}"
SPLIT="${2:-}"  # empty = all splits
# NOTE(review): the fallback below is a machine-specific absolute path;
# set WELL_DATA_PATH when running anywhere that path does not exist.
BASE_PATH="${WELL_DATA_PATH:-/home/alexw/data/the_well}"

# Fail early with a clear message if the downloader CLI is not on PATH.
if ! command -v the-well-download >/dev/null 2>&1; then
    echo "Error: the-well-download not found in PATH." >&2
    exit 127
fi

echo "Downloading The Well: dataset=$DATASET, split=${SPLIT:-all}, path=$BASE_PATH"

# Build the argument list once; --split is appended only when a split was
# requested, so the downloader is invoked from a single place.
download_args=(--base-path "$BASE_PATH" --dataset "$DATASET")
if [[ -n "$SPLIT" ]]; then
    download_args+=(--split "$SPLIT")
fi

# Capture the downloader's exit status explicitly so it can be reported on
# stderr and propagated, instead of falling through to the "Done." message.
status=0
the-well-download "${download_args[@]}" || status=$?
if (( status != 0 )); then
    echo "Error: the-well-download exited with status $status (dataset=$DATASET, split=${SPLIT:-all})." >&2
    exit "$status"
fi

echo "Done. Use --no-streaming --local_path $BASE_PATH in training scripts."