the-well-diffusion / download_data.sh
AlexWortega's picture
Upload download_data.sh with huggingface_hub
0e15c6f verified
#!/bin/bash
# Download The Well dataset locally for faster training.
# HF streaming works but is slow (~3-4s/batch); local data is ~10x faster.
#
# Usage:
# ./download_data.sh # downloads default dataset
# ./download_data.sh active_matter # specify dataset
# ./download_data.sh turbulent_radiative_layer_2D train # specific split
# Abort on unhandled errors, unset variables, and failed pipeline stages so a
# broken download can never fall through to the success message below.
set -euo pipefail

# Positional arguments:
#   $1 - dataset name (default: turbulent_radiative_layer_2D)
#   $2 - split to download; when empty or omitted no --split flag is passed
# Environment:
#   WELL_DATA_PATH - destination directory handed to --base-path
#                    (default: /home/alexw/data/the_well)
# Exit status:
#   0 on success, 127 if the-well-download is not on PATH, otherwise the
#   exit status of the-well-download.
DATASET="${1:-turbulent_radiative_layer_2D}"
SPLIT="${2:-}" # empty = all splits
BASE_PATH="${WELL_DATA_PATH:-/home/alexw/data/the_well}"

# Fail early with an actionable message instead of a bare "command not found".
if ! command -v the-well-download >/dev/null 2>&1; then
  echo "Error: the-well-download not found on PATH (is the 'the_well' package installed?)" >&2
  exit 127
fi

echo "Downloading The Well: dataset=$DATASET, split=${SPLIT:-all}, path=$BASE_PATH"

# Build the argument list once; --split is appended only when a split was given.
download_args=(--base-path "$BASE_PATH" --dataset "$DATASET")
if [[ -n "$SPLIT" ]]; then
  download_args+=(--split "$SPLIT")
fi

# Check the downloader explicitly and propagate its exit status so callers
# (and CI) can tell a failed download from a successful one.
the-well-download "${download_args[@]}" || {
  status=$?
  echo "Error: the-well-download exited with status $status; data in $BASE_PATH may be incomplete." >&2
  exit "$status"
}

echo "Done. Use --no-streaming --local_path $BASE_PATH in training scripts."