#!/bin/bash
# Download The Well dataset locally for faster training.
# HF streaming works but is slow (~3-4s/batch); local data is ~10x faster.
#
# Usage:
#   ./download_data.sh                                      # downloads default dataset
#   ./download_data.sh active_matter                        # specify dataset
#   ./download_data.sh turbulent_radiative_layer_2D train   # specific split
#
# Arguments:
#   $1  dataset name (default: turbulent_radiative_layer_2D)
#   $2  split name; when omitted or empty, no --split flag is passed
#
# Environment:
#   WELL_DATA_PATH  destination directory (default: /home/alexw/data/the_well)
#
# Exit status:
#   127 if the-well-download is not on PATH; otherwise the downloader's
#   status (non-zero aborts the script before the "Done." message).

# Abort on the first failing command so a broken download is never
# followed by the success message below.
set -euo pipefail

DATASET="${1:-turbulent_radiative_layer_2D}"
SPLIT="${2:-}"  # empty = all splits
BASE_PATH="${WELL_DATA_PATH:-/home/alexw/data/the_well}"

# Fail early, with a clear message, if the downloader CLI is unavailable.
if ! command -v the-well-download >/dev/null 2>&1; then
  echo "error: the-well-download not found on PATH" >&2
  exit 127
fi

echo "Downloading The Well: dataset=$DATASET, split=${SPLIT:-all}, path=$BASE_PATH"

# Build the argument list once; --split is appended only when requested,
# so the two invocation variants cannot drift apart.
download_args=(--base-path "$BASE_PATH" --dataset "$DATASET")
if [[ -n "$SPLIT" ]]; then
  download_args+=(--split "$SPLIT")
fi

the-well-download "${download_args[@]}"

echo "Done. Use --no-streaming --local_path $BASE_PATH in training scripts."