#!/usr/bin/env bash
#
# Download The Well dataset locally for faster training.
# HF streaming works but is slow (~3-4s/batch); local data is ~10x faster.
#
# Usage:
#   ./download_data.sh                                     # downloads default dataset
#   ./download_data.sh active_matter                       # specify dataset
#   ./download_data.sh turbulent_radiative_layer_2D train  # specific split
#
# Arguments:
#   $1  dataset name (default: turbulent_radiative_layer_2D)
#   $2  split to download (default: empty, so no --split flag is passed)
#
# Environment:
#   WELL_DATA_PATH  destination directory (default: /home/alexw/data/the_well)
#
# Exit status:
#   127 if the-well-download is not on PATH; otherwise the downloader's
#   status on failure, or 0 on success.

# Abort on the first failing command so a failed download is never followed
# by the "Done." message and a zero exit status.
set -euo pipefail

DATASET="${1:-turbulent_radiative_layer_2D}"
SPLIT="${2:-}" # empty = all splits
# NOTE: the fallback below is a machine-specific path; set WELL_DATA_PATH to
# point somewhere else on other machines.
BASE_PATH="${WELL_DATA_PATH:-/home/alexw/data/the_well}"

# Fail early with a clear message when the downloader is not installed.
if ! command -v the-well-download >/dev/null 2>&1; then
  printf 'error: the-well-download not found on PATH\n' >&2
  exit 127
fi

printf '%s\n' "Downloading The Well: dataset=$DATASET, split=${SPLIT:-all}, path=$BASE_PATH"

# Build the argument list once; --split is appended only when a split was given.
download_args=(--base-path "$BASE_PATH" --dataset "$DATASET")
if [[ -n "$SPLIT" ]]; then
  download_args+=(--split "$SPLIT")
fi
the-well-download "${download_args[@]}"

printf '%s\n' "Done. Use --no-streaming --local_path $BASE_PATH in training scripts."