File size: 645 Bytes
7bef20f | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 | #!/bin/bash
# Data preparation script for VibeToken training.
# Set DATA_DIR to control where datasets are stored (defaults to ./data).
#
# Usage:
# export DATA_DIR=/mnt/fastssd/datasets # optional, defaults to ./data
# bash setup.sh
# Abort on the first failing command, on any use of an unset variable, and on
# a failure in any pipeline stage. Without this, a failed download would still
# fall through to the conversion step and run it against a missing or
# incomplete dataset directory.
set -euo pipefail

# Root directory for all datasets: honours a caller-supplied DATA_DIR and
# falls back to ./data when it is unset or empty.
DATA_DIR="${DATA_DIR:-./data}"
echo "Using DATA_DIR=${DATA_DIR}"

# Download ImageNet-1k via HuggingFace into ${DATA_DIR}/imagenet-1k.
# HF_HUB_ENABLE_HF_TRANSFER=1 is exported so the download command sees it.
# NOTE(review): this presumably requires the `hf_transfer` package to be
# installed alongside huggingface_hub -- confirm in the target environment.
export HF_HUB_ENABLE_HF_TRANSFER=1
huggingface-cli download ILSVRC/imagenet-1k \
  --repo-type dataset \
  --local-dir "${DATA_DIR}/imagenet-1k"

# Convert the downloaded dataset to WebDataset format, writing the result to
# ${DATA_DIR}/imagenet_wds.
# The converter path is relative to the current working directory, so this
# script must be run from the directory that contains data/.
python data/convert_imagenet_to_wds.py \
  --input_dir "${DATA_DIR}/imagenet-1k" \
  --output_dir "${DATA_DIR}/imagenet_wds"
|