VibeToken / setup.sh
APGASU's picture
scripts
7bef20f verified
#!/bin/bash
# Data preparation script for VibeToken training.
#
# Steps:
#   1. Download ILSVRC/imagenet-1k from the Hugging Face Hub into
#      ${DATA_DIR}/imagenet-1k.
#   2. Run data/convert_imagenet_to_wds.py to convert it to WebDataset format
#      under ${DATA_DIR}/imagenet_wds.
#
# Environment:
#   DATA_DIR                   Where datasets are stored (defaults to ./data).
#   HF_HUB_ENABLE_HF_TRANSFER  Defaults to 1. Export it as 0 before running to
#                              opt out (e.g. when the optional hf_transfer
#                              package is not installed).
#
# Usage:
#   export DATA_DIR=/mnt/fastssd/datasets  # optional, defaults to ./data
#   bash setup.sh
#
# NOTE: the conversion script is referenced by a relative path, so run this
# from the directory that contains data/convert_imagenet_to_wds.py.

# Abort on the first failing command, on unset variables, and on failures
# inside pipelines, so a broken download never feeds the conversion step.
set -euo pipefail

DATA_DIR="${DATA_DIR:-./data}"
readonly CONVERT_SCRIPT="data/convert_imagenet_to_wds.py"

printf '%s\n' "Using DATA_DIR=${DATA_DIR}"

# Fail fast, before starting a long download, if a prerequisite is missing.
for required_tool in huggingface-cli python; do
  if ! command -v "${required_tool}" >/dev/null 2>&1; then
    printf 'error: required command not found: %s\n' "${required_tool}" >&2
    exit 1
  fi
done

if [[ ! -f "${CONVERT_SCRIPT}" ]]; then
  printf 'error: %s not found; run this script from the repository root\n' \
    "${CONVERT_SCRIPT}" >&2
  exit 1
fi

mkdir -p -- "${DATA_DIR}"

# Download ImageNet-1k via HuggingFace.
# Keeps the previous default of 1 but respects a value already exported by
# the caller.
export HF_HUB_ENABLE_HF_TRANSFER="${HF_HUB_ENABLE_HF_TRANSFER:-1}"
huggingface-cli download ILSVRC/imagenet-1k \
  --repo-type dataset \
  --local-dir "${DATA_DIR}/imagenet-1k"

# Convert to WebDataset format
python "${CONVERT_SCRIPT}" \
  --input_dir "${DATA_DIR}/imagenet-1k" \
  --output_dir "${DATA_DIR}/imagenet_wds"