MotionVLA / organize_data.sh
AlmightyFish's picture
model
16a250e
Raw
History Blame Contribute Delete
2.62 kB
#!/bin/bash
# organize_data.sh
# Moves large data files into the HuggingFace repo structure.
# Run this ONCE before uploading to HuggingFace.
#
# What this moves (source directory under vimogen_full/ → destination in this repo):
# images/ (7.8GB, 53K files) → data/images/
# motions_dsfast_v4/ (338MB, 41,971 .pt) → data/motions_tokens/
# in_the_wild_video/ (4.8GB, 41,971 .pt) → data/motions_raw/
#
# Source paths (edit if needed):
# Fail fast from this point on: abort on any command error, on use of an
# unset variable, and when any stage of a pipeline fails. This is enabled
# BEFORE the paths are computed so that a failure while resolving them can
# never leave an empty SCRIPT_DIR behind (which would turn the destinations
# into /data/...).
set -euo pipefail
# Each source path can be overridden from the environment, e.g.
#   SRC_IMAGES=/mnt/vimogen/images ./organize_data.sh
# When a variable is unset or empty, the original default below is used.
SRC_IMAGES="${SRC_IMAGES:-/Users/bytedance/Downloads/MotionVLA/motionvla/data/vimogen_full/images}"
SRC_TOKENS="${SRC_TOKENS:-/Users/bytedance/Downloads/MotionVLA/motionvla/data/vimogen_full/motions_dsfast_v4}"
SRC_RAW="${SRC_RAW:-/Users/bytedance/Downloads/MotionVLA/motionvla/data/vimogen_full/in_the_wild_video}"
# Destination (relative to this script's directory):
# Resolve the directory containing this script to an absolute path.
# CDPATH is cleared for the cd because, with CDPATH set, cd may print the
# directory it selected, and that text would be captured by $(...) as well.
SCRIPT_DIR="$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)" || {
  echo "ERROR: cannot resolve the script directory from '$0'" >&2
  exit 1
}
if [ -z "$SCRIPT_DIR" ]; then
  echo "ERROR: script directory resolved to an empty path" >&2
  exit 1
fi
DST_IMAGES="$SCRIPT_DIR/data/images"
DST_TOKENS="$SCRIPT_DIR/data/motions_tokens"
DST_RAW="$SCRIPT_DIR/data/motions_raw"
# Print the banner together with the resolved source and destination paths,
# one argument per output line, so the user can review them before any file
# is moved.
printf '%s\n' \
  "==================================================" \
  " MotionVLA Data Organizer" \
  "==================================================" \
  "" \
  "Source images : $SRC_IMAGES" \
  "Source tokens : $SRC_TOKENS" \
  "Source raw : $SRC_RAW" \
  "" \
  "Destination : $SCRIPT_DIR/data/" \
  "" \
  "Press ENTER to continue, Ctrl+C to cancel..."
# Wait for one line on stdin as confirmation; the line itself is not used.
read
#######################################
# Move every non-hidden entry of one directory into another directory.
#
# Entries are handed to mv in batches by `find ... -exec ... {} +`, so the
# argument list never exceeds the system limit no matter how many files the
# source holds (a plain `mv "$src"/* "$dst"/` fails with "Argument list too
# long" for tens of thousands of files). An empty source directory is a
# no-op rather than an error. As with the `*` glob, names starting with a
# dot are left in place.
#
# Arguments:
#   $1 - step tag shown in brackets, e.g. "1/3"
#   $2 - human-readable description printed in the "Moving ..." line
#   $3 - source directory
#   $4 - destination directory (created if missing)
# Outputs:
#   Progress lines on stdout; an error line on stderr if the move fails.
# Returns:
#   0 when the source is missing (step skipped) or the move succeeded,
#   1 when the destination cannot be created or any mv fails.
#######################################
move_dir_contents() {
  local step=$1 label=$2 src=$3 dst=$4
  local count

  if [ ! -d "$src" ]; then
    echo "[$step] SKIP: $src not found"
    return 0
  fi

  echo "[$step] Moving $label ..."
  mkdir -p -- "$dst" || return 1

  # The inline sh receives the destination as $1 and one batch of entries
  # as the remaining arguments. With the `+` form, find exits non-zero if
  # any batch fails, so errors still propagate to the caller.
  find "$src" -mindepth 1 -maxdepth 1 ! -name '.*' \
    -exec sh -c 'dst=$1; shift; exec mv -- "$@" "$dst"/' sh "$dst" {} + || {
    echo "[$step] ERROR: failed to move contents of $src to $dst" >&2
    return 1
  }

  # Count non-hidden entries with a glob in a subshell instead of parsing
  # ls output; nullglob makes an empty directory count as 0.
  count=$(shopt -s nullglob; set -- "$dst"/*; echo "$#") || return 1
  echo " Done: $count files"
}

# Step 1: Move images
move_dir_contents "1/3" "images (7.8GB)" "$SRC_IMAGES" "$DST_IMAGES"
# Step 2: Move motion tokens (v4, Qwen vocab space)
move_dir_contents "2/3" "motion tokens (338MB)" "$SRC_TOKENS" "$DST_TOKENS"
# Step 3: Move raw 276-dim motions
move_dir_contents "3/3" "raw 276-dim motions (4.8GB)" "$SRC_RAW" "$DST_RAW"
#######################################
# Print the closing summary: the size of the data directory followed by the
# suggested upload commands.
#
# Globals:
#   SCRIPT_DIR (read) - directory whose data/ subdirectory is measured
# Outputs:
#   Summary text on stdout.
#######################################
print_summary() {
  echo ""
  echo "=================================================="
  echo " Data organization complete!"
  echo " Total size:"
  # du exits non-zero when data/ does not exist (e.g. every step was
  # skipped). Its error text is hidden, so without the fallback `set -e`
  # would end the script here, silently, before the next steps are shown.
  du -sh "$SCRIPT_DIR/data/" 2>/dev/null || echo " (size unavailable)"
  echo "=================================================="
  echo ""
  echo "Next steps:"
  echo " 1. Upload to HuggingFace:"
  echo " huggingface-cli upload <your-hf-username>/MotionVLA-Dataset . --repo-type dataset"
  echo " 2. Upload model checkpoints:"
  echo " huggingface-cli upload <your-hf-username>/MotionVLA checkpoints/ --repo-type model"
}

print_summary