TTI / Reward /robometer /scripts /download_data.sh
JosephBai's picture
Upload folder using huggingface_hub
857c2e9 verified
#!/usr/bin/env bash
set -euo pipefail
# ------------------------------------------------------------------
# Fast dataset setup via either Hugging Face CLI downloads (default)
# or git-lfs clones from the Hub.
#
# * HF CLI path requires: `pip install huggingface_hub` (provides `hf` CLI)
# * Git path requires: `git` and `git-lfs`
# ------------------------------------------------------------------
# ------------------------------
# Parse command-line arguments
#
# Defaults come from the environment:
#   RFM_DOWNLOAD_METHOD     download method (hf | git); defaults to hf
#   ROBOMETER_DATASET_PATH  target directory; falls back to RFM_DATASET_PATH,
#                           then to ./robometer_dataset
# ------------------------------
METHOD=${RFM_DOWNLOAD_METHOD:-hf} # hf | git
BASE_DIR_DEFAULT=${ROBOMETER_DATASET_PATH:-${RFM_DATASET_PATH:-./robometer_dataset}}
BASE_DIR="$BASE_DIR_DEFAULT"

# Print a short usage summary to stdout.
usage() {
  cat <<EOF
Usage: ${0##*/} [--hf | --git | --method <hf|git>] [--dir <path>] [<path>]

  --hf, --git            select the download method
  --method <hf|git>      same, also accepted as --method=<hf|git>
  -d, --dir, --base-dir  directory that receives the datasets
  <path>                 positional form of --dir
  -h, --help             show this help and exit
EOF
}

# Apply the given command-line arguments to METHOD and BASE_DIR.
# Arguments: the script's positional parameters ("$@")
# Globals:   METHOD (written), BASE_DIR (written)
# Exits:     1 when an option is missing its value or is not recognised,
#            0 after printing the usage text for -h/--help
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --git)
        METHOD="git"
        shift
        ;;
      --hf)
        METHOD="hf"
        shift
        ;;
      --method=*)
        METHOD="${1#*=}"
        shift
        ;;
      --method)
        # Without this check a trailing "--method" dies with an
        # "unbound variable" message under `set -u`.
        if [[ $# -lt 2 ]]; then
          echo "Error: $1 requires a value ('hf' or 'git')." >&2
          exit 1
        fi
        METHOD="$2"
        shift 2
        ;;
      --dir=* | --base-dir=*)
        BASE_DIR="${1#*=}"
        shift
        ;;
      --dir | --base-dir | -d)
        if [[ $# -lt 2 ]]; then
          echo "Error: $1 requires a directory path." >&2
          exit 1
        fi
        BASE_DIR="$2"
        shift 2
        ;;
      -h | --help)
        usage
        exit 0
        ;;
      -*)
        # A mistyped flag must not be silently taken as the dataset directory.
        echo "Error: Unknown option '$1'. Use --help for usage." >&2
        exit 1
        ;;
      *)
        # Bare argument: positional form of the dataset directory.
        BASE_DIR="$1"
        shift
        ;;
    esac
  done
}

parse_args "$@"
# ------------------------------
# Verify the tools required by the selected download method, then make
# sure the target directory exists.
#   hf  : the `hf` CLI must be on PATH (fatal if missing)
#   git : `git` must be on PATH (fatal if missing); a missing git-lfs only
#         produces a warning
# Any other METHOD value is rejected.
# ------------------------------
if [[ "$METHOD" == "hf" ]]; then
  command -v hf >/dev/null 2>&1 || {
    printf '%s\n' \
      "Error: 'hf' CLI not found. Install with:" \
      " uv pip install huggingface_hub (or ensure your venv is activated)" >&2
    exit 1
  }
elif [[ "$METHOD" == "git" ]]; then
  command -v git >/dev/null 2>&1 || {
    printf '%s\n' "Error: git not found. Please install git." >&2
    exit 1
  }
  # Not fatal: the clone still works, but large files may stay as pointers.
  git lfs version >/dev/null 2>&1 || {
    printf '%s\n' \
      "Warning: git-lfs not found. You may end up with pointer files." \
      " Install git-lfs for full downloads." >&2
  }
else
  printf '%s\n' "Error: Unknown METHOD='${METHOD}'. Use 'hf' or 'git'." >&2
  exit 1
fi

mkdir -p "${BASE_DIR}"
# ------------------------------
# Helper to download a dataset repo
#
# Arguments: $1 - Hub dataset repo id, e.g. abraranwar/libero_rfm
# Globals:   METHOD   (read) "hf" uses the hf CLI; any other value uses git
#            BASE_DIR (read) parent directory; the repo lands in
#                            ${BASE_DIR}/<last path segment of the repo id>
# Returns:   0 on success, otherwise the status of the first required step
#            that failed
#
# Every required step is checked explicitly. This function is invoked from
# the condition of an `until` loop, and bash ignores `set -e` there, so an
# unchecked failure would fall through and could be reported as success.
# ------------------------------
download_dataset() {
  local repo_id="$1"          # e.g., abraranwar/libero_rfm
  local name="${repo_id##*/}" # last path segment as folder name
  local target_dir="${BASE_DIR}/${name}"

  echo "Downloading ${repo_id} -> ${target_dir} via ${METHOD}"

  if [[ "$METHOD" == "hf" ]]; then
    hf download "${repo_id}" \
      --repo-type dataset \
      --local-dir "${target_dir}"
    return # propagate the hf exit status unchanged
  fi

  local url="https://huggingface.co/datasets/${repo_id}.git"
  if [[ -d "${target_dir}/.git" ]]; then
    echo "Updating existing clone at ${target_dir}"
    # Best effort: a failing set-url still leaves a usable origin to fetch from.
    git -C "${target_dir}" remote set-url origin "${url}" || true
    git -C "${target_dir}" fetch --all --tags || return
    git -C "${target_dir}" pull --ff-only || return
  else
    git clone "${url}" "${target_dir}" || return
  fi

  if git lfs version >/dev/null 2>&1; then
    # Best effort: installing the local hooks is not required for the pull.
    git -C "${target_dir}" lfs install --local >/dev/null 2>&1 || true
    # A failed LFS pull leaves pointer files behind, so report it to the caller.
    git -C "${target_dir}" lfs pull || return
  fi
  return 0
}
# ------------------------------
# Retry helper – keeps trying until success (or an optional attempt limit)
#
# Arguments: $1 - command string, executed with `eval` in the current shell
#            $2 - description used in log lines (default: the command string)
#            $3 - maximum number of attempts; 0 means unlimited (default: 0)
#            $4 - seconds to sleep between attempts (default: 30)
# Returns:   0 once the command succeeds; 1 when the attempt limit is reached
#
# NOTE: the command runs as the condition of `until`, where bash ignores
# `set -e`; the command must report failure through its own exit status.
# NOTE(review): $1 goes through `eval` – only pass trusted command strings.
# ------------------------------
retry_until_success() {
  local cmd="$1"
  local desc="${2:-$cmd}"
  local max_attempts="${3:-0}"
  local delay="${4:-30}"
  local attempt=1

  echo "=== ${desc} ==="
  until eval "$cmd"; do
    if ((max_attempts > 0 && attempt >= max_attempts)); then
      echo "❌ $desc failed after ${attempt} attempt(s) – giving up." >&2
      return 1
    fi
    echo "❌ $desc failed – retrying in ${delay}s …"
    sleep "$delay"
    attempt=$((attempt + 1))
  done
  echo "✅ $desc succeeded."
}
# ------------------------------
# Dataset repos to download, in this order.
# A leading '#' disables an entry: inside the array literal bash treats it
# as a comment, so the entry never becomes an array element.
# ------------------------------
declare -a repos=(
  abraranwar/libero_rfm
  abraranwar/agibotworld_alpha_rfm
  abraranwar/agibotworld_alpha_headcam_rfm
  abraranwar/usc_koch_rewind_rfm
  ykorkmaz/libero_failure_rfm
  aliangdw/metaworld
  jesbu1/oxe_rfm
  jesbu1/galaxea_rfm
  jesbu1/molmoact_rfm
  jesbu1/ph2d_rfm
  jesbu1/epic_rfm
  jesbu1/failsafe_rfm
  jesbu1/h2r_rfm
  jesbu1/roboarena_0825_rfm
  jesbu1/oxe_rfm_eval
  # The subset below can be replaced with the full dataset anqil/rh20t_rfm.
  anqil/rh20t_subset_rfm
  jesbu1/humanoid_everyday_rfm
  jesbu1/motif_rfm
  jesbu1/auto_eval_rfm
  jesbu1/soar_rfm
  jesbu1/racer_rfm
  jesbu1/egodex_rfm
  aliangdw/usc_xarm_policy_ranking
  aliangdw/usc_franka_policy_ranking
  aliangdw/utd_so101_policy_ranking
  aliangdw/utd_so101_human
  jesbu1/mit_franka_p-rank_rfm
  jesbu1/utd_so101_clean_policy_ranking_top
  jesbu1/utd_so101_clean_policy_ranking_wrist
  jesbu1/usc_koch_human_robot_paired
  jesbu1/usc_koch_p_ranking_rfm
  # Disabled:
  #jesbu1/roboreward_rfm
  jesbu1/roboreward_rfm_high_res
  jesbu1/rfm_new_mit_franka_rfm_nowrist
  ykorkmaz/usc_trossen_rfm
  aliangdw/robofac_rbm
)
# ------------------------------
# Fetch every listed repo in order, retrying each one until it succeeds,
# then tell the user which dataset path to export.
# ------------------------------
for dataset_repo in "${repos[@]}"; do
  # Defensive guard: ignore any entry that begins with '#'.
  if [[ "$dataset_repo" != \#* ]]; then
    retry_until_success "download_dataset $dataset_repo" "$dataset_repo"
  fi
done

printf '\n'
printf '%s\n' "Done. Set ROBOMETER_DATASET_PATH=${BASE_DIR} for training/eval."