#!/usr/bin/env bash
set -euo pipefail
# ------------------------------------------------------------------
# Fast dataset setup via either Hugging Face CLI downloads (default)
# or git-lfs clones from the Hub.
#
# * HF CLI path requires: `pip install huggingface_hub` (provides `hf` CLI)
# * Git path requires: `git` and `git-lfs`
# ------------------------------------------------------------------
# ------------------------------
# Parse command‑line arguments
# ------------------------------
# Defaults come from the environment and may be overridden on the command line.
METHOD=${RFM_DOWNLOAD_METHOD:-hf} # hf | git
BASE_DIR_DEFAULT=${ROBOMETER_DATASET_PATH:-${RFM_DATASET_PATH:-./robometer_dataset}}
BASE_DIR="$BASE_DIR_DEFAULT"

#######################################
# Parse command-line arguments into METHOD and BASE_DIR.
# Globals:
#   METHOD   (written) - download method selected by --git/--hf/--method
#   BASE_DIR (written) - destination directory (flag value or bare argument)
# Arguments:
#   the script's positional parameters ("$@")
# Outputs:
#   usage text on stdout for -h/--help; error messages on stderr
# Returns:
#   0 on success, 1 on a missing option value or an unknown option.
#   -h/--help exits the shell with status 0.
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --git) METHOD="git"; shift ;;
      --hf) METHOD="hf"; shift ;;
      --method=*) METHOD="${1#*=}"; shift ;;
      --method)
        # Check explicitly: under `set -u` a bare "$2" would abort with an
        # unhelpful "unbound variable" message.
        if [[ $# -lt 2 ]]; then
          echo "Error: '$1' requires a value ('hf' or 'git')." >&2
          return 1
        fi
        METHOD="$2"; shift 2 ;;
      --dir|--base-dir|-d)
        if [[ $# -lt 2 ]]; then
          echo "Error: '$1' requires a directory argument." >&2
          return 1
        fi
        BASE_DIR="$2"; shift 2 ;;
      -h|--help)
        echo "Usage: ${0##*/} [--hf | --git | --method <hf|git>] [-d|--dir|--base-dir <dir>] [<dir>]"
        exit 0 ;;
      -?*)
        # Anything else that looks like a flag used to be taken as the dataset
        # directory and was later handed to `mkdir -p` as an option.
        echo "Error: Unknown option '$1'." >&2
        return 1 ;;
      *) BASE_DIR="$1"; shift ;;
    esac
  done
}

parse_args "$@" || exit 1
# ------------------------------
# Sanity checks for the chosen method
# ------------------------------
# Confirm that the tooling needed by the selected METHOD is available, then
# make sure the destination directory exists.
if [[ "$METHOD" == "hf" ]]; then
  # The hf path runs the `hf` executable, so it has to be on PATH.
  command -v hf >/dev/null 2>&1 || {
    echo "Error: 'hf' CLI not found. Install with:" >&2
    echo " uv pip install huggingface_hub (or ensure your venv is activated)" >&2
    exit 1
  }
elif [[ "$METHOD" == "git" ]]; then
  command -v git >/dev/null 2>&1 || {
    echo "Error: git not found. Please install git." >&2
    exit 1
  }
  # A missing git-lfs only produces a warning; the script keeps going.
  git lfs version >/dev/null 2>&1 || {
    echo "Warning: git-lfs not found. You may end up with pointer files." >&2
    echo " Install git-lfs for full downloads." >&2
  }
else
  # Any value other than 'hf' or 'git' is rejected.
  echo "Error: Unknown METHOD='${METHOD}'. Use 'hf' or 'git'." >&2
  exit 1
fi

mkdir -p "${BASE_DIR}"
# ------------------------------
# Helper to download a dataset repo
# ------------------------------
#######################################
# Download one dataset repo into ${BASE_DIR}/<last path segment of the id>.
# Globals:
#   BASE_DIR (read) - parent directory for the downloaded dataset
#   METHOD   (read) - "hf" runs `hf download`; any other value uses git
# Arguments:
#   $1 - repo id, e.g. abraranwar/libero_rfm
# Outputs:
#   progress on stdout; warnings and errors on stderr
# Returns:
#   0 on success; otherwise the exit status of the hf/git command that failed
#   (2 if no repo id was given).
#######################################
download_dataset() {
  if [[ $# -lt 1 || -z "$1" ]]; then
    echo "Error: download_dataset requires a repo id." >&2
    return 2
  fi
  local repo_id="$1"          # e.g., abraranwar/libero_rfm
  local name="${repo_id##*/}" # last path segment as folder name
  local target_dir="${BASE_DIR}/${name}"

  echo "Downloading ${repo_id} -> ${target_dir} via ${METHOD}"

  if [[ "$METHOD" == "hf" ]]; then
    hf download "${repo_id}" \
      --repo-type dataset \
      --local-dir "${target_dir}"
    return # propagate hf's exit status
  fi

  # This function is called as the condition of an `until` loop, where bash
  # ignores `set -e`. Each critical git step therefore returns its failure
  # explicitly; otherwise a failed clone would fall through and be reported
  # as success by the best-effort LFS section below.
  local url="https://huggingface.co/datasets/${repo_id}.git"
  if [[ -d "${target_dir}/.git" ]]; then
    echo "Updating existing clone at ${target_dir}"
    # Best-effort: re-point origin at the expected URL before fetching.
    git -C "${target_dir}" remote set-url origin "${url}" || true
    git -C "${target_dir}" fetch --all --tags || return
    git -C "${target_dir}" pull --ff-only || return
  else
    git clone "${url}" "${target_dir}" || return
  fi

  # LFS handling stays best-effort, but a failed pull is no longer silent.
  if git lfs version >/dev/null 2>&1; then
    git -C "${target_dir}" lfs install --local >/dev/null 2>&1 || true
    if ! git -C "${target_dir}" lfs pull; then
      echo "Warning: 'git lfs pull' failed in ${target_dir}; some files may still be LFS pointers." >&2
    fi
  fi
  return 0
}
# ------------------------------
# Retry helper – keeps trying until success
# ------------------------------
#######################################
# Run a command string repeatedly until it exits with status 0.
# The string is evaluated with `eval` in the current shell, so shell functions
# defined in this script can be used; only pass trusted command strings.
# The command runs as the condition of `until`, where bash ignores `set -e`,
# so it must signal failure through its own exit status.
# Arguments:
#   $1 - command string to evaluate
#   $2 - label used in log lines (default: the command string)
#   $3 - seconds to sleep between attempts (default: 30)
#   $4 - maximum number of attempts; 0 means keep trying forever (default: 0)
# Outputs:
#   progress lines on stdout; the give-up message on stderr
# Returns:
#   0 once the command succeeds; 1 if all $4 attempts failed
#######################################
retry_until_success() {
  local cmd="$1"
  local desc="${2:-$cmd}"
  local delay="${3:-30}"
  local max_attempts="${4:-0}"
  local attempt=1

  echo "=== ${desc} ==="
  until eval "$cmd"; do
    if (( max_attempts > 0 && attempt >= max_attempts )); then
      echo "❌ $desc failed after ${attempt} attempt(s) – giving up." >&2
      return 1
    fi
    echo "❌ $desc failed – retrying in ${delay}s …"
    sleep "$delay"
    attempt=$((attempt + 1))
  done
  echo "✅ $desc succeeded."
}
# ------------------------------
# List of all repos to download (including most of the ones that were previously
# commented out; entries still prefixed with '#' are skipped)
# ------------------------------
# Dataset repo ids, in the order they are downloaded. The final path segment
# of each id becomes the folder name under the dataset directory.
repos=(
  "abraranwar/libero_rfm"
  "abraranwar/agibotworld_alpha_rfm"
  "abraranwar/agibotworld_alpha_headcam_rfm"
  "abraranwar/usc_koch_rewind_rfm"
  "ykorkmaz/libero_failure_rfm"
  "aliangdw/metaworld"
  "jesbu1/oxe_rfm"
  "jesbu1/galaxea_rfm"
  "jesbu1/molmoact_rfm"
  "jesbu1/ph2d_rfm"
  "jesbu1/epic_rfm"
  "jesbu1/failsafe_rfm"
  "jesbu1/h2r_rfm"
  "jesbu1/roboarena_0825_rfm"
  "jesbu1/oxe_rfm_eval"
  # Subset; can be replaced with anqil/rh20t_rfm for the full dataset.
  "anqil/rh20t_subset_rfm"
  "jesbu1/humanoid_everyday_rfm"
  "jesbu1/motif_rfm"
  "jesbu1/auto_eval_rfm"
  "jesbu1/soar_rfm"
  "jesbu1/racer_rfm"
  "jesbu1/egodex_rfm"
  "aliangdw/usc_xarm_policy_ranking"
  "aliangdw/usc_franka_policy_ranking"
  "aliangdw/utd_so101_policy_ranking"
  "aliangdw/utd_so101_human"
  "jesbu1/mit_franka_p-rank_rfm"
  "jesbu1/utd_so101_clean_policy_ranking_top"
  "jesbu1/utd_so101_clean_policy_ranking_wrist"
  "jesbu1/usc_koch_human_robot_paired"
  "jesbu1/usc_koch_p_ranking_rfm"
  # Disabled: "jesbu1/roboreward_rfm"
  "jesbu1/roboreward_rfm_high_res"
  "jesbu1/rfm_new_mit_franka_rfm_nowrist"
  "ykorkmaz/usc_trossen_rfm"
  "aliangdw/robofac_rbm"
)
# ------------------------------
# Download each repo with retry logic
# ------------------------------
# Download every entry of `repos`, retrying each one until it succeeds.
for repo in "${repos[@]}"; do
  # Defensive only: bash already drops '#' comments while parsing the array
  # literal, so an element starts with '#' only if it was quoted on purpose.
  if [[ "$repo" == \#* ]]; then
    continue
  fi
  # retry_until_success evaluates its first argument with `eval`, so the repo
  # id is shell-quoted (%q) before being placed in the command string.
  retry_until_success "download_dataset $(printf '%q' "$repo")" "$repo"
done

echo ""
echo "Done. Set ROBOMETER_DATASET_PATH=${BASE_DIR} for training/eval."