File size: 4,795 Bytes
857c2e9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
#!/usr/bin/env bash
set -euo pipefail

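# ------------------------------------------------------------------
# Fast dataset setup via either Hugging Face CLI downloads (default)
# or git-lfs clones from the Hub.
#
#  * HF CLI path requires: `pip install huggingface_hub` (provides `hf` CLI)
#  * Git path requires: `git`; `git-lfs` is strongly recommended (without it
#    the script only warns, and clones may contain LFS pointer files)
#
# Options:  --hf | --git | --method <hf|git> | --method=<hf|git>
#           -d, --dir, --base-dir <path>   (a bare <path> argument also works)
# Env:      RFM_DOWNLOAD_METHOD                        default method
#           ROBOMETER_DATASET_PATH, RFM_DATASET_PATH   default destination
# ------------------------------------------------------------------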

# ------------------------------
# Parse command-line arguments
# ------------------------------
# Defaults come from the environment; flags and positional arguments
# override them.
#   RFM_DOWNLOAD_METHOD     download method: hf | git (default: hf)
#   ROBOMETER_DATASET_PATH  destination root; falls back to RFM_DATASET_PATH,
#                           then to ./robometer_dataset
METHOD=${RFM_DOWNLOAD_METHOD:-hf}   # hf | git
BASE_DIR_DEFAULT=${ROBOMETER_DATASET_PATH:-${RFM_DATASET_PATH:-./robometer_dataset}}
BASE_DIR="$BASE_DIR_DEFAULT"

# Print an argument error plus a short usage line to stderr, then abort.
_args_die() {
  echo "Error: $*" >&2
  echo "Usage: ${0##*/} [--hf|--git|--method <hf|git>] [-d|--dir|--base-dir <path>] [<path>]" >&2
  exit 1
}

#######################################
# Apply command-line overrides to METHOD and BASE_DIR.
# Globals:   METHOD (written), BASE_DIR (written)
# Arguments: the script's positional parameters
# Returns:   0 on success; exits 1 on a missing option value, an unknown
#            option, or an empty destination directory
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --git)
        METHOD="git"
        shift
        ;;
      --hf)
        METHOD="hf"
        shift
        ;;
      --method=*)
        METHOD="${1#*=}"
        shift
        ;;
      --method)
        # Check explicitly: under `set -u` a missing value would otherwise
        # abort with a bare "unbound variable" message.
        [[ $# -ge 2 ]] || _args_die "option '$1' requires a value (hf or git)"
        METHOD="$2"
        shift 2
        ;;
      --dir=*|--base-dir=*)
        # Accept the --opt=value spelling for parity with --method=value.
        BASE_DIR="${1#*=}"
        shift
        ;;
      --dir|--base-dir|-d)
        [[ $# -ge 2 ]] || _args_die "option '$1' requires a directory path"
        BASE_DIR="$2"
        shift 2
        ;;
      -*)
        # An unrecognised flag used to be taken as the destination directory
        # and then failed later inside mkdir; reject it here instead.
        _args_die "unknown option '$1'"
        ;;
      *)
        # Bare argument: destination directory (the last one given wins).
        BASE_DIR="$1"
        shift
        ;;
    esac
  done

  [[ -n "$BASE_DIR" ]] || _args_die "destination directory must not be empty"
}

parse_args "$@"

# ------------------------------
# Verify the tools needed by the selected download method
# ------------------------------
if [[ "$METHOD" == "hf" ]]; then
  # The Hugging Face path shells out to the `hf` executable.
  command -v hf >/dev/null 2>&1 || {
    echo "Error: 'hf' CLI not found. Install with:" >&2
    echo "   uv pip install huggingface_hub (or ensure your venv is activated)" >&2
    exit 1
  }
elif [[ "$METHOD" == "git" ]]; then
  command -v git >/dev/null 2>&1 || {
    echo "Error: git not found. Please install git." >&2
    exit 1
  }
  # A missing git-lfs is only warned about; the script keeps going.
  if ! git lfs version >/dev/null 2>&1; then
    echo "Warning: git-lfs not found. You may end up with pointer files." >&2
    echo "   Install git-lfs for full downloads." >&2
  fi
else
  # Anything other than hf/git is rejected before any download starts.
  echo "Error: Unknown METHOD='${METHOD}'. Use 'hf' or 'git'." >&2
  exit 1
fi

# Create the destination root (and any missing parents) up front.
mkdir -p "${BASE_DIR}"

# ------------------------------
# Helper to download a dataset repo
# ------------------------------
#######################################
# Download (or update) one Hugging Face dataset repo into
# ${BASE_DIR}/<last path segment of the repo id>.
# Globals:   BASE_DIR (read), METHOD (read)
# Arguments: $1 - repo id, e.g. abraranwar/libero_rfm
# Outputs:   progress messages on stdout, warnings on stderr
# Returns:   0 on success; the failing command's status as soon as the
#            hf download or a git clone/fetch/pull fails
#
# Every critical command is checked explicitly. This function is run from
# the condition of an `until` loop, where bash ignores `set -e`; without the
# explicit `|| return` a failed clone would fall through and the function
# would still report success, so the caller would never retry.
#######################################
download_dataset() {
  local repo_id="$1"                # e.g., abraranwar/libero_rfm
  local name="${repo_id##*/}"       # last path segment as folder name
  local target_dir="${BASE_DIR}/${name}"

  echo "Downloading ${repo_id} -> ${target_dir} via ${METHOD}"

  if [[ "$METHOD" == "hf" ]]; then
    hf download "${repo_id}" \
      --repo-type dataset \
      --local-dir "${target_dir}" || return
    return 0
  fi

  # Any other method value is handled as a git clone of the dataset repo.
  local url="https://huggingface.co/datasets/${repo_id}.git"
  if [[ -d "${target_dir}/.git" ]]; then
    echo "Updating existing clone at ${target_dir}"
    # Best effort: the fetch below surfaces a genuinely broken remote.
    git -C "${target_dir}" remote set-url origin "${url}" || true
    git -C "${target_dir}" fetch --all --tags || return
    git -C "${target_dir}" pull --ff-only || return
  else
    git clone "${url}" "${target_dir}" || return
  fi

  if git lfs version >/dev/null 2>&1; then
    # Hook installation is best effort; an LFS pull failure is non-fatal,
    # as before, but is now reported instead of silently swallowed.
    git -C "${target_dir}" lfs install --local >/dev/null 2>&1 || true
    if ! git -C "${target_dir}" lfs pull; then
      echo "Warning: 'git lfs pull' failed for ${repo_id}; large files may still be pointer files." >&2
    fi
  fi
  return 0
}

# ------------------------------
# Retry helper – loops until the command exits 0
# ------------------------------
# Arguments: $1 - command string, run with eval in the current shell
#            $2 - label used in log lines (defaults to the command string)
# Outputs:   a banner, one failure line per failed attempt, and a final
#            success line, all on stdout
# There is no attempt limit: a command that never succeeds loops forever,
# pausing 30 seconds between attempts.
retry_until_success() {
  local command_str="$1"
  local label="${2:-$command_str}"

  echo "=== ${label} ==="
  while true; do
    if eval "$command_str"; then
      break
    fi
    echo "❌ $label failed – retrying in 30s …"
    sleep 30
  done
  echo "✅ $label succeeded."
}

# ------------------------------
# Dataset repos to download, processed in the order listed.
# Entries are Hugging Face dataset repo ids of the form <owner>/<name>.
# To disable an entry, comment out its line (one entry is disabled below).
# ------------------------------
declare -a repos=(
  "abraranwar/libero_rfm"
  "abraranwar/agibotworld_alpha_rfm"
  "abraranwar/agibotworld_alpha_headcam_rfm"
  "abraranwar/usc_koch_rewind_rfm"
  "ykorkmaz/libero_failure_rfm"
  "aliangdw/metaworld"
  "jesbu1/oxe_rfm"
  "jesbu1/galaxea_rfm"
  "jesbu1/molmoact_rfm"
  "jesbu1/ph2d_rfm"
  "jesbu1/epic_rfm"
  "jesbu1/failsafe_rfm"
  "jesbu1/h2r_rfm"
  "jesbu1/roboarena_0825_rfm"
  "jesbu1/oxe_rfm_eval"
  # Subset of RH20T; can be replaced with the full dataset anqil/rh20t_rfm.
  "anqil/rh20t_subset_rfm"
  "jesbu1/humanoid_everyday_rfm"
  "jesbu1/motif_rfm"
  "jesbu1/auto_eval_rfm"
  "jesbu1/soar_rfm"
  "jesbu1/racer_rfm"
  "jesbu1/egodex_rfm"
  "aliangdw/usc_xarm_policy_ranking"
  "aliangdw/usc_franka_policy_ranking"
  "aliangdw/utd_so101_policy_ranking"
  "aliangdw/utd_so101_human"
  "jesbu1/mit_franka_p-rank_rfm"
  "jesbu1/utd_so101_clean_policy_ranking_top"
  "jesbu1/utd_so101_clean_policy_ranking_wrist"
  "jesbu1/usc_koch_human_robot_paired"
  "jesbu1/usc_koch_p_ranking_rfm"
  # Currently disabled:
  # "jesbu1/roboreward_rfm"
  "jesbu1/roboreward_rfm_high_res"
  "jesbu1/rfm_new_mit_franka_rfm_nowrist"
  "ykorkmaz/usc_trossen_rfm"
  "aliangdw/robofac_rbm"
)

# ------------------------------
# Fetch every listed repo, retrying each one until it succeeds
# ------------------------------
for repo in "${repos[@]}"; do
  # Defensive guard: ignore any array element that begins with '#'.
  case "$repo" in
    \#*) continue ;;
  esac

  # The command is passed as a string because the retry helper evals it;
  # the repo id doubles as the label in the log output.
  retry_until_success "download_dataset $repo" "$repo"
done

# Blank separator line, then a reminder of where the data was placed.
printf '\n'
printf '%s\n' "Done. Set ROBOMETER_DATASET_PATH=${BASE_DIR} for training/eval."