hynky committed on
Commit
0dba2e6
·
verified ·
1 Parent(s): 0bacbbc

Publish LeRobot eval viewer Space

Browse files
Files changed (3) hide show
  1. README.md +8 -5
  2. app.py +370 -0
  3. requirements.txt +4 -0
README.md CHANGED
@@ -1,13 +1,16 @@
1
  ---
2
- title: Lerobot Eval Viewer
3
- emoji: 🏆
4
  colorFrom: blue
5
  colorTo: green
6
  sdk: gradio
7
- sdk_version: 6.14.0
8
- python_version: '3.13'
9
  app_file: app.py
10
  pinned: false
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
1
  ---
2
+ title: LeRobot Eval Viewer
3
+ emoji: 📊
4
  colorFrom: blue
5
  colorTo: green
6
  sdk: gradio
7
+ sdk_version: 5.49.1
 
8
  app_file: app.py
9
  pinned: false
10
  ---
11
 
12
+ # LeRobot Eval Viewer
13
+
14
+ Gradio + Rerun viewer for LeRobot eval artifacts stored in a Hugging Face Bucket.
15
+
16
+ Default bucket: `macrodata/lerobot-evals`.
app.py ADDED
@@ -0,0 +1,370 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+
3
+ """Browse LeRobot eval artifacts stored in a Hugging Face Bucket."""
4
+
5
+ from __future__ import annotations
6
+
7
+ import argparse
8
+ import json
9
+ import os
10
+ import re
11
+ from dataclasses import dataclass
12
+ from pathlib import Path
13
+ from typing import Any
14
+
15
+ import gradio as gr
16
+ from gradio_rerun import Rerun
17
+ from huggingface_hub import download_bucket_files, list_bucket_tree
18
+
19
+
20
# Bucket used when neither --bucket nor LEROBOT_EVAL_BUCKET is supplied.
DEFAULT_BUCKET = "macrodata/lerobot-evals"
# Root of the local download cache; LEROBOT_EVAL_VIEWER_CACHE overrides the default location.
DEFAULT_CACHE_DIR = Path(os.environ.get("LEROBOT_EVAL_VIEWER_CACHE", "~/.cache/lerobot/eval_viewer")).expanduser()
# Matches runs/<run_id>/<run_time>/manifest.json
RUN_MANIFEST_RE = re.compile(r"^runs/(?P<run_id>[^/]+)/(?P<run_time>[^/]+)/manifest\.json$")
# Matches runs/<run_id>/<run_time>/evals/<eval_type>/eval_info.json
EVAL_INFO_RE = re.compile(
    r"^runs/(?P<run_id>[^/]+)/(?P<run_time>[^/]+)/evals/(?P<eval_type>[^/]+)/eval_info\.json$"
)
# Matches runs/<run_id>/<run_time>/evals/<eval_type>/episodes/<episode_id>/metadata.json
EPISODE_METADATA_RE = re.compile(
    r"^runs/(?P<run_id>[^/]+)/(?P<run_time>[^/]+)/evals/(?P<eval_type>[^/]+)/episodes/"
    r"(?P<episode_id>[^/]+)/metadata\.json$"
)
30
+
31
+
32
@dataclass(frozen=True)
class EvalIndex:
    """Frozen record of what a bucket listing contains, as filled in by ``_build_index``."""

    # Every file path collected from the listing.
    files: set[str]
    # "<run_id>/<run_time>" keys, sorted in descending string order.
    runs: list[str]
    # Run key -> sorted "<run_id>/<run_time>/<eval_type>" keys.
    evals_by_run: dict[str, list[str]]
    # Eval key -> sorted episode ids.
    episodes_by_eval: dict[str, list[str]]
38
+
39
+
40
+ def _file_path(item: Any) -> str | None:
41
+ if getattr(item, "type", None) not in (None, "file"):
42
+ return None
43
+ path = getattr(item, "path", None)
44
+ return str(path) if path else None
45
+
46
+
47
+ def _run_key(run_id: str, run_time: str) -> str:
48
+ return f"{run_id}/{run_time}"
49
+
50
+
51
+ def _eval_key(run_id: str, run_time: str, eval_type: str) -> str:
52
+ return f"{run_id}/{run_time}/{eval_type}"
53
+
54
+
55
+ def _split_eval_key(eval_key: str) -> tuple[str, str, str]:
56
+ run_id, run_time, eval_type = eval_key.split("/", 2)
57
+ return run_id, run_time, eval_type
58
+
59
+
60
+ def _base_path(run_id: str, run_time: str) -> str:
61
+ return f"runs/{run_id}/{run_time}"
62
+
63
+
64
def _eval_path(run_id: str, run_time: str, eval_type: str) -> str:
    """Return the bucket-relative directory of one eval inside a run."""
    run_root = _base_path(run_id, run_time)
    return "{}/evals/{}".format(run_root, eval_type)
66
+
67
+
68
+ def _local_path(cache_dir: Path, bucket_id: str, remote_path: str) -> Path:
69
+ namespace, bucket_name = bucket_id.split("/", 1) if "/" in bucket_id else ("me", bucket_id)
70
+ return cache_dir / namespace / bucket_name / remote_path
71
+
72
+
73
def _download(bucket_id: str, remote_path: str, cache_dir: Path) -> Path | None:
    """Return the cached local copy of a bucket file, fetching it on a cache miss.

    A file already present in the cache is returned without contacting the
    bucket. Returns ``None`` when the file is still absent after the download
    attempt (missing remote files are not raised).
    """
    target = _local_path(cache_dir, bucket_id, remote_path)
    if not target.exists():
        target.parent.mkdir(parents=True, exist_ok=True)
        download_bucket_files(
            bucket_id,
            files=[(remote_path, target)],
            raise_on_missing_files=False,
        )
        if not target.exists():
            return None
    return target
84
+
85
+
86
def _read_text(bucket_id: str, remote_path: str, cache_dir: Path, max_chars: int | None = None) -> str:
    """Read a bucket file as UTF-8 text, optionally keeping only its tail.

    Args:
        bucket_id: Bucket to read from.
        remote_path: Path of the file inside the bucket.
        cache_dir: Root of the local download cache.
        max_chars: When given, return at most this many characters taken from
            the END of the file. Zero or a negative value yields ``""``.

    Returns:
        The decoded text (undecodable bytes are replaced), or ``""`` when the
        file could not be obtained.
    """
    path = _download(bucket_id, remote_path, cache_dir)
    if path is None:
        return ""
    text = path.read_text(encoding="utf-8", errors="replace")
    if max_chars is None or len(text) <= max_chars:
        return text
    if max_chars <= 0:
        # text[-0:] would be the whole string, so handle an empty budget explicitly.
        return ""
    return text[-max_chars:]
94
+
95
+
96
def _read_json(bucket_id: str, remote_path: str, cache_dir: Path) -> dict[str, Any]:
    """Read and parse a JSON file from the bucket; an unavailable or empty file gives ``{}``."""
    raw = _read_text(bucket_id, remote_path, cache_dir)
    return json.loads(raw) if raw else {}
101
+
102
+
103
def _build_index(bucket_id: str) -> EvalIndex:
    """List everything under ``runs`` in the bucket and group it by run, eval and episode.

    Runs are discovered from manifest, eval-info and episode-metadata paths;
    evals from eval-info and episode-metadata paths; episodes from
    episode-metadata paths only.
    """
    listing = list_bucket_tree(bucket_id, prefix="runs", recursive=True)
    paths = [path for path in map(_file_path, listing) if path]

    run_keys: set[str] = set()
    evals_by_run: dict[str, set[str]] = {}
    episodes_by_eval: dict[str, set[str]] = {}

    for path in paths:
        manifest_match = RUN_MANIFEST_RE.match(path)
        if manifest_match is not None:
            run_key = _run_key(manifest_match["run_id"], manifest_match["run_time"])
            run_keys.add(run_key)
            # Register the run even if it turns out to contain no evals.
            evals_by_run.setdefault(run_key, set())
            continue

        info_match = EVAL_INFO_RE.match(path)
        episode_match = None if info_match is not None else EPISODE_METADATA_RE.match(path)
        found = info_match if info_match is not None else episode_match
        if found is None:
            continue

        run_key = _run_key(found["run_id"], found["run_time"])
        eval_key = _eval_key(found["run_id"], found["run_time"], found["eval_type"])
        run_keys.add(run_key)
        evals_by_run.setdefault(run_key, set()).add(eval_key)
        episode_ids = episodes_by_eval.setdefault(eval_key, set())
        if episode_match is not None:
            episode_ids.add(episode_match["episode_id"])

    return EvalIndex(
        files=set(paths),
        runs=sorted(run_keys, reverse=True),
        evals_by_run={run: sorted(keys) for run, keys in evals_by_run.items()},
        episodes_by_eval={key: sorted(ids) for key, ids in episodes_by_eval.items()},
    )
141
+
142
+
143
+ def _summarize_eval(info: dict[str, Any]) -> dict[str, Any]:
144
+ overall = info.get("overall") or info.get("aggregated") or {}
145
+ if not isinstance(overall, dict):
146
+ return {}
147
+ keys = ("pc_success", "avg_sum_reward", "avg_max_reward", "n_episodes", "eval_s", "eval_ep_s")
148
+ return {key: overall.get(key) for key in keys if key in overall}
149
+
150
+
151
def _trace_table(bucket_id: str, remote_path: str, cache_dir: Path, limit: int = 2000) -> tuple[list[str], list[list[Any]]]:
    """Load a JSON-lines trace file as ``(headers, rows)`` for a table.

    Only the first ``limit`` lines of the file are considered and blank lines
    among them are skipped. The fixed scalar columns always appear; the
    ``action`` / ``observation.state`` columns are added when the first record
    contains them. An unavailable or empty trace yields ``([], [])``.
    """
    text = _read_text(bucket_id, remote_path, cache_dir)
    if not text:
        return [], []

    records = [json.loads(line) for line in text.splitlines()[:limit] if line.strip()]
    if not records:
        return [], []

    headers = ["frame_index", "timestamp", "reward", "next.success", "done"]
    headers += [key for key in ("action", "observation.state") if key in records[0]]
    table = [[_table_cell(record.get(column)) for column in headers] for record in records]
    return headers, table
169
+
170
+
171
+ def _table_cell(value: Any) -> Any:
172
+ if isinstance(value, (dict, list)):
173
+ return json.dumps(value)
174
+ return value
175
+
176
+
177
def _choices(values: list[str], value: str | None = None) -> gr.Dropdown:
    """Build a dropdown update carrying ``values`` as choices.

    The selection is ``value`` when it is one of the choices, otherwise the
    first choice, otherwise ``None`` for an empty list.
    """
    if value in values:
        selected = value
    elif values:
        selected = values[0]
    else:
        selected = None
    return gr.update(choices=values, value=selected)
179
+
180
+
181
def _trace_update(headers: list[str] | None = None, rows: list[list[Any]] | None = None) -> gr.Dataframe:
    """Build a dataframe update for the trace table; missing arguments mean an empty table."""
    columns = headers if headers else []
    data = rows if rows else []
    return gr.update(headers=columns, value=data, col_count=(len(columns), "dynamic"))
185
+
186
+
187
def build_app(default_bucket: str, cache_dir: Path) -> gr.Blocks:
    """Assemble the viewer UI and wire its callbacks.

    Args:
        default_bucket: Bucket id pre-filled in the "HF Bucket" textbox.
        cache_dir: Local directory the callbacks download bucket files into.

    Returns:
        The ``gr.Blocks`` app, not yet launched.
    """
    css = """
.metric-panel textarea {font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, monospace;}
.rerun-panel {min-height: 720px;}
"""

    def refresh(bucket_id: str):
        # Re-list the bucket and reset every dependent widget.
        index = _build_index(bucket_id)
        return index, _choices(index.runs), _choices([]), _choices([]), {}, "", "", None, _trace_update()

    def select_run(bucket_id: str, run_key: str | None, index: EvalIndex):
        # Show the manifest and URI of the chosen run and offer its evals.
        if not run_key:
            return _choices([]), {}, ""
        run_id, run_time = run_key.split("/", 1)
        run_base = _base_path(run_id, run_time)
        run_manifest = _read_json(bucket_id, f"{run_base}/manifest.json", cache_dir)
        eval_keys = index.evals_by_run.get(run_key, [])
        return _choices(eval_keys), run_manifest, f"hf://buckets/{bucket_id}/{run_base}"

    def select_eval(bucket_id: str, eval_key: str | None, index: EvalIndex):
        # Show metrics, command and the tail of the logs for the chosen eval.
        if not eval_key:
            return {}, _choices([]), "", ""
        eval_base = _eval_path(*_split_eval_key(eval_key))
        info = _read_json(bucket_id, f"{eval_base}/eval_info.json", cache_dir)
        command_text = _read_text(bucket_id, f"{eval_base}/command.txt", cache_dir)
        log_tail = _read_text(bucket_id, f"{eval_base}/logs.txt", cache_dir, max_chars=60_000)
        episode_ids = index.episodes_by_eval.get(eval_key, [])
        return _summarize_eval(info), _choices(episode_ids), command_text, log_tail

    def select_episode(bucket_id: str, eval_key: str | None, episode_id: str | None, index: EvalIndex):
        # Load metadata, the trace table and the .rrd recording of the chosen episode.
        if not (eval_key and episode_id):
            return {}, _trace_update(), None
        eval_base = _eval_path(*_split_eval_key(eval_key))
        episode_base = f"{eval_base}/episodes/{episode_id}"
        metadata = _read_json(bucket_id, f"{episode_base}/metadata.json", cache_dir)
        headers, rows = _trace_table(bucket_id, f"{episode_base}/trace.jsonl", cache_dir)
        rrd_path = _download(bucket_id, f"{episode_base}/episode.rrd", cache_dir)
        return metadata, _trace_update(headers, rows), (str(rrd_path) if rrd_path else None)

    with gr.Blocks(title="LeRobot Eval Viewer", css=css) as app:
        index_state = gr.State(EvalIndex(files=set(), runs=[], evals_by_run={}, episodes_by_eval={}))

        gr.Markdown("# LeRobot Eval Viewer")
        gr.Markdown("Browse Hugging Face Bucket eval artifacts, inspect traces, and open episode `.rrd` files in Rerun.")

        with gr.Row():
            bucket = gr.Textbox(value=default_bucket, label="HF Bucket", scale=2)
            refresh_button = gr.Button("Refresh", variant="primary", scale=0)

        with gr.Row():
            run_dropdown = gr.Dropdown(label="Run", choices=[], interactive=True)
            eval_dropdown = gr.Dropdown(label="Eval", choices=[], interactive=True)
            episode_dropdown = gr.Dropdown(label="Episode", choices=[], interactive=True)

        run_uri = gr.Textbox(label="Run URI", interactive=False)

        with gr.Row():
            metrics = gr.JSON(label="Metrics", elem_classes=["metric-panel"])
            manifest = gr.JSON(label="Manifest", elem_classes=["metric-panel"])
            episode_metadata = gr.JSON(label="Episode Metadata", elem_classes=["metric-panel"])

        with gr.Tab("Trace"):
            trace = gr.Dataframe(
                label="Trace",
                headers=[],
                value=[],
                col_count=(0, "dynamic"),
                wrap=True,
                interactive=False,
            )

        with gr.Tab("Rerun"):
            rerun = Rerun(
                label="Rerun Episode",
                streaming=True,
                elem_classes=["rerun-panel"],
                panel_states={
                    "blueprint": "collapsed",
                    "selection": "collapsed",
                    "time": "expanded",
                },
            )

        with gr.Tab("Command"):
            command = gr.Code(label="command.txt", language="shell")

        with gr.Tab("Logs"):
            logs = gr.Code(label="logs.txt", language="shell", lines=24)

        # Widgets rewritten by every refresh, in the order `refresh` returns them.
        refresh_outputs = [
            index_state,
            run_dropdown,
            eval_dropdown,
            episode_dropdown,
            metrics,
            command,
            logs,
            rerun,
            trace,
        ]

        def wire_refresh(register):
            # The button click, textbox submit and page load all trigger the same refresh.
            register(refresh, inputs=[bucket], outputs=list(refresh_outputs))

        wire_refresh(refresh_button.click)
        wire_refresh(bucket.submit)
        run_dropdown.change(
            select_run,
            inputs=[bucket, run_dropdown, index_state],
            outputs=[eval_dropdown, manifest, run_uri],
        )
        eval_dropdown.change(
            select_eval,
            inputs=[bucket, eval_dropdown, index_state],
            outputs=[metrics, episode_dropdown, command, logs],
        )
        episode_dropdown.change(
            select_episode,
            inputs=[bucket, eval_dropdown, episode_dropdown, index_state],
            outputs=[episode_metadata, trace, rerun],
        )
        wire_refresh(app.load)

    return app
343
+
344
+
345
def parse_args() -> argparse.Namespace:
    """Parse the command line; environment variables supply most defaults.

    ``LEROBOT_EVAL_BUCKET``, ``GRADIO_SERVER_NAME`` and ``GRADIO_SERVER_PORT``
    override the built-in defaults. When ``SPACE_ID`` is set the default host
    is ``0.0.0.0`` instead of ``127.0.0.1``.
    """
    env = os.environ
    fallback_host = "0.0.0.0" if env.get("SPACE_ID") else "127.0.0.1"

    parser = argparse.ArgumentParser(description="Launch a Gradio viewer for LeRobot eval bucket artifacts.")
    parser.add_argument("--bucket", default=env.get("LEROBOT_EVAL_BUCKET", DEFAULT_BUCKET))
    parser.add_argument("--cache-dir", type=Path, default=DEFAULT_CACHE_DIR)
    parser.add_argument("--host", default=env.get("GRADIO_SERVER_NAME", fallback_host))
    parser.add_argument("--port", type=int, default=int(env.get("GRADIO_SERVER_PORT", "7860")))
    parser.add_argument("--share", action="store_true", help="Create a public Gradio share URL.")
    return parser.parse_args()
354
+
355
+
356
def main() -> None:
    """Parse arguments, make sure the cache directory exists, then build and launch the app."""
    args = parse_args()
    cache_dir = args.cache_dir
    cache_dir.mkdir(parents=True, exist_ok=True)

    app = build_app(default_bucket=args.bucket, cache_dir=cache_dir)
    launch_options = {
        "server_name": args.host,
        "server_port": args.port,
        "share": args.share,
        "ssr_mode": False,
        # The resolved cache directory is passed so Gradio may serve the files downloaded into it.
        "allowed_paths": [str(cache_dir.resolve())],
    }
    app.launch(**launch_options)
367
+
368
+
369
# Launch the viewer only when this file is run as a script, not when it is imported.
if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio>=5.0.0,<6.0.0
2
+ gradio_rerun>=0.26.2,<0.27.0
3
+ huggingface_hub>=1.2.3
4
+ rerun-sdk>=0.24.0,<0.27.0