wsagi committed on
Commit
fe8aed0
·
verified ·
1 Parent(s): 477f433

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ assets/drq-g1-walk.mp4 filter=lfs diff=lfs merge=lfs -text
37
+ assets/drq-h1-walk.mp4 filter=lfs diff=lfs merge=lfs -text
DRQ+HBench-g1-walk-v0+0/agent_var.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ffa719406b465edac92cba54cec5fe9cedbd1156372351276b5ae214023c97f
3
+ size 1086
DRQ+HBench-g1-walk-v0+0/encoder.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d5709d80a41804bc2022597b7f9bb059012adfe44336b99b5e98747f00aa9f7
3
+ size 11368742
DRQ+HBench-g1-walk-v0+0/encoder_optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:182b7e07cd6e955732580993426602ae3846868e171da2cd4164bf71d7a478f9
3
+ size 22741646
DRQ+HBench-g1-walk-v0+0/encoder_target.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4519493bed30b9674f828e0d2e6eebdde882e1fccb7692181e954af6da81106a
3
+ size 11369202
DRQ+HBench-g1-walk-v0+0/exp_var.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c58e80359a1d6278882ee6ff609f2671549abd272582814c4a586e98825f6090
3
+ size 8397
DRQ+HBench-g1-walk-v0+0/policy.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23cad46e4f7abf716f19f6002aa945dfd287317625e753171f96d7229132a62a
3
+ size 2150846
DRQ+HBench-g1-walk-v0+0/policy_optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7745a0565e61da7361115a35b77b4a1a99b8fc4e6f393fcea1f0fa2570be621c
3
+ size 4302822
DRQ+HBench-g1-walk-v0+0/policy_target.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbfb5e81866ecba5cea29bb5360f31232f201987c39386357a32a7042f9ada58
3
+ size 2151236
DRQ+HBench-g1-walk-v0+0/value.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dec16fa73d702cc4b52502e78147fe02b3bb4ab2dd4dd81e0510f16643f28286
3
+ size 6312510
DRQ+HBench-g1-walk-v0+0/value_optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34b188af7079e84c4ece58da2b53369217996489d7b440e13165695219f8f2b2
3
+ size 12629798
DRQ+HBench-g1-walk-v0+0/value_target.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bba0c59969f24a77b4edad2581b6bfce0e494d81de76b106f36c343b36b3714a
3
+ size 6313610
DRQ+HBench-h1-walk-v0+0/agent_var.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebf8d0bd97de9b41f974c43021363aad7db2381ebcb6b6c979e9765157850562
3
+ size 1086
DRQ+HBench-h1-walk-v0+0/encoder.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e97535acf118e12c157146c5f1920a5c05e134d5b1297f97849e21e0d7f393f
3
+ size 11256614
DRQ+HBench-h1-walk-v0+0/encoder_optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0dda7c1360446938d1ac6814d8a04cf816b4e54b653f42d23f9a7b5cdc4ad95
3
+ size 22517390
DRQ+HBench-h1-walk-v0+0/encoder_target.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d4eb7b1a30f7aeb54d19c472de990125fa462b0b5a963e29aa455c4f1ea32b6
3
+ size 11257074
DRQ+HBench-h1-walk-v0+0/exp_var.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcef3390a2ac13d6dcefa27a154272625f6c9fe3bd2f19fcb74a2cd2a2eda39f
3
+ size 8397
DRQ+HBench-h1-walk-v0+0/policy.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3452a05d052112002baef80a53f0bc9df9e15319ce5d3a299daedeb1025b284c
3
+ size 2142654
DRQ+HBench-h1-walk-v0+0/policy_optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9e5e806221377df42d0582874ea303b8040663c3475ef732eefc486ff607865
3
+ size 4286438
DRQ+HBench-h1-walk-v0+0/policy_target.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34641cf8117f02057986699223530742356f5cbc40ef3aacce3ec2fec0defaed
3
+ size 2143044
DRQ+HBench-h1-walk-v0+0/value.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:785533e0635a67da696a29613e583834435d871a72dc7a71875f630235205564
3
+ size 6312510
DRQ+HBench-h1-walk-v0+0/value_optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48a7090fe731329eb703a4ec340b61626acb337d803a5ac751db8b7b6710a4ab
3
+ size 12629798
DRQ+HBench-h1-walk-v0+0/value_target.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2604d82eac11d136c8480cb56960bb07097df5e63a64233c13bb6450c36cf550
3
+ size 6313610
README.md ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: drq
3
+ tags:
4
+ - reinforcement-learning
5
+ - humanoid
6
+ - mujoco
7
+ - humanoid-bench
8
+ - locomotion
9
+ - unitree-h1
10
+ - unitree-g1
11
+ datasets:
12
+ - carlosferrazza/humanoid-bench
13
+ base_model: dmux/DR.Q
14
+ license: mit
15
+ ---
16
+
17
+ # HumanoidBench-DR.Q · 自训通关 checkpoints
18
+
19
+ _Self-trained DR.Q checkpoints that **beat** the public dmux/DR.Q baseline on HumanoidBench locomotion tasks._
20
+
21
+ DR.Q 是 TD3 + model-based 表征学习的离策略 RL 算法(encoder + policy ~13 MB 推理)。
22
+ 本仓库收录在 [HumanoidBench](https://github.com/carlosferrazza/humanoid-bench) 上**从零自训通关**的 checkpoints。
23
+
24
+ _DR.Q is a TD3-family off-policy RL algorithm with model-based representation learning (~13 MB encoder + policy at inference).
25
+ This repo hosts checkpoints **trained from scratch** that pass the HumanoidBench locomotion bar._
26
+
27
+ ---
28
+
29
+ ## 📊 性能 / Performance
30
+
31
+ | Task | success_rate | mean_return | N | 对比公开 baseline |
32
+ |---|---|---|---|---|
33
+ | **`h1-walk-v0`** | **90%** | **801.05** | 10 ep × seed 0 | dmux/DR.Q seed 0: ~30% / mean ~530 |
34
+ | **`g1-walk-v0`** | **70%** | **710.52** | 10 ep × seed 0 | torque baseline: 0% / mean ~100 (**7.1× 提升**) |
35
+
36
+ `success_bar = 700` (HumanoidBench locomotion threshold).
37
+ _All numbers come from deterministic evaluation with `action_repeat=2`. The raw per-episode JSONL logs are in `eval/`._
38
+
39
+ ---
40
+
41
+ ## 🎬 演示 / Demos
42
+
43
+ ### H1-walk-v0 (Unitree H1, 19 DoF)
44
+
45
+ <video controls width="720" src="https://huggingface.co/wsagi/HumanoidBench-DR.Q/resolve/main/assets/drq-h1-walk.mp4"></video>
46
+
47
+ ### G1-walk-v0 (Unitree G1, 23 DoF with BlockedHands wrapper)
48
+
49
+ <video controls width="720" src="https://huggingface.co/wsagi/HumanoidBench-DR.Q/resolve/main/assets/drq-g1-walk.mp4"></video>
50
+
51
+ ---
52
+
53
+ ## 📦 仓库内容 / Repo layout
54
+
55
+ ```
56
+ HumanoidBench-DR.Q/
57
+ ├── DRQ+HBench-h1-walk-v0+0/ # H1-walk self-trained ckpt (76 MB)
58
+ │ ├── encoder.pt policy.pt agent_var.npy ← inference (~13 MB)
59
+ │ ├── *_target.pt × 3 ← Q-learning targets
60
+ │ ├── *_optimizer.pt × 3 ← Adam states (resume)
61
+ │ ├── value.pt ← critic
62
+ │ └── exp_var.npy ← exploration variance
63
+ ├── DRQ+HBench-g1-walk-v0+0/ # G1-walk self-trained ckpt (76 MB)
64
+ │ └── ... (same 11-file layout)
65
+ ├── eval/ # Final eval JSONL (per-episode + summary row)
66
+ └── assets/ # MP4 demos
67
+ ```
68
+
69
+ **推理只需 3 个文件**:`encoder.pt` + `policy.pt` + `agent_var.npy`(共 ~13 MB)。
70
+ 其余 8 个文件用于续训与 Q-learning target。
71
+
72
+ _Only 3 files are needed for inference; the rest are for resume-training and Q-learning targets._
73
+
74
+ ---
75
+
76
+ ## 🚀 加载与推理 / Load & inference
77
+
78
+ ```python
79
+ # Minimal inference loader — see scripts/drq_viewer.py in companion repo
80
+ import torch, numpy as np
81
+ from huggingface_hub import snapshot_download
82
+
83
+ ckpt_dir = snapshot_download(
84
+ repo_id="wsagi/HumanoidBench-DR.Q",
85
+ allow_patterns="DRQ+HBench-h1-walk-v0+0/*",
86
+ )
87
+
88
+ var = np.load(f"{ckpt_dir}/DRQ+HBench-h1-walk-v0+0/agent_var.npy", allow_pickle=True).item()
89
+ # encoder = Encoder(obs_dim, ...); encoder.load_state_dict(torch.load(.../encoder.pt))
90
+ # policy = Policy(...); policy.load_state_dict(torch.load(.../policy.pt))
91
+ ```
92
+
93
+ 完整加载链路参考配套仓库 [vitorcen/humanoid-training](https://github.com/vitorcen/humanoid-training) 的 `scripts/drq_viewer.py`。
94
+
95
+ ```bash
96
+ git clone --recursive https://github.com/vitorcen/humanoid-training
97
+ cd humanoid-training
98
+ bash patches/apply.sh # apply DR.Q + HumanoidBench local patches
99
+ DISPLAY=:0 python scripts/drq_viewer.py --task h1-walk-v0 --seed 0
100
+ DISPLAY=:0 python scripts/drq_viewer.py --task g1-walk-v0 --seed 0
101
+ ```
102
+
103
+ ---
104
+
105
+ ## ⚠️ G1-walk 必备 patches / Required patches for G1
106
+
107
+ G1 通关**不是开箱即用**,需两层 patch(详见 [g1_training_strategies.html](https://github.com/vitorcen/humanoid-training/blob/main/docs/g1_training_strategies.html)):
108
+
109
+ | Patch | 作用 |
110
+ |---|---|
111
+ | `patches/g1-pos-control.patch` | G1 默认 torque control → **PD position control**(与 H1 一致),sample efficiency 4×↑ |
112
+ | `patches/humanoid-bench-g1-blocked-hands.patch` | 扩展 `BlockedHandsLocoWrapper` 支持 G1,**屏蔽 14 维手指**(37D → 23D action),避免噪声污染 encoder dynamics loss |
113
+
114
+ _The G1 raw-torque baseline was trained for 1M steps and stayed at 0% success / mean return ~100. The combined patches lift it to 70% success / mean return 711._
115
+
116
+ **根因(OpenCode deepseek-v4-pro 诊断)**:DR.Q 同方差 σ=0.2 exploration noise 在 37D action 上几乎每一步都扰动手指,encoder 的 dynamics loss 被迫学习无关的手指动力学 → 250k step 时 catastrophic forgetting。
117
+
118
+ _Root cause: isotropic σ=0.2 noise contaminates the encoder's dynamics loss with irrelevant 14-DoF finger motion, leading to catastrophic forgetting around 250k steps._
119
+
120
+ ---
121
+
122
+ ## 🔧 训练配置 / Training config
123
+
124
+ | | H1-walk | G1-walk |
125
+ |---|---|---|
126
+ | Algorithm | DR.Q (TD3 + zs encoder) | DR.Q + PD control + BlockedHands |
127
+ | Env steps | 500,000 | 500,000 |
128
+ | Wall time | 6.6 h | 3.0 h |
129
+ | GPU | RTX 4090 | RTX 4090 |
130
+ | `action_repeat` | 2 | 2 |
131
+ | `save_freq` | 50,000 | 50,000 |
132
+ | Watcher | slice-based auto-eval + early-stop (LeIsaac-inspired) | same |
133
+
134
+ **训练流水线**(三个并行进程,详见配套仓库 README):
135
+ - A) DR.Q `main.py` 主训
136
+ - B) `scripts/train_watcher.py` — 分 10 slice 实时聚合 + PROGRESS/UNDERFIT/OVERFIT/DEAD 早停
137
+ - C) `scripts/ckpt_eval_loop.py` — 每出 ckpt 自动 mirror 到 HF cache + N=3 deterministic eval
138
+
139
+ ---
140
+
141
+ ## 📚 引用 / Citations
142
+
143
+ ```bibtex
144
+ @inproceedings{sferrazza2024humanoidbench,
145
+ title={HumanoidBench: Simulated Humanoid Benchmark for Whole-Body Locomotion and Manipulation},
146
+ author={Sferrazza, Carmelo and Huang, Dun-Ming and Lin, Xingyu and Lee, Youngwoon and Abbeel, Pieter},
147
+ booktitle={Proceedings of Robotics: Science and Systems (RSS)},
148
+ year={2024}
149
+ }
150
+
151
+ @inproceedings{yarats2022mastering,
152
+ title={Mastering Visual Continuous Control: Improved Data-Augmented Reinforcement Learning},
153
+ author={Yarats, Denis and Fergus, Rob and Lazaric, Alessandro and Pinto, Lerrel},
154
+ booktitle={International Conference on Learning Representations (ICLR)},
155
+ year={2022}
156
+ }
157
+ ```
158
+
159
+ ---
160
+
161
+ ## 📄 License
162
+
163
+ MIT — same as base DR.Q and HumanoidBench.
164
+
165
+ ---
166
+
167
+ _Companion repository_: [github.com/vitorcen/humanoid-training](https://github.com/vitorcen/humanoid-training) — full training scripts, patches, eval harness, and analysis docs.
assets/drq-g1-walk.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30330124bcd101284206c6abf20738b88fcb4a24d79defbf1b3e8a6138a72fc8
3
+ size 411125
assets/drq-h1-walk.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e5b7f0a44d93ad43cf10ac0f6b6be5eb62470568d74a7f581e0c80813a5c943
3
+ size 276249
eval/g1-walk-v0.jsonl ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"task": "g1-walk-v0", "driver": "drq", "seed": 0, "ep_idx": 0, "ep_steps": 1000, "ep_return": 750.7662936729176, "success": true, "subtasks_max": 0, "time_to_success_s": null, "timed_out": true, "wall_time_s": 0.8296611309051514}
2
+ {"task": "g1-walk-v0", "driver": "drq", "seed": 0, "ep_idx": 1, "ep_steps": 1000, "ep_return": 733.3856304355644, "success": true, "subtasks_max": 0, "time_to_success_s": null, "timed_out": true, "wall_time_s": 0.8071818351745605}
3
+ {"task": "g1-walk-v0", "driver": "drq", "seed": 0, "ep_idx": 2, "ep_steps": 1000, "ep_return": 521.2008137157275, "success": false, "subtasks_max": 0, "time_to_success_s": null, "timed_out": true, "wall_time_s": 0.8442943096160889}
4
+ {"task": "g1-walk-v0", "driver": "drq", "seed": 0, "ep_idx": 3, "ep_steps": 1000, "ep_return": 706.1093643279406, "success": true, "subtasks_max": 0, "time_to_success_s": null, "timed_out": true, "wall_time_s": 0.7915904521942139}
5
+ {"task": "g1-walk-v0", "driver": "drq", "seed": 0, "ep_idx": 4, "ep_steps": 1000, "ep_return": 657.5082534046979, "success": false, "subtasks_max": 0, "time_to_success_s": null, "timed_out": true, "wall_time_s": 0.8038837909698486}
6
+ {"task": "g1-walk-v0", "driver": "drq", "seed": 0, "ep_idx": 5, "ep_steps": 1000, "ep_return": 651.3662470092779, "success": false, "subtasks_max": 0, "time_to_success_s": null, "timed_out": true, "wall_time_s": 0.8597824573516846}
7
+ {"task": "g1-walk-v0", "driver": "drq", "seed": 0, "ep_idx": 6, "ep_steps": 1000, "ep_return": 769.2891580176396, "success": true, "subtasks_max": 0, "time_to_success_s": null, "timed_out": true, "wall_time_s": 0.7870059013366699}
8
+ {"task": "g1-walk-v0", "driver": "drq", "seed": 0, "ep_idx": 7, "ep_steps": 1000, "ep_return": 814.1963970582311, "success": true, "subtasks_max": 0, "time_to_success_s": null, "timed_out": true, "wall_time_s": 0.8006560802459717}
9
+ {"task": "g1-walk-v0", "driver": "drq", "seed": 0, "ep_idx": 8, "ep_steps": 1000, "ep_return": 735.2333430160378, "success": true, "subtasks_max": 0, "time_to_success_s": null, "timed_out": true, "wall_time_s": 0.8080053329467773}
10
+ {"task": "g1-walk-v0", "driver": "drq", "seed": 0, "ep_idx": 9, "ep_steps": 1000, "ep_return": 766.1107264076938, "success": true, "subtasks_max": 0, "time_to_success_s": null, "timed_out": true, "wall_time_s": 0.807042121887207}
11
+ {"_summary": true, "n_episodes": 10, "success_rate": 0.7, "mean_return": 710.5166227065728, "mean_steps": 1000.0, "mean_subtasks": 0.0, "mean_ttf_success_s": null, "timeout_rate": 1.0, "task": "g1-walk-v0", "driver": "drq", "seeds": [0], "eval_per_seed": 10, "total_wall_s": 8.86957573890686}
eval/h1-walk-v0.jsonl ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"task": "h1-walk-v0", "driver": "drq", "seed": 0, "ep_idx": 0, "ep_steps": 1000, "ep_return": 831.7330195042176, "success": true, "subtasks_max": 0, "time_to_success_s": null, "timed_out": true, "wall_time_s": 0.4069836139678955}
2
+ {"task": "h1-walk-v0", "driver": "drq", "seed": 0, "ep_idx": 1, "ep_steps": 1000, "ep_return": 830.7659604815722, "success": true, "subtasks_max": 0, "time_to_success_s": null, "timed_out": true, "wall_time_s": 0.39011359214782715}
3
+ {"task": "h1-walk-v0", "driver": "drq", "seed": 0, "ep_idx": 2, "ep_steps": 1000, "ep_return": 831.6017931113811, "success": true, "subtasks_max": 0, "time_to_success_s": null, "timed_out": true, "wall_time_s": 0.3844790458679199}
4
+ {"task": "h1-walk-v0", "driver": "drq", "seed": 0, "ep_idx": 3, "ep_steps": 1000, "ep_return": 830.9254211628348, "success": true, "subtasks_max": 0, "time_to_success_s": null, "timed_out": true, "wall_time_s": 0.38822317123413086}
5
+ {"task": "h1-walk-v0", "driver": "drq", "seed": 0, "ep_idx": 4, "ep_steps": 1000, "ep_return": 831.2334128876776, "success": true, "subtasks_max": 0, "time_to_success_s": null, "timed_out": true, "wall_time_s": 0.38614797592163086}
6
+ {"task": "h1-walk-v0", "driver": "drq", "seed": 0, "ep_idx": 5, "ep_steps": 753, "ep_return": 530.3864998710653, "success": false, "subtasks_max": 0, "time_to_success_s": null, "timed_out": false, "wall_time_s": 0.3132052421569824}
7
+ {"task": "h1-walk-v0", "driver": "drq", "seed": 0, "ep_idx": 6, "ep_steps": 1000, "ep_return": 830.451588613047, "success": true, "subtasks_max": 0, "time_to_success_s": null, "timed_out": true, "wall_time_s": 0.3938441276550293}
8
+ {"task": "h1-walk-v0", "driver": "drq", "seed": 0, "ep_idx": 7, "ep_steps": 1000, "ep_return": 830.9401017369254, "success": true, "subtasks_max": 0, "time_to_success_s": null, "timed_out": true, "wall_time_s": 0.3912630081176758}
9
+ {"task": "h1-walk-v0", "driver": "drq", "seed": 0, "ep_idx": 8, "ep_steps": 1000, "ep_return": 830.5614255749334, "success": true, "subtasks_max": 0, "time_to_success_s": null, "timed_out": true, "wall_time_s": 0.4045600891113281}
10
+ {"task": "h1-walk-v0", "driver": "drq", "seed": 0, "ep_idx": 9, "ep_steps": 1000, "ep_return": 831.8926512136476, "success": true, "subtasks_max": 0, "time_to_success_s": null, "timed_out": true, "wall_time_s": 0.38388848304748535}
11
+ {"_summary": true, "n_episodes": 10, "success_rate": 0.9, "mean_return": 801.0491874157302, "mean_steps": 975.3, "mean_subtasks": 0.0, "mean_ttf_success_s": null, "timeout_rate": 0.9, "task": "h1-walk-v0", "driver": "drq", "seeds": [0], "eval_per_seed": 10, "total_wall_s": 4.451604843139648}