File size: 29,877 Bytes
01f02fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
{
  "step": 4500,
  "metrics": {
    "eval_rew_align/success_auprc_racer_val": 0.5972598636691593,
    "eval_rew_align/positive_success_acc_racer_val": 0.5238095238095238,
    "eval_rew_align/negative_success_acc_racer_val": 0.9725363489499192,
    "eval_rew_align/loss_racer_val": 1.5039077520370483,
    "eval_rew_align/pearson_racer_val": 0.8166853465988891,
    "eval_rew_align/success_auprc_oxe_bc_z_eval": 0.054588487359398905,
    "eval_rew_align/positive_success_acc_oxe_bc_z_eval": 0.7,
    "eval_rew_align/negative_success_acc_oxe_bc_z_eval": 0.9432314410480349,
    "eval_rew_align/loss_oxe_bc_z_eval": 1.7649718403816224,
    "eval_rew_align/pearson_oxe_bc_z_eval": 0.5611694184881661,
    "eval_rew_align/success_auprc_oxe_berkeley_cable_eval": 0.12320737550700828,
    "eval_rew_align/positive_success_acc_oxe_berkeley_cable_eval": 0.7,
    "eval_rew_align/negative_success_acc_oxe_berkeley_cable_eval": 0.9396299902629016,
    "eval_rew_align/loss_oxe_berkeley_cable_eval": 1.6676030993461608,
    "eval_rew_align/pearson_oxe_berkeley_cable_eval": 0.7626281468321523,
    "eval_rew_align/success_auprc_oxe_bridge_v2_eval": 0.2226129586383097,
    "eval_rew_align/positive_success_acc_oxe_bridge_v2_eval": 0.7,
    "eval_rew_align/negative_success_acc_oxe_bridge_v2_eval": 0.9700440528634361,
    "eval_rew_align/loss_oxe_bridge_v2_eval": 1.5779191851615906,
    "eval_rew_align/pearson_oxe_bridge_v2_eval": 0.8196023502220793,
    "eval_rew_align/success_auprc_oxe_jaco_eval": 0.05703350629550197,
    "eval_rew_align/positive_success_acc_oxe_jaco_eval": 0.8,
    "eval_rew_align/negative_success_acc_oxe_jaco_eval": 0.9796816087138668,
    "eval_rew_align/loss_oxe_jaco_eval": 1.701886808872223,
    "eval_rew_align/pearson_oxe_jaco_eval": 0.7369627561402344,
    "eval_rew_align/success_auprc_oxe_toto_eval": 0.10819046102805713,
    "eval_rew_align/positive_success_acc_oxe_toto_eval": 1.0,
    "eval_rew_align/negative_success_acc_oxe_toto_eval": 0.9452054794520548,
    "eval_rew_align/loss_oxe_toto_eval": 1.5248035669326783,
    "eval_rew_align/pearson_oxe_toto_eval": 0.9275399402861348,
    "eval_rew_align/success_auprc_oxe_viola_eval": 0.3924038961069135,
    "eval_rew_align/positive_success_acc_oxe_viola_eval": 1.0,
    "eval_rew_align/negative_success_acc_oxe_viola_eval": 0.9430528375733855,
    "eval_rew_align/loss_oxe_viola_eval": 1.5757618188858031,
    "eval_rew_align/pearson_oxe_viola_eval": 0.8978344352364431,
    "eval_rew_align/success_auprc_mw_eval": 0.14365004363589842,
    "eval_rew_align/positive_success_acc_mw_eval": 0.8,
    "eval_rew_align/negative_success_acc_mw_eval": 0.9627450980392157,
    "eval_rew_align/loss_mw_eval": 1.7702434301376342,
    "eval_rew_align/pearson_mw_eval": 0.7687541228936258,
    "eval_rew_align/success_auprc_libero_90": 0.1795092166845774,
    "eval_rew_align/positive_success_acc_libero_90": 0.9,
    "eval_rew_align/negative_success_acc_libero_90": 0.9682352941176471,
    "eval_rew_align/loss_libero_90": 1.5339298248291016,
    "eval_rew_align/pearson_libero_90": 0.8980980150621931,
    "eval_rew_align/success_auprc_usc_trossen": 0.2819898652527857,
    "eval_rew_align/positive_success_acc_usc_trossen": 0.5,
    "eval_rew_align/negative_success_acc_usc_trossen": 0.98,
    "eval_rew_align/loss_usc_trossen": 1.5562334299087524,
    "eval_rew_align/pearson_usc_trossen": 0.7085253582776633,
    "eval_p_rank/kendall_last_usc_trossen": 0.8333333333333333,
    "eval_p_rank/kendall_rewind_last_usc_trossen": 1.0,
    "eval_p_rank/avg_succ_subopt_diff_last_usc_trossen": 0.14124762515227,
    "eval_p_rank/min_succ_subopt_diff_last_usc_trossen": 0.040902674198150635,
    "eval_p_rank/max_succ_subopt_diff_last_usc_trossen": 0.2803109735250473,
    "eval_p_rank/avg_subopt_fail_diff_last_usc_trossen": 0.19397936016321182,
    "eval_p_rank/min_subopt_fail_diff_last_usc_trossen": 0.026902765035629272,
    "eval_p_rank/max_subopt_fail_diff_last_usc_trossen": 0.3610559552907944,
    "eval_p_rank/avg_succ_fail_diff_last_usc_trossen": 0.28165244973368114,
    "eval_p_rank/min_succ_fail_diff_last_usc_trossen": 0.06780543923377991,
    "eval_p_rank/max_succ_fail_diff_last_usc_trossen": 0.46358518302440643,
    "eval_p_rank/ranking_acc_last_usc_trossen": 0.8809523809523809,
    "eval_p_rank/ranking_acc_all_pairs_last_usc_trossen": 0.8809523809523809,
    "eval_p_rank/ranking_acc_suboptimal_vs_successful_last_usc_trossen": 0.9375,
    "eval_p_rank/ranking_acc_failure_vs_suboptimal_last_usc_trossen": 0.75,
    "eval_p_rank/ranking_acc_failure_vs_successful_last_usc_trossen": 0.8888888888888888,
    "eval_p_rank/kendall_avg_usc_trossen": 0.8333333333333333,
    "eval_p_rank/kendall_rewind_avg_usc_trossen": 1.0,
    "eval_p_rank/avg_succ_subopt_diff_avg_usc_trossen": 0.14124762515227,
    "eval_p_rank/min_succ_subopt_diff_avg_usc_trossen": 0.040902674198150635,
    "eval_p_rank/max_succ_subopt_diff_avg_usc_trossen": 0.2803109735250473,
    "eval_p_rank/avg_subopt_fail_diff_avg_usc_trossen": 0.19397936016321182,
    "eval_p_rank/min_subopt_fail_diff_avg_usc_trossen": 0.026902765035629272,
    "eval_p_rank/max_subopt_fail_diff_avg_usc_trossen": 0.3610559552907944,
    "eval_p_rank/avg_succ_fail_diff_avg_usc_trossen": 0.28165244973368114,
    "eval_p_rank/min_succ_fail_diff_avg_usc_trossen": 0.06780543923377991,
    "eval_p_rank/max_succ_fail_diff_avg_usc_trossen": 0.46358518302440643,
    "eval_p_rank/ranking_acc_avg_usc_trossen": 0.8809523809523809,
    "eval_p_rank/ranking_acc_all_pairs_avg_usc_trossen": 0.8809523809523809,
    "eval_p_rank/ranking_acc_suboptimal_vs_successful_avg_usc_trossen": 0.9375,
    "eval_p_rank/ranking_acc_failure_vs_suboptimal_avg_usc_trossen": 0.75,
    "eval_p_rank/ranking_acc_failure_vs_successful_avg_usc_trossen": 0.8888888888888888,
    "eval_p_rank/kendall_sum_usc_trossen": 0.8333333333333333,
    "eval_p_rank/kendall_rewind_sum_usc_trossen": 1.0,
    "eval_p_rank/avg_succ_subopt_diff_sum_usc_trossen": 0.14124762515227,
    "eval_p_rank/min_succ_subopt_diff_sum_usc_trossen": 0.040902674198150635,
    "eval_p_rank/max_succ_subopt_diff_sum_usc_trossen": 0.2803109735250473,
    "eval_p_rank/avg_subopt_fail_diff_sum_usc_trossen": 0.19397936016321182,
    "eval_p_rank/min_subopt_fail_diff_sum_usc_trossen": 0.026902765035629272,
    "eval_p_rank/max_subopt_fail_diff_sum_usc_trossen": 0.3610559552907944,
    "eval_p_rank/avg_succ_fail_diff_sum_usc_trossen": 0.28165244973368114,
    "eval_p_rank/min_succ_fail_diff_sum_usc_trossen": 0.06780543923377991,
    "eval_p_rank/max_succ_fail_diff_sum_usc_trossen": 0.46358518302440643,
    "eval_p_rank/ranking_acc_sum_usc_trossen": 0.8809523809523809,
    "eval_p_rank/ranking_acc_all_pairs_sum_usc_trossen": 0.8809523809523809,
    "eval_p_rank/ranking_acc_suboptimal_vs_successful_sum_usc_trossen": 0.9375,
    "eval_p_rank/ranking_acc_failure_vs_suboptimal_sum_usc_trossen": 0.75,
    "eval_p_rank/ranking_acc_failure_vs_successful_sum_usc_trossen": 0.8888888888888888,
    "eval_rew_align/success_auprc_rfm_new_mit_franka_nowrist": 0.13878492377635082,
    "eval_rew_align/positive_success_acc_rfm_new_mit_franka_nowrist": 0.9,
    "eval_rew_align/negative_success_acc_rfm_new_mit_franka_nowrist": 0.9635294117647059,
    "eval_rew_align/loss_rfm_new_mit_franka_nowrist": 1.3595333456993104,
    "eval_rew_align/pearson_rfm_new_mit_franka_nowrist": 0.9332205211882452,
    "eval_p_rank/kendall_last_rfm_new_mit_franka_nowrist": 0.46904761904761905,
    "eval_p_rank/kendall_rewind_last_rfm_new_mit_franka_nowrist": 0.8095238095238095,
    "eval_p_rank/avg_succ_subopt_diff_last_rfm_new_mit_franka_nowrist": 0.1011232117811839,
    "eval_p_rank/min_succ_subopt_diff_last_rfm_new_mit_franka_nowrist": 0.022794996698697445,
    "eval_p_rank/max_succ_subopt_diff_last_rfm_new_mit_franka_nowrist": 0.21488183736801147,
    "eval_p_rank/avg_subopt_fail_diff_last_rfm_new_mit_franka_nowrist": 0.14513030257962997,
    "eval_p_rank/min_subopt_fail_diff_last_rfm_new_mit_franka_nowrist": -0.14316336512565614,
    "eval_p_rank/max_subopt_fail_diff_last_rfm_new_mit_franka_nowrist": 0.34648392796516414,
    "eval_p_rank/avg_succ_fail_diff_last_rfm_new_mit_franka_nowrist": 0.24625351436081383,
    "eval_p_rank/min_succ_fail_diff_last_rfm_new_mit_franka_nowrist": -0.00810291568438215,
    "eval_p_rank/max_succ_fail_diff_last_rfm_new_mit_franka_nowrist": 0.49043338249127066,
    "eval_p_rank/ranking_acc_last_rfm_new_mit_franka_nowrist": 0.7598684210526315,
    "eval_p_rank/ranking_acc_all_pairs_last_rfm_new_mit_franka_nowrist": 0.7598684210526315,
    "eval_p_rank/ranking_acc_failure_vs_successful_last_rfm_new_mit_franka_nowrist": 0.8482142857142857,
    "eval_p_rank/ranking_acc_failure_vs_suboptimal_last_rfm_new_mit_franka_nowrist": 0.7523809523809524,
    "eval_p_rank/ranking_acc_suboptimal_vs_successful_last_rfm_new_mit_franka_nowrist": 0.6551724137931034,
    "eval_p_rank/kendall_avg_rfm_new_mit_franka_nowrist": 0.46904761904761905,
    "eval_p_rank/kendall_rewind_avg_rfm_new_mit_franka_nowrist": 0.8095238095238095,
    "eval_p_rank/avg_succ_subopt_diff_avg_rfm_new_mit_franka_nowrist": 0.1011232117811839,
    "eval_p_rank/min_succ_subopt_diff_avg_rfm_new_mit_franka_nowrist": 0.022794996698697445,
    "eval_p_rank/max_succ_subopt_diff_avg_rfm_new_mit_franka_nowrist": 0.21488183736801147,
    "eval_p_rank/avg_subopt_fail_diff_avg_rfm_new_mit_franka_nowrist": 0.14513030257962997,
    "eval_p_rank/min_subopt_fail_diff_avg_rfm_new_mit_franka_nowrist": -0.14316336512565614,
    "eval_p_rank/max_subopt_fail_diff_avg_rfm_new_mit_franka_nowrist": 0.34648392796516414,
    "eval_p_rank/avg_succ_fail_diff_avg_rfm_new_mit_franka_nowrist": 0.24625351436081383,
    "eval_p_rank/min_succ_fail_diff_avg_rfm_new_mit_franka_nowrist": -0.00810291568438215,
    "eval_p_rank/max_succ_fail_diff_avg_rfm_new_mit_franka_nowrist": 0.49043338249127066,
    "eval_p_rank/ranking_acc_avg_rfm_new_mit_franka_nowrist": 0.7598684210526315,
    "eval_p_rank/ranking_acc_all_pairs_avg_rfm_new_mit_franka_nowrist": 0.7598684210526315,
    "eval_p_rank/ranking_acc_failure_vs_successful_avg_rfm_new_mit_franka_nowrist": 0.8482142857142857,
    "eval_p_rank/ranking_acc_failure_vs_suboptimal_avg_rfm_new_mit_franka_nowrist": 0.7523809523809524,
    "eval_p_rank/ranking_acc_suboptimal_vs_successful_avg_rfm_new_mit_franka_nowrist": 0.6551724137931034,
    "eval_p_rank/kendall_sum_rfm_new_mit_franka_nowrist": 0.46904761904761905,
    "eval_p_rank/kendall_rewind_sum_rfm_new_mit_franka_nowrist": 0.8095238095238095,
    "eval_p_rank/avg_succ_subopt_diff_sum_rfm_new_mit_franka_nowrist": 0.1011232117811839,
    "eval_p_rank/min_succ_subopt_diff_sum_rfm_new_mit_franka_nowrist": 0.022794996698697445,
    "eval_p_rank/max_succ_subopt_diff_sum_rfm_new_mit_franka_nowrist": 0.21488183736801147,
    "eval_p_rank/avg_subopt_fail_diff_sum_rfm_new_mit_franka_nowrist": 0.14513030257962997,
    "eval_p_rank/min_subopt_fail_diff_sum_rfm_new_mit_franka_nowrist": -0.14316336512565614,
    "eval_p_rank/max_subopt_fail_diff_sum_rfm_new_mit_franka_nowrist": 0.34648392796516414,
    "eval_p_rank/avg_succ_fail_diff_sum_rfm_new_mit_franka_nowrist": 0.24625351436081383,
    "eval_p_rank/min_succ_fail_diff_sum_rfm_new_mit_franka_nowrist": -0.00810291568438215,
    "eval_p_rank/max_succ_fail_diff_sum_rfm_new_mit_franka_nowrist": 0.49043338249127066,
    "eval_p_rank/ranking_acc_sum_rfm_new_mit_franka_nowrist": 0.7598684210526315,
    "eval_p_rank/ranking_acc_all_pairs_sum_rfm_new_mit_franka_nowrist": 0.7598684210526315,
    "eval_p_rank/ranking_acc_failure_vs_successful_sum_rfm_new_mit_franka_nowrist": 0.8482142857142857,
    "eval_p_rank/ranking_acc_failure_vs_suboptimal_sum_rfm_new_mit_franka_nowrist": 0.7523809523809524,
    "eval_p_rank/ranking_acc_suboptimal_vs_successful_sum_rfm_new_mit_franka_nowrist": 0.6551724137931034,
    "eval_rew_align/success_auprc_utd_so101_clean_top": 0.1594673014952464,
    "eval_rew_align/positive_success_acc_utd_so101_clean_top": 0.8,
    "eval_rew_align/negative_success_acc_utd_so101_clean_top": 0.9796078431372549,
    "eval_rew_align/loss_utd_so101_clean_top": 1.422999668121338,
    "eval_rew_align/pearson_utd_so101_clean_top": 0.9214771733077172,
    "eval_p_rank/kendall_last_utd_so101_clean_top": 0.7333333333333333,
    "eval_p_rank/kendall_rewind_last_utd_so101_clean_top": 0.7333333333333333,
    "eval_p_rank/avg_succ_subopt_diff_last_utd_so101_clean_top": 0.1281689941883087,
    "eval_p_rank/min_succ_subopt_diff_last_utd_so101_clean_top": -0.6224770694971085,
    "eval_p_rank/max_succ_subopt_diff_last_utd_so101_clean_top": 0.4432547390460968,
    "eval_p_rank/avg_subopt_fail_diff_last_utd_so101_clean_top": 0.2357720375061035,
    "eval_p_rank/min_subopt_fail_diff_last_utd_so101_clean_top": -0.012576103210449219,
    "eval_p_rank/max_subopt_fail_diff_last_utd_so101_clean_top": 0.5894219428300858,
    "eval_p_rank/avg_succ_fail_diff_last_utd_so101_clean_top": 0.3639410316944122,
    "eval_p_rank/min_succ_fail_diff_last_utd_so101_clean_top": -0.033055126667022705,
    "eval_p_rank/max_succ_fail_diff_last_utd_so101_clean_top": 0.6650743782520294,
    "eval_p_rank/ranking_acc_last_utd_so101_clean_top": 0.8666666666666667,
    "eval_p_rank/ranking_acc_all_pairs_last_utd_so101_clean_top": 0.8666666666666667,
    "eval_p_rank/ranking_acc_failure_vs_successful_last_utd_so101_clean_top": 0.9,
    "eval_p_rank/ranking_acc_suboptimal_vs_successful_last_utd_so101_clean_top": 0.8,
    "eval_p_rank/ranking_acc_failure_vs_suboptimal_last_utd_so101_clean_top": 0.9,
    "eval_p_rank/kendall_avg_utd_so101_clean_top": 0.7333333333333333,
    "eval_p_rank/kendall_rewind_avg_utd_so101_clean_top": 0.7333333333333333,
    "eval_p_rank/avg_succ_subopt_diff_avg_utd_so101_clean_top": 0.1281689941883087,
    "eval_p_rank/min_succ_subopt_diff_avg_utd_so101_clean_top": -0.6224770694971085,
    "eval_p_rank/max_succ_subopt_diff_avg_utd_so101_clean_top": 0.4432547390460968,
    "eval_p_rank/avg_subopt_fail_diff_avg_utd_so101_clean_top": 0.2357720375061035,
    "eval_p_rank/min_subopt_fail_diff_avg_utd_so101_clean_top": -0.012576103210449219,
    "eval_p_rank/max_subopt_fail_diff_avg_utd_so101_clean_top": 0.5894219428300858,
    "eval_p_rank/avg_succ_fail_diff_avg_utd_so101_clean_top": 0.3639410316944122,
    "eval_p_rank/min_succ_fail_diff_avg_utd_so101_clean_top": -0.033055126667022705,
    "eval_p_rank/max_succ_fail_diff_avg_utd_so101_clean_top": 0.6650743782520294,
    "eval_p_rank/ranking_acc_avg_utd_so101_clean_top": 0.8666666666666667,
    "eval_p_rank/ranking_acc_all_pairs_avg_utd_so101_clean_top": 0.8666666666666667,
    "eval_p_rank/ranking_acc_failure_vs_successful_avg_utd_so101_clean_top": 0.9,
    "eval_p_rank/ranking_acc_suboptimal_vs_successful_avg_utd_so101_clean_top": 0.8,
    "eval_p_rank/ranking_acc_failure_vs_suboptimal_avg_utd_so101_clean_top": 0.9,
    "eval_p_rank/kendall_sum_utd_so101_clean_top": 0.7333333333333333,
    "eval_p_rank/kendall_rewind_sum_utd_so101_clean_top": 0.7333333333333333,
    "eval_p_rank/avg_succ_subopt_diff_sum_utd_so101_clean_top": 0.1281689941883087,
    "eval_p_rank/min_succ_subopt_diff_sum_utd_so101_clean_top": -0.6224770694971085,
    "eval_p_rank/max_succ_subopt_diff_sum_utd_so101_clean_top": 0.4432547390460968,
    "eval_p_rank/avg_subopt_fail_diff_sum_utd_so101_clean_top": 0.2357720375061035,
    "eval_p_rank/min_subopt_fail_diff_sum_utd_so101_clean_top": -0.012576103210449219,
    "eval_p_rank/max_subopt_fail_diff_sum_utd_so101_clean_top": 0.5894219428300858,
    "eval_p_rank/avg_succ_fail_diff_sum_utd_so101_clean_top": 0.3639410316944122,
    "eval_p_rank/min_succ_fail_diff_sum_utd_so101_clean_top": -0.033055126667022705,
    "eval_p_rank/max_succ_fail_diff_sum_utd_so101_clean_top": 0.6650743782520294,
    "eval_p_rank/ranking_acc_sum_utd_so101_clean_top": 0.8666666666666667,
    "eval_p_rank/ranking_acc_all_pairs_sum_utd_so101_clean_top": 0.8666666666666667,
    "eval_p_rank/ranking_acc_failure_vs_successful_sum_utd_so101_clean_top": 0.9,
    "eval_p_rank/ranking_acc_suboptimal_vs_successful_sum_utd_so101_clean_top": 0.8,
    "eval_p_rank/ranking_acc_failure_vs_suboptimal_sum_utd_so101_clean_top": 0.9,
    "eval_rew_align/success_auprc_usc_xarm": 0.3298253598253598,
    "eval_rew_align/positive_success_acc_usc_xarm": 1.0,
    "eval_rew_align/negative_success_acc_usc_xarm": 0.971764705882353,
    "eval_rew_align/loss_usc_xarm": 1.459894859790802,
    "eval_rew_align/pearson_usc_xarm": 0.9290145264370201,
    "eval_p_rank/kendall_last_usc_xarm": 0.75,
    "eval_p_rank/kendall_rewind_last_usc_xarm": 0.8888888888888888,
    "eval_p_rank/avg_succ_subopt_diff_last_usc_xarm": 0.11288829644521077,
    "eval_p_rank/min_succ_subopt_diff_last_usc_xarm": 0.0029833614826202393,
    "eval_p_rank/max_succ_subopt_diff_last_usc_xarm": 0.217641681432724,
    "eval_p_rank/avg_subopt_fail_diff_last_usc_xarm": 0.11646403868993123,
    "eval_p_rank/min_subopt_fail_diff_last_usc_xarm": -0.03846535086631775,
    "eval_p_rank/max_subopt_fail_diff_last_usc_xarm": 0.34273654222488403,
    "eval_p_rank/avg_succ_fail_diff_last_usc_xarm": 0.229352335135142,
    "eval_p_rank/min_succ_fail_diff_last_usc_xarm": 0.0956188440322876,
    "eval_p_rank/max_succ_fail_diff_last_usc_xarm": 0.35525771975517273,
    "eval_p_rank/ranking_acc_last_usc_xarm": 0.875,
    "eval_p_rank/ranking_acc_all_pairs_last_usc_xarm": 0.875,
    "eval_p_rank/ranking_acc_failure_vs_suboptimal_last_usc_xarm": 0.7083333333333334,
    "eval_p_rank/ranking_acc_failure_vs_successful_last_usc_xarm": 1.0,
    "eval_p_rank/ranking_acc_suboptimal_vs_successful_last_usc_xarm": 0.9166666666666666,
    "eval_p_rank/kendall_avg_usc_xarm": 0.75,
    "eval_p_rank/kendall_rewind_avg_usc_xarm": 0.8888888888888888,
    "eval_p_rank/avg_succ_subopt_diff_avg_usc_xarm": 0.11288829644521077,
    "eval_p_rank/min_succ_subopt_diff_avg_usc_xarm": 0.0029833614826202393,
    "eval_p_rank/max_succ_subopt_diff_avg_usc_xarm": 0.217641681432724,
    "eval_p_rank/avg_subopt_fail_diff_avg_usc_xarm": 0.11646403868993123,
    "eval_p_rank/min_subopt_fail_diff_avg_usc_xarm": -0.03846535086631775,
    "eval_p_rank/max_subopt_fail_diff_avg_usc_xarm": 0.34273654222488403,
    "eval_p_rank/avg_succ_fail_diff_avg_usc_xarm": 0.229352335135142,
    "eval_p_rank/min_succ_fail_diff_avg_usc_xarm": 0.0956188440322876,
    "eval_p_rank/max_succ_fail_diff_avg_usc_xarm": 0.35525771975517273,
    "eval_p_rank/ranking_acc_avg_usc_xarm": 0.875,
    "eval_p_rank/ranking_acc_all_pairs_avg_usc_xarm": 0.875,
    "eval_p_rank/ranking_acc_failure_vs_suboptimal_avg_usc_xarm": 0.7083333333333334,
    "eval_p_rank/ranking_acc_failure_vs_successful_avg_usc_xarm": 1.0,
    "eval_p_rank/ranking_acc_suboptimal_vs_successful_avg_usc_xarm": 0.9166666666666666,
    "eval_p_rank/kendall_sum_usc_xarm": 0.75,
    "eval_p_rank/kendall_rewind_sum_usc_xarm": 0.8888888888888888,
    "eval_p_rank/avg_succ_subopt_diff_sum_usc_xarm": 0.11288829644521077,
    "eval_p_rank/min_succ_subopt_diff_sum_usc_xarm": 0.0029833614826202393,
    "eval_p_rank/max_succ_subopt_diff_sum_usc_xarm": 0.217641681432724,
    "eval_p_rank/avg_subopt_fail_diff_sum_usc_xarm": 0.11646403868993123,
    "eval_p_rank/min_subopt_fail_diff_sum_usc_xarm": -0.03846535086631775,
    "eval_p_rank/max_subopt_fail_diff_sum_usc_xarm": 0.34273654222488403,
    "eval_p_rank/avg_succ_fail_diff_sum_usc_xarm": 0.229352335135142,
    "eval_p_rank/min_succ_fail_diff_sum_usc_xarm": 0.0956188440322876,
    "eval_p_rank/max_succ_fail_diff_sum_usc_xarm": 0.35525771975517273,
    "eval_p_rank/ranking_acc_sum_usc_xarm": 0.875,
    "eval_p_rank/ranking_acc_all_pairs_sum_usc_xarm": 0.875,
    "eval_p_rank/ranking_acc_failure_vs_suboptimal_sum_usc_xarm": 0.7083333333333334,
    "eval_p_rank/ranking_acc_failure_vs_successful_sum_usc_xarm": 1.0,
    "eval_p_rank/ranking_acc_suboptimal_vs_successful_sum_usc_xarm": 0.9166666666666666,
    "eval_rew_align/success_auprc_usc_franka": 0.178648534454372,
    "eval_rew_align/positive_success_acc_usc_franka": 0.75,
    "eval_rew_align/negative_success_acc_usc_franka": 0.9436274509803921,
    "eval_rew_align/loss_usc_franka": 1.4907499551773071,
    "eval_rew_align/pearson_usc_franka": 0.9115594502071923,
    "eval_p_rank/kendall_last_usc_franka": 0.7916666666666666,
    "eval_p_rank/kendall_rewind_last_usc_franka": 0.8333333333333334,
    "eval_p_rank/avg_succ_subopt_diff_last_usc_franka": 0.06161930412054062,
    "eval_p_rank/min_succ_subopt_diff_last_usc_franka": -0.010589927434921265,
    "eval_p_rank/max_succ_subopt_diff_last_usc_franka": 0.17146822810173035,
    "eval_p_rank/avg_subopt_fail_diff_last_usc_franka": 0.18651490285992622,
    "eval_p_rank/min_subopt_fail_diff_last_usc_franka": 0.0136566162109375,
    "eval_p_rank/max_subopt_fail_diff_last_usc_franka": 0.3522116541862488,
    "eval_p_rank/avg_succ_fail_diff_last_usc_franka": 0.24813420698046684,
    "eval_p_rank/min_succ_fail_diff_last_usc_franka": 0.032290756702423096,
    "eval_p_rank/max_succ_fail_diff_last_usc_franka": 0.4191764295101166,
    "eval_p_rank/ranking_acc_last_usc_franka": 0.8958333333333334,
    "eval_p_rank/ranking_acc_all_pairs_last_usc_franka": 0.8958333333333334,
    "eval_p_rank/ranking_acc_suboptimal_vs_successful_last_usc_franka": 0.8125,
    "eval_p_rank/ranking_acc_failure_vs_suboptimal_last_usc_franka": 0.9375,
    "eval_p_rank/ranking_acc_failure_vs_successful_last_usc_franka": 0.9375,
    "eval_p_rank/kendall_avg_usc_franka": 0.7916666666666666,
    "eval_p_rank/kendall_rewind_avg_usc_franka": 0.8333333333333334,
    "eval_p_rank/avg_succ_subopt_diff_avg_usc_franka": 0.06161930412054062,
    "eval_p_rank/min_succ_subopt_diff_avg_usc_franka": -0.010589927434921265,
    "eval_p_rank/max_succ_subopt_diff_avg_usc_franka": 0.17146822810173035,
    "eval_p_rank/avg_subopt_fail_diff_avg_usc_franka": 0.18651490285992622,
    "eval_p_rank/min_subopt_fail_diff_avg_usc_franka": 0.0136566162109375,
    "eval_p_rank/max_subopt_fail_diff_avg_usc_franka": 0.3522116541862488,
    "eval_p_rank/avg_succ_fail_diff_avg_usc_franka": 0.24813420698046684,
    "eval_p_rank/min_succ_fail_diff_avg_usc_franka": 0.032290756702423096,
    "eval_p_rank/max_succ_fail_diff_avg_usc_franka": 0.4191764295101166,
    "eval_p_rank/ranking_acc_avg_usc_franka": 0.8958333333333334,
    "eval_p_rank/ranking_acc_all_pairs_avg_usc_franka": 0.8958333333333334,
    "eval_p_rank/ranking_acc_suboptimal_vs_successful_avg_usc_franka": 0.8125,
    "eval_p_rank/ranking_acc_failure_vs_suboptimal_avg_usc_franka": 0.9375,
    "eval_p_rank/ranking_acc_failure_vs_successful_avg_usc_franka": 0.9375,
    "eval_p_rank/kendall_sum_usc_franka": 0.7916666666666666,
    "eval_p_rank/kendall_rewind_sum_usc_franka": 0.8333333333333334,
    "eval_p_rank/avg_succ_subopt_diff_sum_usc_franka": 0.06161930412054062,
    "eval_p_rank/min_succ_subopt_diff_sum_usc_franka": -0.010589927434921265,
    "eval_p_rank/max_succ_subopt_diff_sum_usc_franka": 0.17146822810173035,
    "eval_p_rank/avg_subopt_fail_diff_sum_usc_franka": 0.18651490285992622,
    "eval_p_rank/min_subopt_fail_diff_sum_usc_franka": 0.0136566162109375,
    "eval_p_rank/max_subopt_fail_diff_sum_usc_franka": 0.3522116541862488,
    "eval_p_rank/avg_succ_fail_diff_sum_usc_franka": 0.24813420698046684,
    "eval_p_rank/min_succ_fail_diff_sum_usc_franka": 0.032290756702423096,
    "eval_p_rank/max_succ_fail_diff_sum_usc_franka": 0.4191764295101166,
    "eval_p_rank/ranking_acc_sum_usc_franka": 0.8958333333333334,
    "eval_p_rank/ranking_acc_all_pairs_sum_usc_franka": 0.8958333333333334,
    "eval_p_rank/ranking_acc_suboptimal_vs_successful_sum_usc_franka": 0.8125,
    "eval_p_rank/ranking_acc_failure_vs_suboptimal_sum_usc_franka": 0.9375,
    "eval_p_rank/ranking_acc_failure_vs_successful_sum_usc_franka": 0.9375,
    "eval_rew_align/success_auprc_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.08961776352588778,
    "eval_rew_align/positive_success_acc_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.9,
    "eval_rew_align/negative_success_acc_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.9552941176470588,
    "eval_rew_align/loss_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 1.5833105087280273,
    "eval_rew_align/pearson_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.8841339237987327,
    "eval_p_rank/kendall_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.3093333333333333,
    "eval_p_rank/kendall_rewind_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.5333333333333333,
    "eval_p_rank/avg_succ_subopt_diff_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": -0.02896499633789066,
    "eval_p_rank/min_succ_subopt_diff_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": -0.23819030523300178,
    "eval_p_rank/max_succ_subopt_diff_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.0715120196342468,
    "eval_p_rank/avg_subopt_fail_diff_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.20831456266343595,
    "eval_p_rank/min_subopt_fail_diff_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": -0.16911142468452456,
    "eval_p_rank/max_subopt_fail_diff_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.4413899302482605,
    "eval_p_rank/avg_succ_fail_diff_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.17934956632554527,
    "eval_p_rank/min_succ_fail_diff_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": -0.09759940505027775,
    "eval_p_rank/max_succ_fail_diff_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.4535657167434692,
    "eval_p_rank/ranking_acc_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.6546666666666666,
    "eval_p_rank/ranking_acc_all_pairs_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.6546666666666666,
    "eval_p_rank/ranking_acc_failure_vs_suboptimal_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.76,
    "eval_p_rank/ranking_acc_failure_vs_successful_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.744,
    "eval_p_rank/ranking_acc_suboptimal_vs_successful_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.46,
    "eval_p_rank/kendall_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.3093333333333333,
    "eval_p_rank/kendall_rewind_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.5333333333333333,
    "eval_p_rank/avg_succ_subopt_diff_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": -0.02896499633789066,
    "eval_p_rank/min_succ_subopt_diff_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": -0.23819030523300178,
    "eval_p_rank/max_succ_subopt_diff_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.0715120196342468,
    "eval_p_rank/avg_subopt_fail_diff_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.20831456266343595,
    "eval_p_rank/min_subopt_fail_diff_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": -0.16911142468452456,
    "eval_p_rank/max_subopt_fail_diff_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.4413899302482605,
    "eval_p_rank/avg_succ_fail_diff_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.17934956632554527,
    "eval_p_rank/min_succ_fail_diff_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": -0.09759940505027775,
    "eval_p_rank/max_succ_fail_diff_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.4535657167434692,
    "eval_p_rank/ranking_acc_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.6546666666666666,
    "eval_p_rank/ranking_acc_all_pairs_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.6546666666666666,
    "eval_p_rank/ranking_acc_failure_vs_suboptimal_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.76,
    "eval_p_rank/ranking_acc_failure_vs_successful_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.744,
    "eval_p_rank/ranking_acc_suboptimal_vs_successful_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.46,
    "eval_p_rank/kendall_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.3093333333333333,
    "eval_p_rank/kendall_rewind_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.5333333333333333,
    "eval_p_rank/avg_succ_subopt_diff_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": -0.02896499633789066,
    "eval_p_rank/min_succ_subopt_diff_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": -0.23819030523300178,
    "eval_p_rank/max_succ_subopt_diff_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.0715120196342468,
    "eval_p_rank/avg_subopt_fail_diff_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.20831456266343595,
    "eval_p_rank/min_subopt_fail_diff_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": -0.16911142468452456,
    "eval_p_rank/max_subopt_fail_diff_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.4413899302482605,
    "eval_p_rank/avg_succ_fail_diff_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.17934956632554527,
    "eval_p_rank/min_succ_fail_diff_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": -0.09759940505027775,
    "eval_p_rank/max_succ_fail_diff_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.4535657167434692,
    "eval_p_rank/ranking_acc_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.6546666666666666,
    "eval_p_rank/ranking_acc_all_pairs_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.6546666666666666,
    "eval_p_rank/ranking_acc_failure_vs_suboptimal_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.76,
    "eval_p_rank/ranking_acc_failure_vs_successful_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.744,
    "eval_p_rank/ranking_acc_suboptimal_vs_successful_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.46,
    "time/custom_evaluations": 227.5345072869677
  }
}