File size: 29,877 Bytes
01f02fa |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 |
{
"step": 4500,
"metrics": {
"eval_rew_align/success_auprc_racer_val": 0.5972598636691593,
"eval_rew_align/positive_success_acc_racer_val": 0.5238095238095238,
"eval_rew_align/negative_success_acc_racer_val": 0.9725363489499192,
"eval_rew_align/loss_racer_val": 1.5039077520370483,
"eval_rew_align/pearson_racer_val": 0.8166853465988891,
"eval_rew_align/success_auprc_oxe_bc_z_eval": 0.054588487359398905,
"eval_rew_align/positive_success_acc_oxe_bc_z_eval": 0.7,
"eval_rew_align/negative_success_acc_oxe_bc_z_eval": 0.9432314410480349,
"eval_rew_align/loss_oxe_bc_z_eval": 1.7649718403816224,
"eval_rew_align/pearson_oxe_bc_z_eval": 0.5611694184881661,
"eval_rew_align/success_auprc_oxe_berkeley_cable_eval": 0.12320737550700828,
"eval_rew_align/positive_success_acc_oxe_berkeley_cable_eval": 0.7,
"eval_rew_align/negative_success_acc_oxe_berkeley_cable_eval": 0.9396299902629016,
"eval_rew_align/loss_oxe_berkeley_cable_eval": 1.6676030993461608,
"eval_rew_align/pearson_oxe_berkeley_cable_eval": 0.7626281468321523,
"eval_rew_align/success_auprc_oxe_bridge_v2_eval": 0.2226129586383097,
"eval_rew_align/positive_success_acc_oxe_bridge_v2_eval": 0.7,
"eval_rew_align/negative_success_acc_oxe_bridge_v2_eval": 0.9700440528634361,
"eval_rew_align/loss_oxe_bridge_v2_eval": 1.5779191851615906,
"eval_rew_align/pearson_oxe_bridge_v2_eval": 0.8196023502220793,
"eval_rew_align/success_auprc_oxe_jaco_eval": 0.05703350629550197,
"eval_rew_align/positive_success_acc_oxe_jaco_eval": 0.8,
"eval_rew_align/negative_success_acc_oxe_jaco_eval": 0.9796816087138668,
"eval_rew_align/loss_oxe_jaco_eval": 1.701886808872223,
"eval_rew_align/pearson_oxe_jaco_eval": 0.7369627561402344,
"eval_rew_align/success_auprc_oxe_toto_eval": 0.10819046102805713,
"eval_rew_align/positive_success_acc_oxe_toto_eval": 1.0,
"eval_rew_align/negative_success_acc_oxe_toto_eval": 0.9452054794520548,
"eval_rew_align/loss_oxe_toto_eval": 1.5248035669326783,
"eval_rew_align/pearson_oxe_toto_eval": 0.9275399402861348,
"eval_rew_align/success_auprc_oxe_viola_eval": 0.3924038961069135,
"eval_rew_align/positive_success_acc_oxe_viola_eval": 1.0,
"eval_rew_align/negative_success_acc_oxe_viola_eval": 0.9430528375733855,
"eval_rew_align/loss_oxe_viola_eval": 1.5757618188858031,
"eval_rew_align/pearson_oxe_viola_eval": 0.8978344352364431,
"eval_rew_align/success_auprc_mw_eval": 0.14365004363589842,
"eval_rew_align/positive_success_acc_mw_eval": 0.8,
"eval_rew_align/negative_success_acc_mw_eval": 0.9627450980392157,
"eval_rew_align/loss_mw_eval": 1.7702434301376342,
"eval_rew_align/pearson_mw_eval": 0.7687541228936258,
"eval_rew_align/success_auprc_libero_90": 0.1795092166845774,
"eval_rew_align/positive_success_acc_libero_90": 0.9,
"eval_rew_align/negative_success_acc_libero_90": 0.9682352941176471,
"eval_rew_align/loss_libero_90": 1.5339298248291016,
"eval_rew_align/pearson_libero_90": 0.8980980150621931,
"eval_rew_align/success_auprc_usc_trossen": 0.2819898652527857,
"eval_rew_align/positive_success_acc_usc_trossen": 0.5,
"eval_rew_align/negative_success_acc_usc_trossen": 0.98,
"eval_rew_align/loss_usc_trossen": 1.5562334299087524,
"eval_rew_align/pearson_usc_trossen": 0.7085253582776633,
"eval_p_rank/kendall_last_usc_trossen": 0.8333333333333333,
"eval_p_rank/kendall_rewind_last_usc_trossen": 1.0,
"eval_p_rank/avg_succ_subopt_diff_last_usc_trossen": 0.14124762515227,
"eval_p_rank/min_succ_subopt_diff_last_usc_trossen": 0.040902674198150635,
"eval_p_rank/max_succ_subopt_diff_last_usc_trossen": 0.2803109735250473,
"eval_p_rank/avg_subopt_fail_diff_last_usc_trossen": 0.19397936016321182,
"eval_p_rank/min_subopt_fail_diff_last_usc_trossen": 0.026902765035629272,
"eval_p_rank/max_subopt_fail_diff_last_usc_trossen": 0.3610559552907944,
"eval_p_rank/avg_succ_fail_diff_last_usc_trossen": 0.28165244973368114,
"eval_p_rank/min_succ_fail_diff_last_usc_trossen": 0.06780543923377991,
"eval_p_rank/max_succ_fail_diff_last_usc_trossen": 0.46358518302440643,
"eval_p_rank/ranking_acc_last_usc_trossen": 0.8809523809523809,
"eval_p_rank/ranking_acc_all_pairs_last_usc_trossen": 0.8809523809523809,
"eval_p_rank/ranking_acc_suboptimal_vs_successful_last_usc_trossen": 0.9375,
"eval_p_rank/ranking_acc_failure_vs_suboptimal_last_usc_trossen": 0.75,
"eval_p_rank/ranking_acc_failure_vs_successful_last_usc_trossen": 0.8888888888888888,
"eval_p_rank/kendall_avg_usc_trossen": 0.8333333333333333,
"eval_p_rank/kendall_rewind_avg_usc_trossen": 1.0,
"eval_p_rank/avg_succ_subopt_diff_avg_usc_trossen": 0.14124762515227,
"eval_p_rank/min_succ_subopt_diff_avg_usc_trossen": 0.040902674198150635,
"eval_p_rank/max_succ_subopt_diff_avg_usc_trossen": 0.2803109735250473,
"eval_p_rank/avg_subopt_fail_diff_avg_usc_trossen": 0.19397936016321182,
"eval_p_rank/min_subopt_fail_diff_avg_usc_trossen": 0.026902765035629272,
"eval_p_rank/max_subopt_fail_diff_avg_usc_trossen": 0.3610559552907944,
"eval_p_rank/avg_succ_fail_diff_avg_usc_trossen": 0.28165244973368114,
"eval_p_rank/min_succ_fail_diff_avg_usc_trossen": 0.06780543923377991,
"eval_p_rank/max_succ_fail_diff_avg_usc_trossen": 0.46358518302440643,
"eval_p_rank/ranking_acc_avg_usc_trossen": 0.8809523809523809,
"eval_p_rank/ranking_acc_all_pairs_avg_usc_trossen": 0.8809523809523809,
"eval_p_rank/ranking_acc_suboptimal_vs_successful_avg_usc_trossen": 0.9375,
"eval_p_rank/ranking_acc_failure_vs_suboptimal_avg_usc_trossen": 0.75,
"eval_p_rank/ranking_acc_failure_vs_successful_avg_usc_trossen": 0.8888888888888888,
"eval_p_rank/kendall_sum_usc_trossen": 0.8333333333333333,
"eval_p_rank/kendall_rewind_sum_usc_trossen": 1.0,
"eval_p_rank/avg_succ_subopt_diff_sum_usc_trossen": 0.14124762515227,
"eval_p_rank/min_succ_subopt_diff_sum_usc_trossen": 0.040902674198150635,
"eval_p_rank/max_succ_subopt_diff_sum_usc_trossen": 0.2803109735250473,
"eval_p_rank/avg_subopt_fail_diff_sum_usc_trossen": 0.19397936016321182,
"eval_p_rank/min_subopt_fail_diff_sum_usc_trossen": 0.026902765035629272,
"eval_p_rank/max_subopt_fail_diff_sum_usc_trossen": 0.3610559552907944,
"eval_p_rank/avg_succ_fail_diff_sum_usc_trossen": 0.28165244973368114,
"eval_p_rank/min_succ_fail_diff_sum_usc_trossen": 0.06780543923377991,
"eval_p_rank/max_succ_fail_diff_sum_usc_trossen": 0.46358518302440643,
"eval_p_rank/ranking_acc_sum_usc_trossen": 0.8809523809523809,
"eval_p_rank/ranking_acc_all_pairs_sum_usc_trossen": 0.8809523809523809,
"eval_p_rank/ranking_acc_suboptimal_vs_successful_sum_usc_trossen": 0.9375,
"eval_p_rank/ranking_acc_failure_vs_suboptimal_sum_usc_trossen": 0.75,
"eval_p_rank/ranking_acc_failure_vs_successful_sum_usc_trossen": 0.8888888888888888,
"eval_rew_align/success_auprc_rfm_new_mit_franka_nowrist": 0.13878492377635082,
"eval_rew_align/positive_success_acc_rfm_new_mit_franka_nowrist": 0.9,
"eval_rew_align/negative_success_acc_rfm_new_mit_franka_nowrist": 0.9635294117647059,
"eval_rew_align/loss_rfm_new_mit_franka_nowrist": 1.3595333456993104,
"eval_rew_align/pearson_rfm_new_mit_franka_nowrist": 0.9332205211882452,
"eval_p_rank/kendall_last_rfm_new_mit_franka_nowrist": 0.46904761904761905,
"eval_p_rank/kendall_rewind_last_rfm_new_mit_franka_nowrist": 0.8095238095238095,
"eval_p_rank/avg_succ_subopt_diff_last_rfm_new_mit_franka_nowrist": 0.1011232117811839,
"eval_p_rank/min_succ_subopt_diff_last_rfm_new_mit_franka_nowrist": 0.022794996698697445,
"eval_p_rank/max_succ_subopt_diff_last_rfm_new_mit_franka_nowrist": 0.21488183736801147,
"eval_p_rank/avg_subopt_fail_diff_last_rfm_new_mit_franka_nowrist": 0.14513030257962997,
"eval_p_rank/min_subopt_fail_diff_last_rfm_new_mit_franka_nowrist": -0.14316336512565614,
"eval_p_rank/max_subopt_fail_diff_last_rfm_new_mit_franka_nowrist": 0.34648392796516414,
"eval_p_rank/avg_succ_fail_diff_last_rfm_new_mit_franka_nowrist": 0.24625351436081383,
"eval_p_rank/min_succ_fail_diff_last_rfm_new_mit_franka_nowrist": -0.00810291568438215,
"eval_p_rank/max_succ_fail_diff_last_rfm_new_mit_franka_nowrist": 0.49043338249127066,
"eval_p_rank/ranking_acc_last_rfm_new_mit_franka_nowrist": 0.7598684210526315,
"eval_p_rank/ranking_acc_all_pairs_last_rfm_new_mit_franka_nowrist": 0.7598684210526315,
"eval_p_rank/ranking_acc_failure_vs_successful_last_rfm_new_mit_franka_nowrist": 0.8482142857142857,
"eval_p_rank/ranking_acc_failure_vs_suboptimal_last_rfm_new_mit_franka_nowrist": 0.7523809523809524,
"eval_p_rank/ranking_acc_suboptimal_vs_successful_last_rfm_new_mit_franka_nowrist": 0.6551724137931034,
"eval_p_rank/kendall_avg_rfm_new_mit_franka_nowrist": 0.46904761904761905,
"eval_p_rank/kendall_rewind_avg_rfm_new_mit_franka_nowrist": 0.8095238095238095,
"eval_p_rank/avg_succ_subopt_diff_avg_rfm_new_mit_franka_nowrist": 0.1011232117811839,
"eval_p_rank/min_succ_subopt_diff_avg_rfm_new_mit_franka_nowrist": 0.022794996698697445,
"eval_p_rank/max_succ_subopt_diff_avg_rfm_new_mit_franka_nowrist": 0.21488183736801147,
"eval_p_rank/avg_subopt_fail_diff_avg_rfm_new_mit_franka_nowrist": 0.14513030257962997,
"eval_p_rank/min_subopt_fail_diff_avg_rfm_new_mit_franka_nowrist": -0.14316336512565614,
"eval_p_rank/max_subopt_fail_diff_avg_rfm_new_mit_franka_nowrist": 0.34648392796516414,
"eval_p_rank/avg_succ_fail_diff_avg_rfm_new_mit_franka_nowrist": 0.24625351436081383,
"eval_p_rank/min_succ_fail_diff_avg_rfm_new_mit_franka_nowrist": -0.00810291568438215,
"eval_p_rank/max_succ_fail_diff_avg_rfm_new_mit_franka_nowrist": 0.49043338249127066,
"eval_p_rank/ranking_acc_avg_rfm_new_mit_franka_nowrist": 0.7598684210526315,
"eval_p_rank/ranking_acc_all_pairs_avg_rfm_new_mit_franka_nowrist": 0.7598684210526315,
"eval_p_rank/ranking_acc_failure_vs_successful_avg_rfm_new_mit_franka_nowrist": 0.8482142857142857,
"eval_p_rank/ranking_acc_failure_vs_suboptimal_avg_rfm_new_mit_franka_nowrist": 0.7523809523809524,
"eval_p_rank/ranking_acc_suboptimal_vs_successful_avg_rfm_new_mit_franka_nowrist": 0.6551724137931034,
"eval_p_rank/kendall_sum_rfm_new_mit_franka_nowrist": 0.46904761904761905,
"eval_p_rank/kendall_rewind_sum_rfm_new_mit_franka_nowrist": 0.8095238095238095,
"eval_p_rank/avg_succ_subopt_diff_sum_rfm_new_mit_franka_nowrist": 0.1011232117811839,
"eval_p_rank/min_succ_subopt_diff_sum_rfm_new_mit_franka_nowrist": 0.022794996698697445,
"eval_p_rank/max_succ_subopt_diff_sum_rfm_new_mit_franka_nowrist": 0.21488183736801147,
"eval_p_rank/avg_subopt_fail_diff_sum_rfm_new_mit_franka_nowrist": 0.14513030257962997,
"eval_p_rank/min_subopt_fail_diff_sum_rfm_new_mit_franka_nowrist": -0.14316336512565614,
"eval_p_rank/max_subopt_fail_diff_sum_rfm_new_mit_franka_nowrist": 0.34648392796516414,
"eval_p_rank/avg_succ_fail_diff_sum_rfm_new_mit_franka_nowrist": 0.24625351436081383,
"eval_p_rank/min_succ_fail_diff_sum_rfm_new_mit_franka_nowrist": -0.00810291568438215,
"eval_p_rank/max_succ_fail_diff_sum_rfm_new_mit_franka_nowrist": 0.49043338249127066,
"eval_p_rank/ranking_acc_sum_rfm_new_mit_franka_nowrist": 0.7598684210526315,
"eval_p_rank/ranking_acc_all_pairs_sum_rfm_new_mit_franka_nowrist": 0.7598684210526315,
"eval_p_rank/ranking_acc_failure_vs_successful_sum_rfm_new_mit_franka_nowrist": 0.8482142857142857,
"eval_p_rank/ranking_acc_failure_vs_suboptimal_sum_rfm_new_mit_franka_nowrist": 0.7523809523809524,
"eval_p_rank/ranking_acc_suboptimal_vs_successful_sum_rfm_new_mit_franka_nowrist": 0.6551724137931034,
"eval_rew_align/success_auprc_utd_so101_clean_top": 0.1594673014952464,
"eval_rew_align/positive_success_acc_utd_so101_clean_top": 0.8,
"eval_rew_align/negative_success_acc_utd_so101_clean_top": 0.9796078431372549,
"eval_rew_align/loss_utd_so101_clean_top": 1.422999668121338,
"eval_rew_align/pearson_utd_so101_clean_top": 0.9214771733077172,
"eval_p_rank/kendall_last_utd_so101_clean_top": 0.7333333333333333,
"eval_p_rank/kendall_rewind_last_utd_so101_clean_top": 0.7333333333333333,
"eval_p_rank/avg_succ_subopt_diff_last_utd_so101_clean_top": 0.1281689941883087,
"eval_p_rank/min_succ_subopt_diff_last_utd_so101_clean_top": -0.6224770694971085,
"eval_p_rank/max_succ_subopt_diff_last_utd_so101_clean_top": 0.4432547390460968,
"eval_p_rank/avg_subopt_fail_diff_last_utd_so101_clean_top": 0.2357720375061035,
"eval_p_rank/min_subopt_fail_diff_last_utd_so101_clean_top": -0.012576103210449219,
"eval_p_rank/max_subopt_fail_diff_last_utd_so101_clean_top": 0.5894219428300858,
"eval_p_rank/avg_succ_fail_diff_last_utd_so101_clean_top": 0.3639410316944122,
"eval_p_rank/min_succ_fail_diff_last_utd_so101_clean_top": -0.033055126667022705,
"eval_p_rank/max_succ_fail_diff_last_utd_so101_clean_top": 0.6650743782520294,
"eval_p_rank/ranking_acc_last_utd_so101_clean_top": 0.8666666666666667,
"eval_p_rank/ranking_acc_all_pairs_last_utd_so101_clean_top": 0.8666666666666667,
"eval_p_rank/ranking_acc_failure_vs_successful_last_utd_so101_clean_top": 0.9,
"eval_p_rank/ranking_acc_suboptimal_vs_successful_last_utd_so101_clean_top": 0.8,
"eval_p_rank/ranking_acc_failure_vs_suboptimal_last_utd_so101_clean_top": 0.9,
"eval_p_rank/kendall_avg_utd_so101_clean_top": 0.7333333333333333,
"eval_p_rank/kendall_rewind_avg_utd_so101_clean_top": 0.7333333333333333,
"eval_p_rank/avg_succ_subopt_diff_avg_utd_so101_clean_top": 0.1281689941883087,
"eval_p_rank/min_succ_subopt_diff_avg_utd_so101_clean_top": -0.6224770694971085,
"eval_p_rank/max_succ_subopt_diff_avg_utd_so101_clean_top": 0.4432547390460968,
"eval_p_rank/avg_subopt_fail_diff_avg_utd_so101_clean_top": 0.2357720375061035,
"eval_p_rank/min_subopt_fail_diff_avg_utd_so101_clean_top": -0.012576103210449219,
"eval_p_rank/max_subopt_fail_diff_avg_utd_so101_clean_top": 0.5894219428300858,
"eval_p_rank/avg_succ_fail_diff_avg_utd_so101_clean_top": 0.3639410316944122,
"eval_p_rank/min_succ_fail_diff_avg_utd_so101_clean_top": -0.033055126667022705,
"eval_p_rank/max_succ_fail_diff_avg_utd_so101_clean_top": 0.6650743782520294,
"eval_p_rank/ranking_acc_avg_utd_so101_clean_top": 0.8666666666666667,
"eval_p_rank/ranking_acc_all_pairs_avg_utd_so101_clean_top": 0.8666666666666667,
"eval_p_rank/ranking_acc_failure_vs_successful_avg_utd_so101_clean_top": 0.9,
"eval_p_rank/ranking_acc_suboptimal_vs_successful_avg_utd_so101_clean_top": 0.8,
"eval_p_rank/ranking_acc_failure_vs_suboptimal_avg_utd_so101_clean_top": 0.9,
"eval_p_rank/kendall_sum_utd_so101_clean_top": 0.7333333333333333,
"eval_p_rank/kendall_rewind_sum_utd_so101_clean_top": 0.7333333333333333,
"eval_p_rank/avg_succ_subopt_diff_sum_utd_so101_clean_top": 0.1281689941883087,
"eval_p_rank/min_succ_subopt_diff_sum_utd_so101_clean_top": -0.6224770694971085,
"eval_p_rank/max_succ_subopt_diff_sum_utd_so101_clean_top": 0.4432547390460968,
"eval_p_rank/avg_subopt_fail_diff_sum_utd_so101_clean_top": 0.2357720375061035,
"eval_p_rank/min_subopt_fail_diff_sum_utd_so101_clean_top": -0.012576103210449219,
"eval_p_rank/max_subopt_fail_diff_sum_utd_so101_clean_top": 0.5894219428300858,
"eval_p_rank/avg_succ_fail_diff_sum_utd_so101_clean_top": 0.3639410316944122,
"eval_p_rank/min_succ_fail_diff_sum_utd_so101_clean_top": -0.033055126667022705,
"eval_p_rank/max_succ_fail_diff_sum_utd_so101_clean_top": 0.6650743782520294,
"eval_p_rank/ranking_acc_sum_utd_so101_clean_top": 0.8666666666666667,
"eval_p_rank/ranking_acc_all_pairs_sum_utd_so101_clean_top": 0.8666666666666667,
"eval_p_rank/ranking_acc_failure_vs_successful_sum_utd_so101_clean_top": 0.9,
"eval_p_rank/ranking_acc_suboptimal_vs_successful_sum_utd_so101_clean_top": 0.8,
"eval_p_rank/ranking_acc_failure_vs_suboptimal_sum_utd_so101_clean_top": 0.9,
"eval_rew_align/success_auprc_usc_xarm": 0.3298253598253598,
"eval_rew_align/positive_success_acc_usc_xarm": 1.0,
"eval_rew_align/negative_success_acc_usc_xarm": 0.971764705882353,
"eval_rew_align/loss_usc_xarm": 1.459894859790802,
"eval_rew_align/pearson_usc_xarm": 0.9290145264370201,
"eval_p_rank/kendall_last_usc_xarm": 0.75,
"eval_p_rank/kendall_rewind_last_usc_xarm": 0.8888888888888888,
"eval_p_rank/avg_succ_subopt_diff_last_usc_xarm": 0.11288829644521077,
"eval_p_rank/min_succ_subopt_diff_last_usc_xarm": 0.0029833614826202393,
"eval_p_rank/max_succ_subopt_diff_last_usc_xarm": 0.217641681432724,
"eval_p_rank/avg_subopt_fail_diff_last_usc_xarm": 0.11646403868993123,
"eval_p_rank/min_subopt_fail_diff_last_usc_xarm": -0.03846535086631775,
"eval_p_rank/max_subopt_fail_diff_last_usc_xarm": 0.34273654222488403,
"eval_p_rank/avg_succ_fail_diff_last_usc_xarm": 0.229352335135142,
"eval_p_rank/min_succ_fail_diff_last_usc_xarm": 0.0956188440322876,
"eval_p_rank/max_succ_fail_diff_last_usc_xarm": 0.35525771975517273,
"eval_p_rank/ranking_acc_last_usc_xarm": 0.875,
"eval_p_rank/ranking_acc_all_pairs_last_usc_xarm": 0.875,
"eval_p_rank/ranking_acc_failure_vs_suboptimal_last_usc_xarm": 0.7083333333333334,
"eval_p_rank/ranking_acc_failure_vs_successful_last_usc_xarm": 1.0,
"eval_p_rank/ranking_acc_suboptimal_vs_successful_last_usc_xarm": 0.9166666666666666,
"eval_p_rank/kendall_avg_usc_xarm": 0.75,
"eval_p_rank/kendall_rewind_avg_usc_xarm": 0.8888888888888888,
"eval_p_rank/avg_succ_subopt_diff_avg_usc_xarm": 0.11288829644521077,
"eval_p_rank/min_succ_subopt_diff_avg_usc_xarm": 0.0029833614826202393,
"eval_p_rank/max_succ_subopt_diff_avg_usc_xarm": 0.217641681432724,
"eval_p_rank/avg_subopt_fail_diff_avg_usc_xarm": 0.11646403868993123,
"eval_p_rank/min_subopt_fail_diff_avg_usc_xarm": -0.03846535086631775,
"eval_p_rank/max_subopt_fail_diff_avg_usc_xarm": 0.34273654222488403,
"eval_p_rank/avg_succ_fail_diff_avg_usc_xarm": 0.229352335135142,
"eval_p_rank/min_succ_fail_diff_avg_usc_xarm": 0.0956188440322876,
"eval_p_rank/max_succ_fail_diff_avg_usc_xarm": 0.35525771975517273,
"eval_p_rank/ranking_acc_avg_usc_xarm": 0.875,
"eval_p_rank/ranking_acc_all_pairs_avg_usc_xarm": 0.875,
"eval_p_rank/ranking_acc_failure_vs_suboptimal_avg_usc_xarm": 0.7083333333333334,
"eval_p_rank/ranking_acc_failure_vs_successful_avg_usc_xarm": 1.0,
"eval_p_rank/ranking_acc_suboptimal_vs_successful_avg_usc_xarm": 0.9166666666666666,
"eval_p_rank/kendall_sum_usc_xarm": 0.75,
"eval_p_rank/kendall_rewind_sum_usc_xarm": 0.8888888888888888,
"eval_p_rank/avg_succ_subopt_diff_sum_usc_xarm": 0.11288829644521077,
"eval_p_rank/min_succ_subopt_diff_sum_usc_xarm": 0.0029833614826202393,
"eval_p_rank/max_succ_subopt_diff_sum_usc_xarm": 0.217641681432724,
"eval_p_rank/avg_subopt_fail_diff_sum_usc_xarm": 0.11646403868993123,
"eval_p_rank/min_subopt_fail_diff_sum_usc_xarm": -0.03846535086631775,
"eval_p_rank/max_subopt_fail_diff_sum_usc_xarm": 0.34273654222488403,
"eval_p_rank/avg_succ_fail_diff_sum_usc_xarm": 0.229352335135142,
"eval_p_rank/min_succ_fail_diff_sum_usc_xarm": 0.0956188440322876,
"eval_p_rank/max_succ_fail_diff_sum_usc_xarm": 0.35525771975517273,
"eval_p_rank/ranking_acc_sum_usc_xarm": 0.875,
"eval_p_rank/ranking_acc_all_pairs_sum_usc_xarm": 0.875,
"eval_p_rank/ranking_acc_failure_vs_suboptimal_sum_usc_xarm": 0.7083333333333334,
"eval_p_rank/ranking_acc_failure_vs_successful_sum_usc_xarm": 1.0,
"eval_p_rank/ranking_acc_suboptimal_vs_successful_sum_usc_xarm": 0.9166666666666666,
"eval_rew_align/success_auprc_usc_franka": 0.178648534454372,
"eval_rew_align/positive_success_acc_usc_franka": 0.75,
"eval_rew_align/negative_success_acc_usc_franka": 0.9436274509803921,
"eval_rew_align/loss_usc_franka": 1.4907499551773071,
"eval_rew_align/pearson_usc_franka": 0.9115594502071923,
"eval_p_rank/kendall_last_usc_franka": 0.7916666666666666,
"eval_p_rank/kendall_rewind_last_usc_franka": 0.8333333333333334,
"eval_p_rank/avg_succ_subopt_diff_last_usc_franka": 0.06161930412054062,
"eval_p_rank/min_succ_subopt_diff_last_usc_franka": -0.010589927434921265,
"eval_p_rank/max_succ_subopt_diff_last_usc_franka": 0.17146822810173035,
"eval_p_rank/avg_subopt_fail_diff_last_usc_franka": 0.18651490285992622,
"eval_p_rank/min_subopt_fail_diff_last_usc_franka": 0.0136566162109375,
"eval_p_rank/max_subopt_fail_diff_last_usc_franka": 0.3522116541862488,
"eval_p_rank/avg_succ_fail_diff_last_usc_franka": 0.24813420698046684,
"eval_p_rank/min_succ_fail_diff_last_usc_franka": 0.032290756702423096,
"eval_p_rank/max_succ_fail_diff_last_usc_franka": 0.4191764295101166,
"eval_p_rank/ranking_acc_last_usc_franka": 0.8958333333333334,
"eval_p_rank/ranking_acc_all_pairs_last_usc_franka": 0.8958333333333334,
"eval_p_rank/ranking_acc_suboptimal_vs_successful_last_usc_franka": 0.8125,
"eval_p_rank/ranking_acc_failure_vs_suboptimal_last_usc_franka": 0.9375,
"eval_p_rank/ranking_acc_failure_vs_successful_last_usc_franka": 0.9375,
"eval_p_rank/kendall_avg_usc_franka": 0.7916666666666666,
"eval_p_rank/kendall_rewind_avg_usc_franka": 0.8333333333333334,
"eval_p_rank/avg_succ_subopt_diff_avg_usc_franka": 0.06161930412054062,
"eval_p_rank/min_succ_subopt_diff_avg_usc_franka": -0.010589927434921265,
"eval_p_rank/max_succ_subopt_diff_avg_usc_franka": 0.17146822810173035,
"eval_p_rank/avg_subopt_fail_diff_avg_usc_franka": 0.18651490285992622,
"eval_p_rank/min_subopt_fail_diff_avg_usc_franka": 0.0136566162109375,
"eval_p_rank/max_subopt_fail_diff_avg_usc_franka": 0.3522116541862488,
"eval_p_rank/avg_succ_fail_diff_avg_usc_franka": 0.24813420698046684,
"eval_p_rank/min_succ_fail_diff_avg_usc_franka": 0.032290756702423096,
"eval_p_rank/max_succ_fail_diff_avg_usc_franka": 0.4191764295101166,
"eval_p_rank/ranking_acc_avg_usc_franka": 0.8958333333333334,
"eval_p_rank/ranking_acc_all_pairs_avg_usc_franka": 0.8958333333333334,
"eval_p_rank/ranking_acc_suboptimal_vs_successful_avg_usc_franka": 0.8125,
"eval_p_rank/ranking_acc_failure_vs_suboptimal_avg_usc_franka": 0.9375,
"eval_p_rank/ranking_acc_failure_vs_successful_avg_usc_franka": 0.9375,
"eval_p_rank/kendall_sum_usc_franka": 0.7916666666666666,
"eval_p_rank/kendall_rewind_sum_usc_franka": 0.8333333333333334,
"eval_p_rank/avg_succ_subopt_diff_sum_usc_franka": 0.06161930412054062,
"eval_p_rank/min_succ_subopt_diff_sum_usc_franka": -0.010589927434921265,
"eval_p_rank/max_succ_subopt_diff_sum_usc_franka": 0.17146822810173035,
"eval_p_rank/avg_subopt_fail_diff_sum_usc_franka": 0.18651490285992622,
"eval_p_rank/min_subopt_fail_diff_sum_usc_franka": 0.0136566162109375,
"eval_p_rank/max_subopt_fail_diff_sum_usc_franka": 0.3522116541862488,
"eval_p_rank/avg_succ_fail_diff_sum_usc_franka": 0.24813420698046684,
"eval_p_rank/min_succ_fail_diff_sum_usc_franka": 0.032290756702423096,
"eval_p_rank/max_succ_fail_diff_sum_usc_franka": 0.4191764295101166,
"eval_p_rank/ranking_acc_sum_usc_franka": 0.8958333333333334,
"eval_p_rank/ranking_acc_all_pairs_sum_usc_franka": 0.8958333333333334,
"eval_p_rank/ranking_acc_suboptimal_vs_successful_sum_usc_franka": 0.8125,
"eval_p_rank/ranking_acc_failure_vs_suboptimal_sum_usc_franka": 0.9375,
"eval_p_rank/ranking_acc_failure_vs_successful_sum_usc_franka": 0.9375,
"eval_rew_align/success_auprc_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.08961776352588778,
"eval_rew_align/positive_success_acc_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.9,
"eval_rew_align/negative_success_acc_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.9552941176470588,
"eval_rew_align/loss_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 1.5833105087280273,
"eval_rew_align/pearson_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.8841339237987327,
"eval_p_rank/kendall_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.3093333333333333,
"eval_p_rank/kendall_rewind_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.5333333333333333,
"eval_p_rank/avg_succ_subopt_diff_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": -0.02896499633789066,
"eval_p_rank/min_succ_subopt_diff_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": -0.23819030523300178,
"eval_p_rank/max_succ_subopt_diff_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.0715120196342468,
"eval_p_rank/avg_subopt_fail_diff_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.20831456266343595,
"eval_p_rank/min_subopt_fail_diff_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": -0.16911142468452456,
"eval_p_rank/max_subopt_fail_diff_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.4413899302482605,
"eval_p_rank/avg_succ_fail_diff_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.17934956632554527,
"eval_p_rank/min_succ_fail_diff_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": -0.09759940505027775,
"eval_p_rank/max_succ_fail_diff_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.4535657167434692,
"eval_p_rank/ranking_acc_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.6546666666666666,
"eval_p_rank/ranking_acc_all_pairs_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.6546666666666666,
"eval_p_rank/ranking_acc_failure_vs_suboptimal_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.76,
"eval_p_rank/ranking_acc_failure_vs_successful_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.744,
"eval_p_rank/ranking_acc_suboptimal_vs_successful_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.46,
"eval_p_rank/kendall_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.3093333333333333,
"eval_p_rank/kendall_rewind_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.5333333333333333,
"eval_p_rank/avg_succ_subopt_diff_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": -0.02896499633789066,
"eval_p_rank/min_succ_subopt_diff_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": -0.23819030523300178,
"eval_p_rank/max_succ_subopt_diff_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.0715120196342468,
"eval_p_rank/avg_subopt_fail_diff_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.20831456266343595,
"eval_p_rank/min_subopt_fail_diff_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": -0.16911142468452456,
"eval_p_rank/max_subopt_fail_diff_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.4413899302482605,
"eval_p_rank/avg_succ_fail_diff_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.17934956632554527,
"eval_p_rank/min_succ_fail_diff_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": -0.09759940505027775,
"eval_p_rank/max_succ_fail_diff_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.4535657167434692,
"eval_p_rank/ranking_acc_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.6546666666666666,
"eval_p_rank/ranking_acc_all_pairs_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.6546666666666666,
"eval_p_rank/ranking_acc_failure_vs_suboptimal_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.76,
"eval_p_rank/ranking_acc_failure_vs_successful_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.744,
"eval_p_rank/ranking_acc_suboptimal_vs_successful_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.46,
"eval_p_rank/kendall_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.3093333333333333,
"eval_p_rank/kendall_rewind_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.5333333333333333,
"eval_p_rank/avg_succ_subopt_diff_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": -0.02896499633789066,
"eval_p_rank/min_succ_subopt_diff_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": -0.23819030523300178,
"eval_p_rank/max_succ_subopt_diff_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.0715120196342468,
"eval_p_rank/avg_subopt_fail_diff_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.20831456266343595,
"eval_p_rank/min_subopt_fail_diff_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": -0.16911142468452456,
"eval_p_rank/max_subopt_fail_diff_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.4413899302482605,
"eval_p_rank/avg_succ_fail_diff_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.17934956632554527,
"eval_p_rank/min_succ_fail_diff_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": -0.09759940505027775,
"eval_p_rank/max_succ_fail_diff_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.4535657167434692,
"eval_p_rank/ranking_acc_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.6546666666666666,
"eval_p_rank/ranking_acc_all_pairs_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.6546666666666666,
"eval_p_rank/ranking_acc_failure_vs_suboptimal_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.76,
"eval_p_rank/ranking_acc_failure_vs_successful_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.744,
"eval_p_rank/ranking_acc_suboptimal_vs_successful_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.46,
"time/custom_evaluations": 227.5345072869677
}
} |