{ "step": 4500, "metrics": { "eval_rew_align/success_auprc_racer_val": 0.5972598636691593, "eval_rew_align/positive_success_acc_racer_val": 0.5238095238095238, "eval_rew_align/negative_success_acc_racer_val": 0.9725363489499192, "eval_rew_align/loss_racer_val": 1.5039077520370483, "eval_rew_align/pearson_racer_val": 0.8166853465988891, "eval_rew_align/success_auprc_oxe_bc_z_eval": 0.054588487359398905, "eval_rew_align/positive_success_acc_oxe_bc_z_eval": 0.7, "eval_rew_align/negative_success_acc_oxe_bc_z_eval": 0.9432314410480349, "eval_rew_align/loss_oxe_bc_z_eval": 1.7649718403816224, "eval_rew_align/pearson_oxe_bc_z_eval": 0.5611694184881661, "eval_rew_align/success_auprc_oxe_berkeley_cable_eval": 0.12320737550700828, "eval_rew_align/positive_success_acc_oxe_berkeley_cable_eval": 0.7, "eval_rew_align/negative_success_acc_oxe_berkeley_cable_eval": 0.9396299902629016, "eval_rew_align/loss_oxe_berkeley_cable_eval": 1.6676030993461608, "eval_rew_align/pearson_oxe_berkeley_cable_eval": 0.7626281468321523, "eval_rew_align/success_auprc_oxe_bridge_v2_eval": 0.2226129586383097, "eval_rew_align/positive_success_acc_oxe_bridge_v2_eval": 0.7, "eval_rew_align/negative_success_acc_oxe_bridge_v2_eval": 0.9700440528634361, "eval_rew_align/loss_oxe_bridge_v2_eval": 1.5779191851615906, "eval_rew_align/pearson_oxe_bridge_v2_eval": 0.8196023502220793, "eval_rew_align/success_auprc_oxe_jaco_eval": 0.05703350629550197, "eval_rew_align/positive_success_acc_oxe_jaco_eval": 0.8, "eval_rew_align/negative_success_acc_oxe_jaco_eval": 0.9796816087138668, "eval_rew_align/loss_oxe_jaco_eval": 1.701886808872223, "eval_rew_align/pearson_oxe_jaco_eval": 0.7369627561402344, "eval_rew_align/success_auprc_oxe_toto_eval": 0.10819046102805713, "eval_rew_align/positive_success_acc_oxe_toto_eval": 1.0, "eval_rew_align/negative_success_acc_oxe_toto_eval": 0.9452054794520548, "eval_rew_align/loss_oxe_toto_eval": 1.5248035669326783, "eval_rew_align/pearson_oxe_toto_eval": 0.9275399402861348, "eval_rew_align/success_auprc_oxe_viola_eval": 0.3924038961069135, "eval_rew_align/positive_success_acc_oxe_viola_eval": 1.0, "eval_rew_align/negative_success_acc_oxe_viola_eval": 0.9430528375733855, "eval_rew_align/loss_oxe_viola_eval": 1.5757618188858031, "eval_rew_align/pearson_oxe_viola_eval": 0.8978344352364431, "eval_rew_align/success_auprc_mw_eval": 0.14365004363589842, "eval_rew_align/positive_success_acc_mw_eval": 0.8, "eval_rew_align/negative_success_acc_mw_eval": 0.9627450980392157, "eval_rew_align/loss_mw_eval": 1.7702434301376342, "eval_rew_align/pearson_mw_eval": 0.7687541228936258, "eval_rew_align/success_auprc_libero_90": 0.1795092166845774, "eval_rew_align/positive_success_acc_libero_90": 0.9, "eval_rew_align/negative_success_acc_libero_90": 0.9682352941176471, "eval_rew_align/loss_libero_90": 1.5339298248291016, "eval_rew_align/pearson_libero_90": 0.8980980150621931, "eval_rew_align/success_auprc_usc_trossen": 0.2819898652527857, "eval_rew_align/positive_success_acc_usc_trossen": 0.5, "eval_rew_align/negative_success_acc_usc_trossen": 0.98, "eval_rew_align/loss_usc_trossen": 1.5562334299087524, "eval_rew_align/pearson_usc_trossen": 0.7085253582776633, "eval_p_rank/kendall_last_usc_trossen": 0.8333333333333333, "eval_p_rank/kendall_rewind_last_usc_trossen": 1.0, "eval_p_rank/avg_succ_subopt_diff_last_usc_trossen": 0.14124762515227, "eval_p_rank/min_succ_subopt_diff_last_usc_trossen": 0.040902674198150635, "eval_p_rank/max_succ_subopt_diff_last_usc_trossen": 0.2803109735250473, "eval_p_rank/avg_subopt_fail_diff_last_usc_trossen": 0.19397936016321182, "eval_p_rank/min_subopt_fail_diff_last_usc_trossen": 0.026902765035629272, "eval_p_rank/max_subopt_fail_diff_last_usc_trossen": 0.3610559552907944, "eval_p_rank/avg_succ_fail_diff_last_usc_trossen": 0.28165244973368114, "eval_p_rank/min_succ_fail_diff_last_usc_trossen": 0.06780543923377991, "eval_p_rank/max_succ_fail_diff_last_usc_trossen": 0.46358518302440643, "eval_p_rank/ranking_acc_last_usc_trossen": 0.8809523809523809, "eval_p_rank/ranking_acc_all_pairs_last_usc_trossen": 0.8809523809523809, "eval_p_rank/ranking_acc_suboptimal_vs_successful_last_usc_trossen": 0.9375, "eval_p_rank/ranking_acc_failure_vs_suboptimal_last_usc_trossen": 0.75, "eval_p_rank/ranking_acc_failure_vs_successful_last_usc_trossen": 0.8888888888888888, "eval_p_rank/kendall_avg_usc_trossen": 0.8333333333333333, "eval_p_rank/kendall_rewind_avg_usc_trossen": 1.0, "eval_p_rank/avg_succ_subopt_diff_avg_usc_trossen": 0.14124762515227, "eval_p_rank/min_succ_subopt_diff_avg_usc_trossen": 0.040902674198150635, "eval_p_rank/max_succ_subopt_diff_avg_usc_trossen": 0.2803109735250473, "eval_p_rank/avg_subopt_fail_diff_avg_usc_trossen": 0.19397936016321182, "eval_p_rank/min_subopt_fail_diff_avg_usc_trossen": 0.026902765035629272, "eval_p_rank/max_subopt_fail_diff_avg_usc_trossen": 0.3610559552907944, "eval_p_rank/avg_succ_fail_diff_avg_usc_trossen": 0.28165244973368114, "eval_p_rank/min_succ_fail_diff_avg_usc_trossen": 0.06780543923377991, "eval_p_rank/max_succ_fail_diff_avg_usc_trossen": 0.46358518302440643, "eval_p_rank/ranking_acc_avg_usc_trossen": 0.8809523809523809, "eval_p_rank/ranking_acc_all_pairs_avg_usc_trossen": 0.8809523809523809, "eval_p_rank/ranking_acc_suboptimal_vs_successful_avg_usc_trossen": 0.9375, "eval_p_rank/ranking_acc_failure_vs_suboptimal_avg_usc_trossen": 0.75, "eval_p_rank/ranking_acc_failure_vs_successful_avg_usc_trossen": 0.8888888888888888, "eval_p_rank/kendall_sum_usc_trossen": 0.8333333333333333, "eval_p_rank/kendall_rewind_sum_usc_trossen": 1.0, "eval_p_rank/avg_succ_subopt_diff_sum_usc_trossen": 0.14124762515227, "eval_p_rank/min_succ_subopt_diff_sum_usc_trossen": 0.040902674198150635, "eval_p_rank/max_succ_subopt_diff_sum_usc_trossen": 0.2803109735250473, "eval_p_rank/avg_subopt_fail_diff_sum_usc_trossen": 0.19397936016321182, "eval_p_rank/min_subopt_fail_diff_sum_usc_trossen": 0.026902765035629272, "eval_p_rank/max_subopt_fail_diff_sum_usc_trossen": 0.3610559552907944, "eval_p_rank/avg_succ_fail_diff_sum_usc_trossen": 0.28165244973368114, "eval_p_rank/min_succ_fail_diff_sum_usc_trossen": 0.06780543923377991, "eval_p_rank/max_succ_fail_diff_sum_usc_trossen": 0.46358518302440643, "eval_p_rank/ranking_acc_sum_usc_trossen": 0.8809523809523809, "eval_p_rank/ranking_acc_all_pairs_sum_usc_trossen": 0.8809523809523809, "eval_p_rank/ranking_acc_suboptimal_vs_successful_sum_usc_trossen": 0.9375, "eval_p_rank/ranking_acc_failure_vs_suboptimal_sum_usc_trossen": 0.75, "eval_p_rank/ranking_acc_failure_vs_successful_sum_usc_trossen": 0.8888888888888888, "eval_rew_align/success_auprc_rfm_new_mit_franka_nowrist": 0.13878492377635082, "eval_rew_align/positive_success_acc_rfm_new_mit_franka_nowrist": 0.9, "eval_rew_align/negative_success_acc_rfm_new_mit_franka_nowrist": 0.9635294117647059, "eval_rew_align/loss_rfm_new_mit_franka_nowrist": 1.3595333456993104, "eval_rew_align/pearson_rfm_new_mit_franka_nowrist": 0.9332205211882452, "eval_p_rank/kendall_last_rfm_new_mit_franka_nowrist": 0.46904761904761905, "eval_p_rank/kendall_rewind_last_rfm_new_mit_franka_nowrist": 0.8095238095238095, "eval_p_rank/avg_succ_subopt_diff_last_rfm_new_mit_franka_nowrist": 0.1011232117811839, "eval_p_rank/min_succ_subopt_diff_last_rfm_new_mit_franka_nowrist": 0.022794996698697445, "eval_p_rank/max_succ_subopt_diff_last_rfm_new_mit_franka_nowrist": 0.21488183736801147, "eval_p_rank/avg_subopt_fail_diff_last_rfm_new_mit_franka_nowrist": 0.14513030257962997, "eval_p_rank/min_subopt_fail_diff_last_rfm_new_mit_franka_nowrist": -0.14316336512565614, "eval_p_rank/max_subopt_fail_diff_last_rfm_new_mit_franka_nowrist": 0.34648392796516414, "eval_p_rank/avg_succ_fail_diff_last_rfm_new_mit_franka_nowrist": 0.24625351436081383, "eval_p_rank/min_succ_fail_diff_last_rfm_new_mit_franka_nowrist": -0.00810291568438215, "eval_p_rank/max_succ_fail_diff_last_rfm_new_mit_franka_nowrist": 0.49043338249127066, "eval_p_rank/ranking_acc_last_rfm_new_mit_franka_nowrist": 0.7598684210526315, "eval_p_rank/ranking_acc_all_pairs_last_rfm_new_mit_franka_nowrist": 0.7598684210526315, "eval_p_rank/ranking_acc_failure_vs_successful_last_rfm_new_mit_franka_nowrist": 0.8482142857142857, "eval_p_rank/ranking_acc_failure_vs_suboptimal_last_rfm_new_mit_franka_nowrist": 0.7523809523809524, "eval_p_rank/ranking_acc_suboptimal_vs_successful_last_rfm_new_mit_franka_nowrist": 0.6551724137931034, "eval_p_rank/kendall_avg_rfm_new_mit_franka_nowrist": 0.46904761904761905, "eval_p_rank/kendall_rewind_avg_rfm_new_mit_franka_nowrist": 0.8095238095238095, "eval_p_rank/avg_succ_subopt_diff_avg_rfm_new_mit_franka_nowrist": 0.1011232117811839, "eval_p_rank/min_succ_subopt_diff_avg_rfm_new_mit_franka_nowrist": 0.022794996698697445, "eval_p_rank/max_succ_subopt_diff_avg_rfm_new_mit_franka_nowrist": 0.21488183736801147, "eval_p_rank/avg_subopt_fail_diff_avg_rfm_new_mit_franka_nowrist": 0.14513030257962997, "eval_p_rank/min_subopt_fail_diff_avg_rfm_new_mit_franka_nowrist": -0.14316336512565614, "eval_p_rank/max_subopt_fail_diff_avg_rfm_new_mit_franka_nowrist": 0.34648392796516414, "eval_p_rank/avg_succ_fail_diff_avg_rfm_new_mit_franka_nowrist": 0.24625351436081383, "eval_p_rank/min_succ_fail_diff_avg_rfm_new_mit_franka_nowrist": -0.00810291568438215, "eval_p_rank/max_succ_fail_diff_avg_rfm_new_mit_franka_nowrist": 0.49043338249127066, "eval_p_rank/ranking_acc_avg_rfm_new_mit_franka_nowrist": 0.7598684210526315, "eval_p_rank/ranking_acc_all_pairs_avg_rfm_new_mit_franka_nowrist": 0.7598684210526315, "eval_p_rank/ranking_acc_failure_vs_successful_avg_rfm_new_mit_franka_nowrist": 0.8482142857142857, "eval_p_rank/ranking_acc_failure_vs_suboptimal_avg_rfm_new_mit_franka_nowrist": 0.7523809523809524, "eval_p_rank/ranking_acc_suboptimal_vs_successful_avg_rfm_new_mit_franka_nowrist": 0.6551724137931034, "eval_p_rank/kendall_sum_rfm_new_mit_franka_nowrist": 0.46904761904761905, "eval_p_rank/kendall_rewind_sum_rfm_new_mit_franka_nowrist": 0.8095238095238095, "eval_p_rank/avg_succ_subopt_diff_sum_rfm_new_mit_franka_nowrist": 0.1011232117811839, "eval_p_rank/min_succ_subopt_diff_sum_rfm_new_mit_franka_nowrist": 0.022794996698697445, "eval_p_rank/max_succ_subopt_diff_sum_rfm_new_mit_franka_nowrist": 0.21488183736801147, "eval_p_rank/avg_subopt_fail_diff_sum_rfm_new_mit_franka_nowrist": 0.14513030257962997, "eval_p_rank/min_subopt_fail_diff_sum_rfm_new_mit_franka_nowrist": -0.14316336512565614, "eval_p_rank/max_subopt_fail_diff_sum_rfm_new_mit_franka_nowrist": 0.34648392796516414, "eval_p_rank/avg_succ_fail_diff_sum_rfm_new_mit_franka_nowrist": 0.24625351436081383, "eval_p_rank/min_succ_fail_diff_sum_rfm_new_mit_franka_nowrist": -0.00810291568438215, "eval_p_rank/max_succ_fail_diff_sum_rfm_new_mit_franka_nowrist": 0.49043338249127066, "eval_p_rank/ranking_acc_sum_rfm_new_mit_franka_nowrist": 0.7598684210526315, "eval_p_rank/ranking_acc_all_pairs_sum_rfm_new_mit_franka_nowrist": 0.7598684210526315, "eval_p_rank/ranking_acc_failure_vs_successful_sum_rfm_new_mit_franka_nowrist": 0.8482142857142857, "eval_p_rank/ranking_acc_failure_vs_suboptimal_sum_rfm_new_mit_franka_nowrist": 0.7523809523809524, "eval_p_rank/ranking_acc_suboptimal_vs_successful_sum_rfm_new_mit_franka_nowrist": 0.6551724137931034, "eval_rew_align/success_auprc_utd_so101_clean_top": 0.1594673014952464, "eval_rew_align/positive_success_acc_utd_so101_clean_top": 0.8, "eval_rew_align/negative_success_acc_utd_so101_clean_top": 0.9796078431372549, "eval_rew_align/loss_utd_so101_clean_top": 1.422999668121338, "eval_rew_align/pearson_utd_so101_clean_top": 0.9214771733077172, "eval_p_rank/kendall_last_utd_so101_clean_top": 0.7333333333333333, "eval_p_rank/kendall_rewind_last_utd_so101_clean_top": 0.7333333333333333, "eval_p_rank/avg_succ_subopt_diff_last_utd_so101_clean_top": 0.1281689941883087, "eval_p_rank/min_succ_subopt_diff_last_utd_so101_clean_top": -0.6224770694971085, "eval_p_rank/max_succ_subopt_diff_last_utd_so101_clean_top": 0.4432547390460968, "eval_p_rank/avg_subopt_fail_diff_last_utd_so101_clean_top": 0.2357720375061035, "eval_p_rank/min_subopt_fail_diff_last_utd_so101_clean_top": -0.012576103210449219, "eval_p_rank/max_subopt_fail_diff_last_utd_so101_clean_top": 0.5894219428300858, "eval_p_rank/avg_succ_fail_diff_last_utd_so101_clean_top": 0.3639410316944122, "eval_p_rank/min_succ_fail_diff_last_utd_so101_clean_top": -0.033055126667022705, "eval_p_rank/max_succ_fail_diff_last_utd_so101_clean_top": 0.6650743782520294, "eval_p_rank/ranking_acc_last_utd_so101_clean_top": 0.8666666666666667, "eval_p_rank/ranking_acc_all_pairs_last_utd_so101_clean_top": 0.8666666666666667, "eval_p_rank/ranking_acc_failure_vs_successful_last_utd_so101_clean_top": 0.9, "eval_p_rank/ranking_acc_suboptimal_vs_successful_last_utd_so101_clean_top": 0.8, "eval_p_rank/ranking_acc_failure_vs_suboptimal_last_utd_so101_clean_top": 0.9, "eval_p_rank/kendall_avg_utd_so101_clean_top": 0.7333333333333333, "eval_p_rank/kendall_rewind_avg_utd_so101_clean_top": 0.7333333333333333, "eval_p_rank/avg_succ_subopt_diff_avg_utd_so101_clean_top": 0.1281689941883087, "eval_p_rank/min_succ_subopt_diff_avg_utd_so101_clean_top": -0.6224770694971085, "eval_p_rank/max_succ_subopt_diff_avg_utd_so101_clean_top": 0.4432547390460968, "eval_p_rank/avg_subopt_fail_diff_avg_utd_so101_clean_top": 0.2357720375061035, "eval_p_rank/min_subopt_fail_diff_avg_utd_so101_clean_top": -0.012576103210449219, "eval_p_rank/max_subopt_fail_diff_avg_utd_so101_clean_top": 0.5894219428300858, "eval_p_rank/avg_succ_fail_diff_avg_utd_so101_clean_top": 0.3639410316944122, "eval_p_rank/min_succ_fail_diff_avg_utd_so101_clean_top": -0.033055126667022705, "eval_p_rank/max_succ_fail_diff_avg_utd_so101_clean_top": 0.6650743782520294, "eval_p_rank/ranking_acc_avg_utd_so101_clean_top": 0.8666666666666667, "eval_p_rank/ranking_acc_all_pairs_avg_utd_so101_clean_top": 0.8666666666666667, "eval_p_rank/ranking_acc_failure_vs_successful_avg_utd_so101_clean_top": 0.9, "eval_p_rank/ranking_acc_suboptimal_vs_successful_avg_utd_so101_clean_top": 0.8, "eval_p_rank/ranking_acc_failure_vs_suboptimal_avg_utd_so101_clean_top": 0.9, "eval_p_rank/kendall_sum_utd_so101_clean_top": 0.7333333333333333, "eval_p_rank/kendall_rewind_sum_utd_so101_clean_top": 0.7333333333333333, "eval_p_rank/avg_succ_subopt_diff_sum_utd_so101_clean_top": 0.1281689941883087, "eval_p_rank/min_succ_subopt_diff_sum_utd_so101_clean_top": -0.6224770694971085, "eval_p_rank/max_succ_subopt_diff_sum_utd_so101_clean_top": 0.4432547390460968, "eval_p_rank/avg_subopt_fail_diff_sum_utd_so101_clean_top": 0.2357720375061035, "eval_p_rank/min_subopt_fail_diff_sum_utd_so101_clean_top": -0.012576103210449219, "eval_p_rank/max_subopt_fail_diff_sum_utd_so101_clean_top": 0.5894219428300858, "eval_p_rank/avg_succ_fail_diff_sum_utd_so101_clean_top": 0.3639410316944122, "eval_p_rank/min_succ_fail_diff_sum_utd_so101_clean_top": -0.033055126667022705, "eval_p_rank/max_succ_fail_diff_sum_utd_so101_clean_top": 0.6650743782520294, "eval_p_rank/ranking_acc_sum_utd_so101_clean_top": 0.8666666666666667, "eval_p_rank/ranking_acc_all_pairs_sum_utd_so101_clean_top": 0.8666666666666667, "eval_p_rank/ranking_acc_failure_vs_successful_sum_utd_so101_clean_top": 0.9, "eval_p_rank/ranking_acc_suboptimal_vs_successful_sum_utd_so101_clean_top": 0.8, "eval_p_rank/ranking_acc_failure_vs_suboptimal_sum_utd_so101_clean_top": 0.9, "eval_rew_align/success_auprc_usc_xarm": 0.3298253598253598, "eval_rew_align/positive_success_acc_usc_xarm": 1.0, "eval_rew_align/negative_success_acc_usc_xarm": 0.971764705882353, "eval_rew_align/loss_usc_xarm": 1.459894859790802, "eval_rew_align/pearson_usc_xarm": 0.9290145264370201, "eval_p_rank/kendall_last_usc_xarm": 0.75, "eval_p_rank/kendall_rewind_last_usc_xarm": 0.8888888888888888, "eval_p_rank/avg_succ_subopt_diff_last_usc_xarm": 0.11288829644521077, "eval_p_rank/min_succ_subopt_diff_last_usc_xarm": 0.0029833614826202393, "eval_p_rank/max_succ_subopt_diff_last_usc_xarm": 0.217641681432724, "eval_p_rank/avg_subopt_fail_diff_last_usc_xarm": 0.11646403868993123, "eval_p_rank/min_subopt_fail_diff_last_usc_xarm": -0.03846535086631775, "eval_p_rank/max_subopt_fail_diff_last_usc_xarm": 0.34273654222488403, "eval_p_rank/avg_succ_fail_diff_last_usc_xarm": 0.229352335135142, "eval_p_rank/min_succ_fail_diff_last_usc_xarm": 0.0956188440322876, "eval_p_rank/max_succ_fail_diff_last_usc_xarm": 0.35525771975517273, "eval_p_rank/ranking_acc_last_usc_xarm": 0.875, "eval_p_rank/ranking_acc_all_pairs_last_usc_xarm": 0.875, "eval_p_rank/ranking_acc_failure_vs_suboptimal_last_usc_xarm": 0.7083333333333334, "eval_p_rank/ranking_acc_failure_vs_successful_last_usc_xarm": 1.0, "eval_p_rank/ranking_acc_suboptimal_vs_successful_last_usc_xarm": 0.9166666666666666, "eval_p_rank/kendall_avg_usc_xarm": 0.75, "eval_p_rank/kendall_rewind_avg_usc_xarm": 0.8888888888888888, "eval_p_rank/avg_succ_subopt_diff_avg_usc_xarm": 0.11288829644521077, "eval_p_rank/min_succ_subopt_diff_avg_usc_xarm": 0.0029833614826202393, "eval_p_rank/max_succ_subopt_diff_avg_usc_xarm": 0.217641681432724, "eval_p_rank/avg_subopt_fail_diff_avg_usc_xarm": 0.11646403868993123, "eval_p_rank/min_subopt_fail_diff_avg_usc_xarm": -0.03846535086631775, "eval_p_rank/max_subopt_fail_diff_avg_usc_xarm": 0.34273654222488403, "eval_p_rank/avg_succ_fail_diff_avg_usc_xarm": 0.229352335135142, "eval_p_rank/min_succ_fail_diff_avg_usc_xarm": 0.0956188440322876, "eval_p_rank/max_succ_fail_diff_avg_usc_xarm": 0.35525771975517273, "eval_p_rank/ranking_acc_avg_usc_xarm": 0.875, "eval_p_rank/ranking_acc_all_pairs_avg_usc_xarm": 0.875, "eval_p_rank/ranking_acc_failure_vs_suboptimal_avg_usc_xarm": 0.7083333333333334, "eval_p_rank/ranking_acc_failure_vs_successful_avg_usc_xarm": 1.0, "eval_p_rank/ranking_acc_suboptimal_vs_successful_avg_usc_xarm": 0.9166666666666666, "eval_p_rank/kendall_sum_usc_xarm": 0.75, "eval_p_rank/kendall_rewind_sum_usc_xarm": 0.8888888888888888, "eval_p_rank/avg_succ_subopt_diff_sum_usc_xarm": 0.11288829644521077, "eval_p_rank/min_succ_subopt_diff_sum_usc_xarm": 0.0029833614826202393, "eval_p_rank/max_succ_subopt_diff_sum_usc_xarm": 0.217641681432724, "eval_p_rank/avg_subopt_fail_diff_sum_usc_xarm": 0.11646403868993123, "eval_p_rank/min_subopt_fail_diff_sum_usc_xarm": -0.03846535086631775, "eval_p_rank/max_subopt_fail_diff_sum_usc_xarm": 0.34273654222488403, "eval_p_rank/avg_succ_fail_diff_sum_usc_xarm": 0.229352335135142, "eval_p_rank/min_succ_fail_diff_sum_usc_xarm": 0.0956188440322876, "eval_p_rank/max_succ_fail_diff_sum_usc_xarm": 0.35525771975517273, "eval_p_rank/ranking_acc_sum_usc_xarm": 0.875, "eval_p_rank/ranking_acc_all_pairs_sum_usc_xarm": 0.875, "eval_p_rank/ranking_acc_failure_vs_suboptimal_sum_usc_xarm": 0.7083333333333334, "eval_p_rank/ranking_acc_failure_vs_successful_sum_usc_xarm": 1.0, "eval_p_rank/ranking_acc_suboptimal_vs_successful_sum_usc_xarm": 0.9166666666666666, "eval_rew_align/success_auprc_usc_franka": 0.178648534454372, "eval_rew_align/positive_success_acc_usc_franka": 0.75, "eval_rew_align/negative_success_acc_usc_franka": 0.9436274509803921, "eval_rew_align/loss_usc_franka": 1.4907499551773071, "eval_rew_align/pearson_usc_franka": 0.9115594502071923, "eval_p_rank/kendall_last_usc_franka": 0.7916666666666666, "eval_p_rank/kendall_rewind_last_usc_franka": 0.8333333333333334, "eval_p_rank/avg_succ_subopt_diff_last_usc_franka": 0.06161930412054062, "eval_p_rank/min_succ_subopt_diff_last_usc_franka": -0.010589927434921265, "eval_p_rank/max_succ_subopt_diff_last_usc_franka": 0.17146822810173035, "eval_p_rank/avg_subopt_fail_diff_last_usc_franka": 0.18651490285992622, "eval_p_rank/min_subopt_fail_diff_last_usc_franka": 0.0136566162109375, "eval_p_rank/max_subopt_fail_diff_last_usc_franka": 0.3522116541862488, "eval_p_rank/avg_succ_fail_diff_last_usc_franka": 0.24813420698046684, "eval_p_rank/min_succ_fail_diff_last_usc_franka": 0.032290756702423096, "eval_p_rank/max_succ_fail_diff_last_usc_franka": 0.4191764295101166, "eval_p_rank/ranking_acc_last_usc_franka": 0.8958333333333334, "eval_p_rank/ranking_acc_all_pairs_last_usc_franka": 0.8958333333333334, "eval_p_rank/ranking_acc_suboptimal_vs_successful_last_usc_franka": 0.8125, "eval_p_rank/ranking_acc_failure_vs_suboptimal_last_usc_franka": 0.9375, "eval_p_rank/ranking_acc_failure_vs_successful_last_usc_franka": 0.9375, "eval_p_rank/kendall_avg_usc_franka": 0.7916666666666666, "eval_p_rank/kendall_rewind_avg_usc_franka": 0.8333333333333334, "eval_p_rank/avg_succ_subopt_diff_avg_usc_franka": 0.06161930412054062, "eval_p_rank/min_succ_subopt_diff_avg_usc_franka": -0.010589927434921265, "eval_p_rank/max_succ_subopt_diff_avg_usc_franka": 0.17146822810173035, "eval_p_rank/avg_subopt_fail_diff_avg_usc_franka": 0.18651490285992622, "eval_p_rank/min_subopt_fail_diff_avg_usc_franka": 0.0136566162109375, "eval_p_rank/max_subopt_fail_diff_avg_usc_franka": 0.3522116541862488, "eval_p_rank/avg_succ_fail_diff_avg_usc_franka": 0.24813420698046684, "eval_p_rank/min_succ_fail_diff_avg_usc_franka": 0.032290756702423096, "eval_p_rank/max_succ_fail_diff_avg_usc_franka": 0.4191764295101166, "eval_p_rank/ranking_acc_avg_usc_franka": 0.8958333333333334, "eval_p_rank/ranking_acc_all_pairs_avg_usc_franka": 0.8958333333333334, "eval_p_rank/ranking_acc_suboptimal_vs_successful_avg_usc_franka": 0.8125, "eval_p_rank/ranking_acc_failure_vs_suboptimal_avg_usc_franka": 0.9375, "eval_p_rank/ranking_acc_failure_vs_successful_avg_usc_franka": 0.9375, "eval_p_rank/kendall_sum_usc_franka": 0.7916666666666666, "eval_p_rank/kendall_rewind_sum_usc_franka": 0.8333333333333334, "eval_p_rank/avg_succ_subopt_diff_sum_usc_franka": 0.06161930412054062, "eval_p_rank/min_succ_subopt_diff_sum_usc_franka": -0.010589927434921265, "eval_p_rank/max_succ_subopt_diff_sum_usc_franka": 0.17146822810173035, "eval_p_rank/avg_subopt_fail_diff_sum_usc_franka": 0.18651490285992622, "eval_p_rank/min_subopt_fail_diff_sum_usc_franka": 0.0136566162109375, "eval_p_rank/max_subopt_fail_diff_sum_usc_franka": 0.3522116541862488, "eval_p_rank/avg_succ_fail_diff_sum_usc_franka": 0.24813420698046684, "eval_p_rank/min_succ_fail_diff_sum_usc_franka": 0.032290756702423096, "eval_p_rank/max_succ_fail_diff_sum_usc_franka": 0.4191764295101166, "eval_p_rank/ranking_acc_sum_usc_franka": 0.8958333333333334, "eval_p_rank/ranking_acc_all_pairs_sum_usc_franka": 0.8958333333333334, "eval_p_rank/ranking_acc_suboptimal_vs_successful_sum_usc_franka": 0.8125, "eval_p_rank/ranking_acc_failure_vs_suboptimal_sum_usc_franka": 0.9375, "eval_p_rank/ranking_acc_failure_vs_successful_sum_usc_franka": 0.9375, "eval_rew_align/success_auprc_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.08961776352588778, "eval_rew_align/positive_success_acc_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.9, "eval_rew_align/negative_success_acc_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.9552941176470588, "eval_rew_align/loss_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 1.5833105087280273, "eval_rew_align/pearson_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.8841339237987327, "eval_p_rank/kendall_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.3093333333333333, "eval_p_rank/kendall_rewind_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.5333333333333333, "eval_p_rank/avg_succ_subopt_diff_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": -0.02896499633789066, "eval_p_rank/min_succ_subopt_diff_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": -0.23819030523300178, "eval_p_rank/max_succ_subopt_diff_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.0715120196342468, "eval_p_rank/avg_subopt_fail_diff_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.20831456266343595, "eval_p_rank/min_subopt_fail_diff_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": -0.16911142468452456, "eval_p_rank/max_subopt_fail_diff_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.4413899302482605, "eval_p_rank/avg_succ_fail_diff_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.17934956632554527, "eval_p_rank/min_succ_fail_diff_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": -0.09759940505027775, "eval_p_rank/max_succ_fail_diff_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.4535657167434692, "eval_p_rank/ranking_acc_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.6546666666666666, "eval_p_rank/ranking_acc_all_pairs_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.6546666666666666, "eval_p_rank/ranking_acc_failure_vs_suboptimal_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.76, "eval_p_rank/ranking_acc_failure_vs_successful_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.744, "eval_p_rank/ranking_acc_suboptimal_vs_successful_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.46, "eval_p_rank/kendall_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.3093333333333333, "eval_p_rank/kendall_rewind_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.5333333333333333, "eval_p_rank/avg_succ_subopt_diff_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": -0.02896499633789066, "eval_p_rank/min_succ_subopt_diff_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": -0.23819030523300178, "eval_p_rank/max_succ_subopt_diff_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.0715120196342468, "eval_p_rank/avg_subopt_fail_diff_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.20831456266343595, "eval_p_rank/min_subopt_fail_diff_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": -0.16911142468452456, "eval_p_rank/max_subopt_fail_diff_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.4413899302482605, "eval_p_rank/avg_succ_fail_diff_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.17934956632554527, "eval_p_rank/min_succ_fail_diff_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": -0.09759940505027775, "eval_p_rank/max_succ_fail_diff_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.4535657167434692, "eval_p_rank/ranking_acc_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.6546666666666666, "eval_p_rank/ranking_acc_all_pairs_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.6546666666666666, "eval_p_rank/ranking_acc_failure_vs_suboptimal_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.76, "eval_p_rank/ranking_acc_failure_vs_successful_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.744, "eval_p_rank/ranking_acc_suboptimal_vs_successful_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.46, "eval_p_rank/kendall_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.3093333333333333, "eval_p_rank/kendall_rewind_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.5333333333333333, "eval_p_rank/avg_succ_subopt_diff_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": -0.02896499633789066, "eval_p_rank/min_succ_subopt_diff_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": -0.23819030523300178, "eval_p_rank/max_succ_subopt_diff_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.0715120196342468, "eval_p_rank/avg_subopt_fail_diff_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.20831456266343595, "eval_p_rank/min_subopt_fail_diff_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": -0.16911142468452456, "eval_p_rank/max_subopt_fail_diff_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.4413899302482605, "eval_p_rank/avg_succ_fail_diff_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.17934956632554527, "eval_p_rank/min_succ_fail_diff_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": -0.09759940505027775, "eval_p_rank/max_succ_fail_diff_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.4535657167434692, "eval_p_rank/ranking_acc_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.6546666666666666, "eval_p_rank/ranking_acc_all_pairs_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.6546666666666666, "eval_p_rank/ranking_acc_failure_vs_suboptimal_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.76, "eval_p_rank/ranking_acc_failure_vs_successful_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.744, "eval_p_rank/ranking_acc_suboptimal_vs_successful_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.46, "time/custom_evaluations": 227.5345072869677 } }