Robometer-4B / metrics.json
aliangdw's picture
Duplicate from aliangdw/rfm_qwen4b_pref_prog_succ_8frames_all_discrete_10bins_part2
01f02fa
{
"step": 4500,
"metrics": {
"eval_rew_align/success_auprc_racer_val": 0.5972598636691593,
"eval_rew_align/positive_success_acc_racer_val": 0.5238095238095238,
"eval_rew_align/negative_success_acc_racer_val": 0.9725363489499192,
"eval_rew_align/loss_racer_val": 1.5039077520370483,
"eval_rew_align/pearson_racer_val": 0.8166853465988891,
"eval_rew_align/success_auprc_oxe_bc_z_eval": 0.054588487359398905,
"eval_rew_align/positive_success_acc_oxe_bc_z_eval": 0.7,
"eval_rew_align/negative_success_acc_oxe_bc_z_eval": 0.9432314410480349,
"eval_rew_align/loss_oxe_bc_z_eval": 1.7649718403816224,
"eval_rew_align/pearson_oxe_bc_z_eval": 0.5611694184881661,
"eval_rew_align/success_auprc_oxe_berkeley_cable_eval": 0.12320737550700828,
"eval_rew_align/positive_success_acc_oxe_berkeley_cable_eval": 0.7,
"eval_rew_align/negative_success_acc_oxe_berkeley_cable_eval": 0.9396299902629016,
"eval_rew_align/loss_oxe_berkeley_cable_eval": 1.6676030993461608,
"eval_rew_align/pearson_oxe_berkeley_cable_eval": 0.7626281468321523,
"eval_rew_align/success_auprc_oxe_bridge_v2_eval": 0.2226129586383097,
"eval_rew_align/positive_success_acc_oxe_bridge_v2_eval": 0.7,
"eval_rew_align/negative_success_acc_oxe_bridge_v2_eval": 0.9700440528634361,
"eval_rew_align/loss_oxe_bridge_v2_eval": 1.5779191851615906,
"eval_rew_align/pearson_oxe_bridge_v2_eval": 0.8196023502220793,
"eval_rew_align/success_auprc_oxe_jaco_eval": 0.05703350629550197,
"eval_rew_align/positive_success_acc_oxe_jaco_eval": 0.8,
"eval_rew_align/negative_success_acc_oxe_jaco_eval": 0.9796816087138668,
"eval_rew_align/loss_oxe_jaco_eval": 1.701886808872223,
"eval_rew_align/pearson_oxe_jaco_eval": 0.7369627561402344,
"eval_rew_align/success_auprc_oxe_toto_eval": 0.10819046102805713,
"eval_rew_align/positive_success_acc_oxe_toto_eval": 1.0,
"eval_rew_align/negative_success_acc_oxe_toto_eval": 0.9452054794520548,
"eval_rew_align/loss_oxe_toto_eval": 1.5248035669326783,
"eval_rew_align/pearson_oxe_toto_eval": 0.9275399402861348,
"eval_rew_align/success_auprc_oxe_viola_eval": 0.3924038961069135,
"eval_rew_align/positive_success_acc_oxe_viola_eval": 1.0,
"eval_rew_align/negative_success_acc_oxe_viola_eval": 0.9430528375733855,
"eval_rew_align/loss_oxe_viola_eval": 1.5757618188858031,
"eval_rew_align/pearson_oxe_viola_eval": 0.8978344352364431,
"eval_rew_align/success_auprc_mw_eval": 0.14365004363589842,
"eval_rew_align/positive_success_acc_mw_eval": 0.8,
"eval_rew_align/negative_success_acc_mw_eval": 0.9627450980392157,
"eval_rew_align/loss_mw_eval": 1.7702434301376342,
"eval_rew_align/pearson_mw_eval": 0.7687541228936258,
"eval_rew_align/success_auprc_libero_90": 0.1795092166845774,
"eval_rew_align/positive_success_acc_libero_90": 0.9,
"eval_rew_align/negative_success_acc_libero_90": 0.9682352941176471,
"eval_rew_align/loss_libero_90": 1.5339298248291016,
"eval_rew_align/pearson_libero_90": 0.8980980150621931,
"eval_rew_align/success_auprc_usc_trossen": 0.2819898652527857,
"eval_rew_align/positive_success_acc_usc_trossen": 0.5,
"eval_rew_align/negative_success_acc_usc_trossen": 0.98,
"eval_rew_align/loss_usc_trossen": 1.5562334299087524,
"eval_rew_align/pearson_usc_trossen": 0.7085253582776633,
"eval_p_rank/kendall_last_usc_trossen": 0.8333333333333333,
"eval_p_rank/kendall_rewind_last_usc_trossen": 1.0,
"eval_p_rank/avg_succ_subopt_diff_last_usc_trossen": 0.14124762515227,
"eval_p_rank/min_succ_subopt_diff_last_usc_trossen": 0.040902674198150635,
"eval_p_rank/max_succ_subopt_diff_last_usc_trossen": 0.2803109735250473,
"eval_p_rank/avg_subopt_fail_diff_last_usc_trossen": 0.19397936016321182,
"eval_p_rank/min_subopt_fail_diff_last_usc_trossen": 0.026902765035629272,
"eval_p_rank/max_subopt_fail_diff_last_usc_trossen": 0.3610559552907944,
"eval_p_rank/avg_succ_fail_diff_last_usc_trossen": 0.28165244973368114,
"eval_p_rank/min_succ_fail_diff_last_usc_trossen": 0.06780543923377991,
"eval_p_rank/max_succ_fail_diff_last_usc_trossen": 0.46358518302440643,
"eval_p_rank/ranking_acc_last_usc_trossen": 0.8809523809523809,
"eval_p_rank/ranking_acc_all_pairs_last_usc_trossen": 0.8809523809523809,
"eval_p_rank/ranking_acc_suboptimal_vs_successful_last_usc_trossen": 0.9375,
"eval_p_rank/ranking_acc_failure_vs_suboptimal_last_usc_trossen": 0.75,
"eval_p_rank/ranking_acc_failure_vs_successful_last_usc_trossen": 0.8888888888888888,
"eval_p_rank/kendall_avg_usc_trossen": 0.8333333333333333,
"eval_p_rank/kendall_rewind_avg_usc_trossen": 1.0,
"eval_p_rank/avg_succ_subopt_diff_avg_usc_trossen": 0.14124762515227,
"eval_p_rank/min_succ_subopt_diff_avg_usc_trossen": 0.040902674198150635,
"eval_p_rank/max_succ_subopt_diff_avg_usc_trossen": 0.2803109735250473,
"eval_p_rank/avg_subopt_fail_diff_avg_usc_trossen": 0.19397936016321182,
"eval_p_rank/min_subopt_fail_diff_avg_usc_trossen": 0.026902765035629272,
"eval_p_rank/max_subopt_fail_diff_avg_usc_trossen": 0.3610559552907944,
"eval_p_rank/avg_succ_fail_diff_avg_usc_trossen": 0.28165244973368114,
"eval_p_rank/min_succ_fail_diff_avg_usc_trossen": 0.06780543923377991,
"eval_p_rank/max_succ_fail_diff_avg_usc_trossen": 0.46358518302440643,
"eval_p_rank/ranking_acc_avg_usc_trossen": 0.8809523809523809,
"eval_p_rank/ranking_acc_all_pairs_avg_usc_trossen": 0.8809523809523809,
"eval_p_rank/ranking_acc_suboptimal_vs_successful_avg_usc_trossen": 0.9375,
"eval_p_rank/ranking_acc_failure_vs_suboptimal_avg_usc_trossen": 0.75,
"eval_p_rank/ranking_acc_failure_vs_successful_avg_usc_trossen": 0.8888888888888888,
"eval_p_rank/kendall_sum_usc_trossen": 0.8333333333333333,
"eval_p_rank/kendall_rewind_sum_usc_trossen": 1.0,
"eval_p_rank/avg_succ_subopt_diff_sum_usc_trossen": 0.14124762515227,
"eval_p_rank/min_succ_subopt_diff_sum_usc_trossen": 0.040902674198150635,
"eval_p_rank/max_succ_subopt_diff_sum_usc_trossen": 0.2803109735250473,
"eval_p_rank/avg_subopt_fail_diff_sum_usc_trossen": 0.19397936016321182,
"eval_p_rank/min_subopt_fail_diff_sum_usc_trossen": 0.026902765035629272,
"eval_p_rank/max_subopt_fail_diff_sum_usc_trossen": 0.3610559552907944,
"eval_p_rank/avg_succ_fail_diff_sum_usc_trossen": 0.28165244973368114,
"eval_p_rank/min_succ_fail_diff_sum_usc_trossen": 0.06780543923377991,
"eval_p_rank/max_succ_fail_diff_sum_usc_trossen": 0.46358518302440643,
"eval_p_rank/ranking_acc_sum_usc_trossen": 0.8809523809523809,
"eval_p_rank/ranking_acc_all_pairs_sum_usc_trossen": 0.8809523809523809,
"eval_p_rank/ranking_acc_suboptimal_vs_successful_sum_usc_trossen": 0.9375,
"eval_p_rank/ranking_acc_failure_vs_suboptimal_sum_usc_trossen": 0.75,
"eval_p_rank/ranking_acc_failure_vs_successful_sum_usc_trossen": 0.8888888888888888,
"eval_rew_align/success_auprc_rfm_new_mit_franka_nowrist": 0.13878492377635082,
"eval_rew_align/positive_success_acc_rfm_new_mit_franka_nowrist": 0.9,
"eval_rew_align/negative_success_acc_rfm_new_mit_franka_nowrist": 0.9635294117647059,
"eval_rew_align/loss_rfm_new_mit_franka_nowrist": 1.3595333456993104,
"eval_rew_align/pearson_rfm_new_mit_franka_nowrist": 0.9332205211882452,
"eval_p_rank/kendall_last_rfm_new_mit_franka_nowrist": 0.46904761904761905,
"eval_p_rank/kendall_rewind_last_rfm_new_mit_franka_nowrist": 0.8095238095238095,
"eval_p_rank/avg_succ_subopt_diff_last_rfm_new_mit_franka_nowrist": 0.1011232117811839,
"eval_p_rank/min_succ_subopt_diff_last_rfm_new_mit_franka_nowrist": 0.022794996698697445,
"eval_p_rank/max_succ_subopt_diff_last_rfm_new_mit_franka_nowrist": 0.21488183736801147,
"eval_p_rank/avg_subopt_fail_diff_last_rfm_new_mit_franka_nowrist": 0.14513030257962997,
"eval_p_rank/min_subopt_fail_diff_last_rfm_new_mit_franka_nowrist": -0.14316336512565614,
"eval_p_rank/max_subopt_fail_diff_last_rfm_new_mit_franka_nowrist": 0.34648392796516414,
"eval_p_rank/avg_succ_fail_diff_last_rfm_new_mit_franka_nowrist": 0.24625351436081383,
"eval_p_rank/min_succ_fail_diff_last_rfm_new_mit_franka_nowrist": -0.00810291568438215,
"eval_p_rank/max_succ_fail_diff_last_rfm_new_mit_franka_nowrist": 0.49043338249127066,
"eval_p_rank/ranking_acc_last_rfm_new_mit_franka_nowrist": 0.7598684210526315,
"eval_p_rank/ranking_acc_all_pairs_last_rfm_new_mit_franka_nowrist": 0.7598684210526315,
"eval_p_rank/ranking_acc_failure_vs_successful_last_rfm_new_mit_franka_nowrist": 0.8482142857142857,
"eval_p_rank/ranking_acc_failure_vs_suboptimal_last_rfm_new_mit_franka_nowrist": 0.7523809523809524,
"eval_p_rank/ranking_acc_suboptimal_vs_successful_last_rfm_new_mit_franka_nowrist": 0.6551724137931034,
"eval_p_rank/kendall_avg_rfm_new_mit_franka_nowrist": 0.46904761904761905,
"eval_p_rank/kendall_rewind_avg_rfm_new_mit_franka_nowrist": 0.8095238095238095,
"eval_p_rank/avg_succ_subopt_diff_avg_rfm_new_mit_franka_nowrist": 0.1011232117811839,
"eval_p_rank/min_succ_subopt_diff_avg_rfm_new_mit_franka_nowrist": 0.022794996698697445,
"eval_p_rank/max_succ_subopt_diff_avg_rfm_new_mit_franka_nowrist": 0.21488183736801147,
"eval_p_rank/avg_subopt_fail_diff_avg_rfm_new_mit_franka_nowrist": 0.14513030257962997,
"eval_p_rank/min_subopt_fail_diff_avg_rfm_new_mit_franka_nowrist": -0.14316336512565614,
"eval_p_rank/max_subopt_fail_diff_avg_rfm_new_mit_franka_nowrist": 0.34648392796516414,
"eval_p_rank/avg_succ_fail_diff_avg_rfm_new_mit_franka_nowrist": 0.24625351436081383,
"eval_p_rank/min_succ_fail_diff_avg_rfm_new_mit_franka_nowrist": -0.00810291568438215,
"eval_p_rank/max_succ_fail_diff_avg_rfm_new_mit_franka_nowrist": 0.49043338249127066,
"eval_p_rank/ranking_acc_avg_rfm_new_mit_franka_nowrist": 0.7598684210526315,
"eval_p_rank/ranking_acc_all_pairs_avg_rfm_new_mit_franka_nowrist": 0.7598684210526315,
"eval_p_rank/ranking_acc_failure_vs_successful_avg_rfm_new_mit_franka_nowrist": 0.8482142857142857,
"eval_p_rank/ranking_acc_failure_vs_suboptimal_avg_rfm_new_mit_franka_nowrist": 0.7523809523809524,
"eval_p_rank/ranking_acc_suboptimal_vs_successful_avg_rfm_new_mit_franka_nowrist": 0.6551724137931034,
"eval_p_rank/kendall_sum_rfm_new_mit_franka_nowrist": 0.46904761904761905,
"eval_p_rank/kendall_rewind_sum_rfm_new_mit_franka_nowrist": 0.8095238095238095,
"eval_p_rank/avg_succ_subopt_diff_sum_rfm_new_mit_franka_nowrist": 0.1011232117811839,
"eval_p_rank/min_succ_subopt_diff_sum_rfm_new_mit_franka_nowrist": 0.022794996698697445,
"eval_p_rank/max_succ_subopt_diff_sum_rfm_new_mit_franka_nowrist": 0.21488183736801147,
"eval_p_rank/avg_subopt_fail_diff_sum_rfm_new_mit_franka_nowrist": 0.14513030257962997,
"eval_p_rank/min_subopt_fail_diff_sum_rfm_new_mit_franka_nowrist": -0.14316336512565614,
"eval_p_rank/max_subopt_fail_diff_sum_rfm_new_mit_franka_nowrist": 0.34648392796516414,
"eval_p_rank/avg_succ_fail_diff_sum_rfm_new_mit_franka_nowrist": 0.24625351436081383,
"eval_p_rank/min_succ_fail_diff_sum_rfm_new_mit_franka_nowrist": -0.00810291568438215,
"eval_p_rank/max_succ_fail_diff_sum_rfm_new_mit_franka_nowrist": 0.49043338249127066,
"eval_p_rank/ranking_acc_sum_rfm_new_mit_franka_nowrist": 0.7598684210526315,
"eval_p_rank/ranking_acc_all_pairs_sum_rfm_new_mit_franka_nowrist": 0.7598684210526315,
"eval_p_rank/ranking_acc_failure_vs_successful_sum_rfm_new_mit_franka_nowrist": 0.8482142857142857,
"eval_p_rank/ranking_acc_failure_vs_suboptimal_sum_rfm_new_mit_franka_nowrist": 0.7523809523809524,
"eval_p_rank/ranking_acc_suboptimal_vs_successful_sum_rfm_new_mit_franka_nowrist": 0.6551724137931034,
"eval_rew_align/success_auprc_utd_so101_clean_top": 0.1594673014952464,
"eval_rew_align/positive_success_acc_utd_so101_clean_top": 0.8,
"eval_rew_align/negative_success_acc_utd_so101_clean_top": 0.9796078431372549,
"eval_rew_align/loss_utd_so101_clean_top": 1.422999668121338,
"eval_rew_align/pearson_utd_so101_clean_top": 0.9214771733077172,
"eval_p_rank/kendall_last_utd_so101_clean_top": 0.7333333333333333,
"eval_p_rank/kendall_rewind_last_utd_so101_clean_top": 0.7333333333333333,
"eval_p_rank/avg_succ_subopt_diff_last_utd_so101_clean_top": 0.1281689941883087,
"eval_p_rank/min_succ_subopt_diff_last_utd_so101_clean_top": -0.6224770694971085,
"eval_p_rank/max_succ_subopt_diff_last_utd_so101_clean_top": 0.4432547390460968,
"eval_p_rank/avg_subopt_fail_diff_last_utd_so101_clean_top": 0.2357720375061035,
"eval_p_rank/min_subopt_fail_diff_last_utd_so101_clean_top": -0.012576103210449219,
"eval_p_rank/max_subopt_fail_diff_last_utd_so101_clean_top": 0.5894219428300858,
"eval_p_rank/avg_succ_fail_diff_last_utd_so101_clean_top": 0.3639410316944122,
"eval_p_rank/min_succ_fail_diff_last_utd_so101_clean_top": -0.033055126667022705,
"eval_p_rank/max_succ_fail_diff_last_utd_so101_clean_top": 0.6650743782520294,
"eval_p_rank/ranking_acc_last_utd_so101_clean_top": 0.8666666666666667,
"eval_p_rank/ranking_acc_all_pairs_last_utd_so101_clean_top": 0.8666666666666667,
"eval_p_rank/ranking_acc_failure_vs_successful_last_utd_so101_clean_top": 0.9,
"eval_p_rank/ranking_acc_suboptimal_vs_successful_last_utd_so101_clean_top": 0.8,
"eval_p_rank/ranking_acc_failure_vs_suboptimal_last_utd_so101_clean_top": 0.9,
"eval_p_rank/kendall_avg_utd_so101_clean_top": 0.7333333333333333,
"eval_p_rank/kendall_rewind_avg_utd_so101_clean_top": 0.7333333333333333,
"eval_p_rank/avg_succ_subopt_diff_avg_utd_so101_clean_top": 0.1281689941883087,
"eval_p_rank/min_succ_subopt_diff_avg_utd_so101_clean_top": -0.6224770694971085,
"eval_p_rank/max_succ_subopt_diff_avg_utd_so101_clean_top": 0.4432547390460968,
"eval_p_rank/avg_subopt_fail_diff_avg_utd_so101_clean_top": 0.2357720375061035,
"eval_p_rank/min_subopt_fail_diff_avg_utd_so101_clean_top": -0.012576103210449219,
"eval_p_rank/max_subopt_fail_diff_avg_utd_so101_clean_top": 0.5894219428300858,
"eval_p_rank/avg_succ_fail_diff_avg_utd_so101_clean_top": 0.3639410316944122,
"eval_p_rank/min_succ_fail_diff_avg_utd_so101_clean_top": -0.033055126667022705,
"eval_p_rank/max_succ_fail_diff_avg_utd_so101_clean_top": 0.6650743782520294,
"eval_p_rank/ranking_acc_avg_utd_so101_clean_top": 0.8666666666666667,
"eval_p_rank/ranking_acc_all_pairs_avg_utd_so101_clean_top": 0.8666666666666667,
"eval_p_rank/ranking_acc_failure_vs_successful_avg_utd_so101_clean_top": 0.9,
"eval_p_rank/ranking_acc_suboptimal_vs_successful_avg_utd_so101_clean_top": 0.8,
"eval_p_rank/ranking_acc_failure_vs_suboptimal_avg_utd_so101_clean_top": 0.9,
"eval_p_rank/kendall_sum_utd_so101_clean_top": 0.7333333333333333,
"eval_p_rank/kendall_rewind_sum_utd_so101_clean_top": 0.7333333333333333,
"eval_p_rank/avg_succ_subopt_diff_sum_utd_so101_clean_top": 0.1281689941883087,
"eval_p_rank/min_succ_subopt_diff_sum_utd_so101_clean_top": -0.6224770694971085,
"eval_p_rank/max_succ_subopt_diff_sum_utd_so101_clean_top": 0.4432547390460968,
"eval_p_rank/avg_subopt_fail_diff_sum_utd_so101_clean_top": 0.2357720375061035,
"eval_p_rank/min_subopt_fail_diff_sum_utd_so101_clean_top": -0.012576103210449219,
"eval_p_rank/max_subopt_fail_diff_sum_utd_so101_clean_top": 0.5894219428300858,
"eval_p_rank/avg_succ_fail_diff_sum_utd_so101_clean_top": 0.3639410316944122,
"eval_p_rank/min_succ_fail_diff_sum_utd_so101_clean_top": -0.033055126667022705,
"eval_p_rank/max_succ_fail_diff_sum_utd_so101_clean_top": 0.6650743782520294,
"eval_p_rank/ranking_acc_sum_utd_so101_clean_top": 0.8666666666666667,
"eval_p_rank/ranking_acc_all_pairs_sum_utd_so101_clean_top": 0.8666666666666667,
"eval_p_rank/ranking_acc_failure_vs_successful_sum_utd_so101_clean_top": 0.9,
"eval_p_rank/ranking_acc_suboptimal_vs_successful_sum_utd_so101_clean_top": 0.8,
"eval_p_rank/ranking_acc_failure_vs_suboptimal_sum_utd_so101_clean_top": 0.9,
"eval_rew_align/success_auprc_usc_xarm": 0.3298253598253598,
"eval_rew_align/positive_success_acc_usc_xarm": 1.0,
"eval_rew_align/negative_success_acc_usc_xarm": 0.971764705882353,
"eval_rew_align/loss_usc_xarm": 1.459894859790802,
"eval_rew_align/pearson_usc_xarm": 0.9290145264370201,
"eval_p_rank/kendall_last_usc_xarm": 0.75,
"eval_p_rank/kendall_rewind_last_usc_xarm": 0.8888888888888888,
"eval_p_rank/avg_succ_subopt_diff_last_usc_xarm": 0.11288829644521077,
"eval_p_rank/min_succ_subopt_diff_last_usc_xarm": 0.0029833614826202393,
"eval_p_rank/max_succ_subopt_diff_last_usc_xarm": 0.217641681432724,
"eval_p_rank/avg_subopt_fail_diff_last_usc_xarm": 0.11646403868993123,
"eval_p_rank/min_subopt_fail_diff_last_usc_xarm": -0.03846535086631775,
"eval_p_rank/max_subopt_fail_diff_last_usc_xarm": 0.34273654222488403,
"eval_p_rank/avg_succ_fail_diff_last_usc_xarm": 0.229352335135142,
"eval_p_rank/min_succ_fail_diff_last_usc_xarm": 0.0956188440322876,
"eval_p_rank/max_succ_fail_diff_last_usc_xarm": 0.35525771975517273,
"eval_p_rank/ranking_acc_last_usc_xarm": 0.875,
"eval_p_rank/ranking_acc_all_pairs_last_usc_xarm": 0.875,
"eval_p_rank/ranking_acc_failure_vs_suboptimal_last_usc_xarm": 0.7083333333333334,
"eval_p_rank/ranking_acc_failure_vs_successful_last_usc_xarm": 1.0,
"eval_p_rank/ranking_acc_suboptimal_vs_successful_last_usc_xarm": 0.9166666666666666,
"eval_p_rank/kendall_avg_usc_xarm": 0.75,
"eval_p_rank/kendall_rewind_avg_usc_xarm": 0.8888888888888888,
"eval_p_rank/avg_succ_subopt_diff_avg_usc_xarm": 0.11288829644521077,
"eval_p_rank/min_succ_subopt_diff_avg_usc_xarm": 0.0029833614826202393,
"eval_p_rank/max_succ_subopt_diff_avg_usc_xarm": 0.217641681432724,
"eval_p_rank/avg_subopt_fail_diff_avg_usc_xarm": 0.11646403868993123,
"eval_p_rank/min_subopt_fail_diff_avg_usc_xarm": -0.03846535086631775,
"eval_p_rank/max_subopt_fail_diff_avg_usc_xarm": 0.34273654222488403,
"eval_p_rank/avg_succ_fail_diff_avg_usc_xarm": 0.229352335135142,
"eval_p_rank/min_succ_fail_diff_avg_usc_xarm": 0.0956188440322876,
"eval_p_rank/max_succ_fail_diff_avg_usc_xarm": 0.35525771975517273,
"eval_p_rank/ranking_acc_avg_usc_xarm": 0.875,
"eval_p_rank/ranking_acc_all_pairs_avg_usc_xarm": 0.875,
"eval_p_rank/ranking_acc_failure_vs_suboptimal_avg_usc_xarm": 0.7083333333333334,
"eval_p_rank/ranking_acc_failure_vs_successful_avg_usc_xarm": 1.0,
"eval_p_rank/ranking_acc_suboptimal_vs_successful_avg_usc_xarm": 0.9166666666666666,
"eval_p_rank/kendall_sum_usc_xarm": 0.75,
"eval_p_rank/kendall_rewind_sum_usc_xarm": 0.8888888888888888,
"eval_p_rank/avg_succ_subopt_diff_sum_usc_xarm": 0.11288829644521077,
"eval_p_rank/min_succ_subopt_diff_sum_usc_xarm": 0.0029833614826202393,
"eval_p_rank/max_succ_subopt_diff_sum_usc_xarm": 0.217641681432724,
"eval_p_rank/avg_subopt_fail_diff_sum_usc_xarm": 0.11646403868993123,
"eval_p_rank/min_subopt_fail_diff_sum_usc_xarm": -0.03846535086631775,
"eval_p_rank/max_subopt_fail_diff_sum_usc_xarm": 0.34273654222488403,
"eval_p_rank/avg_succ_fail_diff_sum_usc_xarm": 0.229352335135142,
"eval_p_rank/min_succ_fail_diff_sum_usc_xarm": 0.0956188440322876,
"eval_p_rank/max_succ_fail_diff_sum_usc_xarm": 0.35525771975517273,
"eval_p_rank/ranking_acc_sum_usc_xarm": 0.875,
"eval_p_rank/ranking_acc_all_pairs_sum_usc_xarm": 0.875,
"eval_p_rank/ranking_acc_failure_vs_suboptimal_sum_usc_xarm": 0.7083333333333334,
"eval_p_rank/ranking_acc_failure_vs_successful_sum_usc_xarm": 1.0,
"eval_p_rank/ranking_acc_suboptimal_vs_successful_sum_usc_xarm": 0.9166666666666666,
"eval_rew_align/success_auprc_usc_franka": 0.178648534454372,
"eval_rew_align/positive_success_acc_usc_franka": 0.75,
"eval_rew_align/negative_success_acc_usc_franka": 0.9436274509803921,
"eval_rew_align/loss_usc_franka": 1.4907499551773071,
"eval_rew_align/pearson_usc_franka": 0.9115594502071923,
"eval_p_rank/kendall_last_usc_franka": 0.7916666666666666,
"eval_p_rank/kendall_rewind_last_usc_franka": 0.8333333333333334,
"eval_p_rank/avg_succ_subopt_diff_last_usc_franka": 0.06161930412054062,
"eval_p_rank/min_succ_subopt_diff_last_usc_franka": -0.010589927434921265,
"eval_p_rank/max_succ_subopt_diff_last_usc_franka": 0.17146822810173035,
"eval_p_rank/avg_subopt_fail_diff_last_usc_franka": 0.18651490285992622,
"eval_p_rank/min_subopt_fail_diff_last_usc_franka": 0.0136566162109375,
"eval_p_rank/max_subopt_fail_diff_last_usc_franka": 0.3522116541862488,
"eval_p_rank/avg_succ_fail_diff_last_usc_franka": 0.24813420698046684,
"eval_p_rank/min_succ_fail_diff_last_usc_franka": 0.032290756702423096,
"eval_p_rank/max_succ_fail_diff_last_usc_franka": 0.4191764295101166,
"eval_p_rank/ranking_acc_last_usc_franka": 0.8958333333333334,
"eval_p_rank/ranking_acc_all_pairs_last_usc_franka": 0.8958333333333334,
"eval_p_rank/ranking_acc_suboptimal_vs_successful_last_usc_franka": 0.8125,
"eval_p_rank/ranking_acc_failure_vs_suboptimal_last_usc_franka": 0.9375,
"eval_p_rank/ranking_acc_failure_vs_successful_last_usc_franka": 0.9375,
"eval_p_rank/kendall_avg_usc_franka": 0.7916666666666666,
"eval_p_rank/kendall_rewind_avg_usc_franka": 0.8333333333333334,
"eval_p_rank/avg_succ_subopt_diff_avg_usc_franka": 0.06161930412054062,
"eval_p_rank/min_succ_subopt_diff_avg_usc_franka": -0.010589927434921265,
"eval_p_rank/max_succ_subopt_diff_avg_usc_franka": 0.17146822810173035,
"eval_p_rank/avg_subopt_fail_diff_avg_usc_franka": 0.18651490285992622,
"eval_p_rank/min_subopt_fail_diff_avg_usc_franka": 0.0136566162109375,
"eval_p_rank/max_subopt_fail_diff_avg_usc_franka": 0.3522116541862488,
"eval_p_rank/avg_succ_fail_diff_avg_usc_franka": 0.24813420698046684,
"eval_p_rank/min_succ_fail_diff_avg_usc_franka": 0.032290756702423096,
"eval_p_rank/max_succ_fail_diff_avg_usc_franka": 0.4191764295101166,
"eval_p_rank/ranking_acc_avg_usc_franka": 0.8958333333333334,
"eval_p_rank/ranking_acc_all_pairs_avg_usc_franka": 0.8958333333333334,
"eval_p_rank/ranking_acc_suboptimal_vs_successful_avg_usc_franka": 0.8125,
"eval_p_rank/ranking_acc_failure_vs_suboptimal_avg_usc_franka": 0.9375,
"eval_p_rank/ranking_acc_failure_vs_successful_avg_usc_franka": 0.9375,
"eval_p_rank/kendall_sum_usc_franka": 0.7916666666666666,
"eval_p_rank/kendall_rewind_sum_usc_franka": 0.8333333333333334,
"eval_p_rank/avg_succ_subopt_diff_sum_usc_franka": 0.06161930412054062,
"eval_p_rank/min_succ_subopt_diff_sum_usc_franka": -0.010589927434921265,
"eval_p_rank/max_succ_subopt_diff_sum_usc_franka": 0.17146822810173035,
"eval_p_rank/avg_subopt_fail_diff_sum_usc_franka": 0.18651490285992622,
"eval_p_rank/min_subopt_fail_diff_sum_usc_franka": 0.0136566162109375,
"eval_p_rank/max_subopt_fail_diff_sum_usc_franka": 0.3522116541862488,
"eval_p_rank/avg_succ_fail_diff_sum_usc_franka": 0.24813420698046684,
"eval_p_rank/min_succ_fail_diff_sum_usc_franka": 0.032290756702423096,
"eval_p_rank/max_succ_fail_diff_sum_usc_franka": 0.4191764295101166,
"eval_p_rank/ranking_acc_sum_usc_franka": 0.8958333333333334,
"eval_p_rank/ranking_acc_all_pairs_sum_usc_franka": 0.8958333333333334,
"eval_p_rank/ranking_acc_suboptimal_vs_successful_sum_usc_franka": 0.8125,
"eval_p_rank/ranking_acc_failure_vs_suboptimal_sum_usc_franka": 0.9375,
"eval_p_rank/ranking_acc_failure_vs_successful_sum_usc_franka": 0.9375,
"eval_rew_align/success_auprc_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.08961776352588778,
"eval_rew_align/positive_success_acc_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.9,
"eval_rew_align/negative_success_acc_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.9552941176470588,
"eval_rew_align/loss_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 1.5833105087280273,
"eval_rew_align/pearson_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.8841339237987327,
"eval_p_rank/kendall_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.3093333333333333,
"eval_p_rank/kendall_rewind_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.5333333333333333,
"eval_p_rank/avg_succ_subopt_diff_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": -0.02896499633789066,
"eval_p_rank/min_succ_subopt_diff_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": -0.23819030523300178,
"eval_p_rank/max_succ_subopt_diff_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.0715120196342468,
"eval_p_rank/avg_subopt_fail_diff_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.20831456266343595,
"eval_p_rank/min_subopt_fail_diff_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": -0.16911142468452456,
"eval_p_rank/max_subopt_fail_diff_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.4413899302482605,
"eval_p_rank/avg_succ_fail_diff_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.17934956632554527,
"eval_p_rank/min_succ_fail_diff_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": -0.09759940505027775,
"eval_p_rank/max_succ_fail_diff_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.4535657167434692,
"eval_p_rank/ranking_acc_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.6546666666666666,
"eval_p_rank/ranking_acc_all_pairs_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.6546666666666666,
"eval_p_rank/ranking_acc_failure_vs_suboptimal_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.76,
"eval_p_rank/ranking_acc_failure_vs_successful_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.744,
"eval_p_rank/ranking_acc_suboptimal_vs_successful_last_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.46,
"eval_p_rank/kendall_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.3093333333333333,
"eval_p_rank/kendall_rewind_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.5333333333333333,
"eval_p_rank/avg_succ_subopt_diff_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": -0.02896499633789066,
"eval_p_rank/min_succ_subopt_diff_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": -0.23819030523300178,
"eval_p_rank/max_succ_subopt_diff_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.0715120196342468,
"eval_p_rank/avg_subopt_fail_diff_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.20831456266343595,
"eval_p_rank/min_subopt_fail_diff_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": -0.16911142468452456,
"eval_p_rank/max_subopt_fail_diff_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.4413899302482605,
"eval_p_rank/avg_succ_fail_diff_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.17934956632554527,
"eval_p_rank/min_succ_fail_diff_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": -0.09759940505027775,
"eval_p_rank/max_succ_fail_diff_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.4535657167434692,
"eval_p_rank/ranking_acc_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.6546666666666666,
"eval_p_rank/ranking_acc_all_pairs_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.6546666666666666,
"eval_p_rank/ranking_acc_failure_vs_suboptimal_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.76,
"eval_p_rank/ranking_acc_failure_vs_successful_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.744,
"eval_p_rank/ranking_acc_suboptimal_vs_successful_avg_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.46,
"eval_p_rank/kendall_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.3093333333333333,
"eval_p_rank/kendall_rewind_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.5333333333333333,
"eval_p_rank/avg_succ_subopt_diff_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": -0.02896499633789066,
"eval_p_rank/min_succ_subopt_diff_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": -0.23819030523300178,
"eval_p_rank/max_succ_subopt_diff_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.0715120196342468,
"eval_p_rank/avg_subopt_fail_diff_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.20831456266343595,
"eval_p_rank/min_subopt_fail_diff_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": -0.16911142468452456,
"eval_p_rank/max_subopt_fail_diff_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.4413899302482605,
"eval_p_rank/avg_succ_fail_diff_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.17934956632554527,
"eval_p_rank/min_succ_fail_diff_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": -0.09759940505027775,
"eval_p_rank/max_succ_fail_diff_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.4535657167434692,
"eval_p_rank/ranking_acc_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.6546666666666666,
"eval_p_rank/ranking_acc_all_pairs_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.6546666666666666,
"eval_p_rank/ranking_acc_failure_vs_suboptimal_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.76,
"eval_p_rank/ranking_acc_failure_vs_successful_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.744,
"eval_p_rank/ranking_acc_suboptimal_vs_successful_sum_jesbu1_usc_koch_p_ranking_rfm_usc_koch_p_ranking_all": 0.46,
"time/custom_evaluations": 227.5345072869677
}
}