{
"step": 250,
"metrics": {
"eval_rew_align/mse_roboarena_eval": 0.0572185566740965,
"eval_rew_align/pearson_roboarena_eval": -0.20521791406560833,
"eval_p_rank/spearman_roboarena_eval": 0.1486367653896934,
"eval_p_rank/spearman_rewind_roboarena_eval": 0.285297764198399,
"eval_p_rank/ranking_acc_roboarena_eval": 0.5525760135135135,
"eval_p_rank/ranking_total_pairs_roboarena_eval": 4736.0,
"eval_rew_align/mse_mw_eval": 0.028775711768743873,
"eval_rew_align/pearson_mw_eval": -0.2622550899752058,
"eval_p_rank/spearman_mw_eval": 0.21948339897935923,
"eval_p_rank/spearman_rewind_mw_eval": 0.23529411764705882,
"eval_p_rank/avg_succ_subopt_diff_mw_eval": 0.00025275735294118495,
"eval_p_rank/min_succ_subopt_diff_mw_eval": -0.013867187499999989,
"eval_p_rank/max_succ_subopt_diff_mw_eval": 0.017187500000000022,
"eval_p_rank/avg_subopt_fail_diff_mw_eval": 0.0027573529411764708,
"eval_p_rank/min_subopt_fail_diff_mw_eval": -0.0166015625,
"eval_p_rank/max_subopt_fail_diff_mw_eval": 0.0234375,
"eval_p_rank/avg_succ_fail_diff_mw_eval": 0.0030101102941176554,
"eval_p_rank/min_succ_fail_diff_mw_eval": -0.012890624999999989,
"eval_p_rank/max_succ_fail_diff_mw_eval": 0.02128906250000001,
"eval_p_rank/ranking_acc_mw_eval": 0.6078431372549019,
"eval_p_rank/ranking_total_pairs_mw_eval": 51.0,
"eval_p_rank/spearman_utd_so101": -0.02834936490538903,
"eval_p_rank/spearman_rewind_utd_so101": -0.1,
"eval_p_rank/avg_succ_subopt_diff_utd_so101": -0.00234375,
"eval_p_rank/min_succ_subopt_diff_utd_so101": -0.029296875,
"eval_p_rank/max_succ_subopt_diff_utd_so101": 0.029296875,
"eval_p_rank/avg_subopt_fail_diff_utd_so101": 0.0,
"eval_p_rank/min_subopt_fail_diff_utd_so101": -0.0361328125,
"eval_p_rank/max_subopt_fail_diff_utd_so101": 0.0341796875,
"eval_p_rank/avg_succ_fail_diff_utd_so101": -0.00234375,
"eval_p_rank/min_succ_fail_diff_utd_so101": -0.0224609375,
"eval_p_rank/max_succ_fail_diff_utd_so101": 0.021484375,
"eval_p_rank/ranking_acc_utd_so101": 0.4666666666666667,
"eval_p_rank/ranking_total_pairs_utd_so101": 30.0,
"eval_p_rank/spearman_usc_franka": 0.359375,
"eval_p_rank/spearman_rewind_usc_franka": 0.25,
"eval_p_rank/avg_succ_subopt_diff_usc_franka": 0.003173828125,
"eval_p_rank/min_succ_subopt_diff_usc_franka": -0.001953125,
"eval_p_rank/max_succ_subopt_diff_usc_franka": 0.0107421875,
"eval_p_rank/avg_subopt_fail_diff_usc_franka": 0.00244140625,
"eval_p_rank/min_subopt_fail_diff_usc_franka": -0.0107421875,
"eval_p_rank/max_subopt_fail_diff_usc_franka": 0.017578125,
"eval_p_rank/avg_succ_fail_diff_usc_franka": 0.005615234375,
"eval_p_rank/min_succ_fail_diff_usc_franka": -0.0126953125,
"eval_p_rank/max_succ_fail_diff_usc_franka": 0.0166015625,
"eval_p_rank/ranking_acc_usc_franka": 0.5833333333333334,
"eval_p_rank/ranking_total_pairs_usc_franka": 12.0,
"eval_p_rank/spearman_usc_xarm": 0.05691772515768497,
"eval_p_rank/spearman_rewind_usc_xarm": 0.08333333333333333,
"eval_p_rank/avg_succ_subopt_diff_usc_xarm": 0.005045572916666667,
"eval_p_rank/min_succ_subopt_diff_usc_xarm": -0.015625,
"eval_p_rank/max_succ_subopt_diff_usc_xarm": 0.0185546875,
"eval_p_rank/avg_subopt_fail_diff_usc_xarm": -0.004069010416666667,
"eval_p_rank/min_subopt_fail_diff_usc_xarm": -0.0205078125,
"eval_p_rank/max_subopt_fail_diff_usc_xarm": 0.01171875,
"eval_p_rank/avg_succ_fail_diff_usc_xarm": 0.0009765625,
"eval_p_rank/min_succ_fail_diff_usc_xarm": -0.01171875,
"eval_p_rank/max_succ_fail_diff_usc_xarm": 0.013671875,
"eval_p_rank/ranking_acc_usc_xarm": 0.5555555555555556,
"eval_p_rank/ranking_total_pairs_usc_xarm": 18.0,
"time/custom_evaluations": 45.65803809789941
}
}