aliangdw's picture
Duplicate from aliangdw/libero_ablation_prog_only_lora_ft_4frames
edf5b57
{
"step": 1000,
"metrics": {
"eval_rew_align/loss_libero_90": 0.07622170671820641,
"eval_rew_align/pearson_libero_90": 0.9599514076421588,
"eval_p_rank/kendall_last_libero_90": 0.4991780821917808,
"eval_p_rank/kendall_rewind_last_libero_90": 0.8356164383561644,
"eval_p_rank/avg_succ_fail_diff_last_libero_90": 0.08843660197437626,
"eval_p_rank/min_succ_fail_diff_last_libero_90": -0.08279139995574952,
"eval_p_rank/max_succ_fail_diff_last_libero_90": 0.2787765145301819,
"eval_p_rank/ranking_acc_last_libero_90": 0.7495890410958904,
"eval_p_rank/ranking_acc_all_pairs_last_libero_90": 0.7495890410958904,
"eval_p_rank/ranking_acc_failure_vs_successful_last_libero_90": 0.7495890410958904,
"eval_p_rank/kendall_avg_libero_90": 0.4991780821917808,
"eval_p_rank/kendall_rewind_avg_libero_90": 0.8356164383561644,
"eval_p_rank/avg_succ_fail_diff_avg_libero_90": 0.08843660197437626,
"eval_p_rank/min_succ_fail_diff_avg_libero_90": -0.08279139995574952,
"eval_p_rank/max_succ_fail_diff_avg_libero_90": 0.2787765145301819,
"eval_p_rank/ranking_acc_avg_libero_90": 0.7495890410958904,
"eval_p_rank/ranking_acc_all_pairs_avg_libero_90": 0.7495890410958904,
"eval_p_rank/ranking_acc_failure_vs_successful_avg_libero_90": 0.7495890410958904,
"eval_p_rank/kendall_sum_libero_90": 0.4991780821917808,
"eval_p_rank/kendall_rewind_sum_libero_90": 0.8356164383561644,
"eval_p_rank/avg_succ_fail_diff_sum_libero_90": 0.08843660197437626,
"eval_p_rank/min_succ_fail_diff_sum_libero_90": -0.08279139995574952,
"eval_p_rank/max_succ_fail_diff_sum_libero_90": 0.2787765145301819,
"eval_p_rank/ranking_acc_sum_libero_90": 0.7495890410958904,
"eval_p_rank/ranking_acc_all_pairs_sum_libero_90": 0.7495890410958904,
"eval_p_rank/ranking_acc_failure_vs_successful_sum_libero_90": 0.7495890410958904,
"eval_rew_align/loss_libero_10": 0.057464000582695005,
"eval_rew_align/pearson_libero_10": 0.9774006704323236,
"eval_p_rank/kendall_last_libero_10": 0.76,
"eval_p_rank/kendall_rewind_last_libero_10": 1.0,
"eval_p_rank/avg_succ_fail_diff_last_libero_10": 0.14927782282233237,
"eval_p_rank/min_succ_fail_diff_last_libero_10": 0.02288996577262875,
"eval_p_rank/max_succ_fail_diff_last_libero_10": 0.20963821858167647,
"eval_p_rank/ranking_acc_last_libero_10": 0.88,
"eval_p_rank/ranking_acc_all_pairs_last_libero_10": 0.88,
"eval_p_rank/ranking_acc_failure_vs_successful_last_libero_10": 0.88,
"eval_p_rank/kendall_avg_libero_10": 0.76,
"eval_p_rank/kendall_rewind_avg_libero_10": 1.0,
"eval_p_rank/avg_succ_fail_diff_avg_libero_10": 0.14927782282233237,
"eval_p_rank/min_succ_fail_diff_avg_libero_10": 0.02288996577262875,
"eval_p_rank/max_succ_fail_diff_avg_libero_10": 0.20963821858167647,
"eval_p_rank/ranking_acc_avg_libero_10": 0.88,
"eval_p_rank/ranking_acc_all_pairs_avg_libero_10": 0.88,
"eval_p_rank/ranking_acc_failure_vs_successful_avg_libero_10": 0.88,
"eval_p_rank/kendall_sum_libero_10": 0.76,
"eval_p_rank/kendall_rewind_sum_libero_10": 1.0,
"eval_p_rank/avg_succ_fail_diff_sum_libero_10": 0.14927782282233237,
"eval_p_rank/min_succ_fail_diff_sum_libero_10": 0.02288996577262875,
"eval_p_rank/max_succ_fail_diff_sum_libero_10": 0.20963821858167647,
"eval_p_rank/ranking_acc_sum_libero_10": 0.88,
"eval_p_rank/ranking_acc_all_pairs_sum_libero_10": 0.88,
"eval_p_rank/ranking_acc_failure_vs_successful_sum_libero_10": 0.88,
"time/custom_evaluations": 50.40835566003807
}
}