Robometer-4B-LIBERO / metrics.json
aliangdw's picture
Duplicate from aliangdw/libero_ablation_prog_pref_with_fail_lora_ft_4frames
e179783
{
"step": 700,
"metrics": {
"eval_rew_align/loss_libero_90": 0.11321220770478249,
"eval_rew_align/pearson_libero_90": 0.9301406673122348,
"eval_p_rank/kendall_last_libero_90": 0.8904109589041096,
"eval_p_rank/kendall_rewind_last_libero_90": 1.0,
"eval_p_rank/avg_succ_fail_diff_last_libero_90": 0.36485108353503765,
"eval_p_rank/min_succ_fail_diff_last_libero_90": 0.005788075923919722,
"eval_p_rank/max_succ_fail_diff_last_libero_90": 0.704643115401268,
"eval_p_rank/ranking_acc_last_libero_90": 0.9452054794520548,
"eval_p_rank/ranking_acc_all_pairs_last_libero_90": 0.9452054794520548,
"eval_p_rank/ranking_acc_failure_vs_successful_last_libero_90": 0.9452054794520548,
"eval_p_rank/kendall_avg_libero_90": 0.8904109589041096,
"eval_p_rank/kendall_rewind_avg_libero_90": 1.0,
"eval_p_rank/avg_succ_fail_diff_avg_libero_90": 0.36485108353503765,
"eval_p_rank/min_succ_fail_diff_avg_libero_90": 0.005788075923919722,
"eval_p_rank/max_succ_fail_diff_avg_libero_90": 0.704643115401268,
"eval_p_rank/ranking_acc_avg_libero_90": 0.9452054794520548,
"eval_p_rank/ranking_acc_all_pairs_avg_libero_90": 0.9452054794520548,
"eval_p_rank/ranking_acc_failure_vs_successful_avg_libero_90": 0.9452054794520548,
"eval_p_rank/kendall_sum_libero_90": 0.8904109589041096,
"eval_p_rank/kendall_rewind_sum_libero_90": 1.0,
"eval_p_rank/avg_succ_fail_diff_sum_libero_90": 0.36485108353503765,
"eval_p_rank/min_succ_fail_diff_sum_libero_90": 0.005788075923919722,
"eval_p_rank/max_succ_fail_diff_sum_libero_90": 0.704643115401268,
"eval_p_rank/ranking_acc_sum_libero_90": 0.9452054794520548,
"eval_p_rank/ranking_acc_all_pairs_sum_libero_90": 0.9452054794520548,
"eval_p_rank/ranking_acc_failure_vs_successful_sum_libero_90": 0.9452054794520548,
"eval_rew_align/loss_libero_10": 0.1162563480436802,
"eval_rew_align/pearson_libero_10": 0.962842022665807,
"eval_p_rank/kendall_last_libero_10": 0.992,
"eval_p_rank/kendall_rewind_last_libero_10": 1.0,
"eval_p_rank/avg_succ_fail_diff_last_libero_10": 0.4155293563008309,
"eval_p_rank/min_succ_fail_diff_last_libero_10": 0.29186664223670966,
"eval_p_rank/max_succ_fail_diff_last_libero_10": 0.5371088057756424,
"eval_p_rank/ranking_acc_last_libero_10": 0.996,
"eval_p_rank/ranking_acc_all_pairs_last_libero_10": 0.996,
"eval_p_rank/ranking_acc_failure_vs_successful_last_libero_10": 0.996,
"eval_p_rank/kendall_avg_libero_10": 0.992,
"eval_p_rank/kendall_rewind_avg_libero_10": 1.0,
"eval_p_rank/avg_succ_fail_diff_avg_libero_10": 0.4155293563008309,
"eval_p_rank/min_succ_fail_diff_avg_libero_10": 0.29186664223670966,
"eval_p_rank/max_succ_fail_diff_avg_libero_10": 0.5371088057756424,
"eval_p_rank/ranking_acc_avg_libero_10": 0.996,
"eval_p_rank/ranking_acc_all_pairs_avg_libero_10": 0.996,
"eval_p_rank/ranking_acc_failure_vs_successful_avg_libero_10": 0.996,
"eval_p_rank/kendall_sum_libero_10": 0.992,
"eval_p_rank/kendall_rewind_sum_libero_10": 1.0,
"eval_p_rank/avg_succ_fail_diff_sum_libero_10": 0.4155293563008309,
"eval_p_rank/min_succ_fail_diff_sum_libero_10": 0.29186664223670966,
"eval_p_rank/max_succ_fail_diff_sum_libero_10": 0.5371088057756424,
"eval_p_rank/ranking_acc_sum_libero_10": 0.996,
"eval_p_rank/ranking_acc_all_pairs_sum_libero_10": 0.996,
"eval_p_rank/ranking_acc_failure_vs_successful_sum_libero_10": 0.996,
"time/custom_evaluations": 50.66930902400054
}
}