| { |
| "epoch": 15.0, |
| "eval_alignment/accuracy": 0.8828125, |
| "eval_alignment/f1": 0.7796817625458996, |
| "eval_alignment/precision": 0.7538461538461538, |
| "eval_alignment/recall": 0.8073510773130546, |
| "eval_correction/exact_match_rate": 0.3883273557222041, |
| "eval_correction/f1": 0.4883359253499223, |
| "eval_correction/llm_avg_score": 0.3883273557222041, |
| "eval_correction/precision": 0.8386752136752137, |
| "eval_correction/recall": 0.3444493198771391, |
| "eval_error_type/accuracy": 0.7375302663438257, |
| "eval_error_type/f1": 0.5588883765184088, |
| "eval_error_type/precision": 0.5768808510105404, |
| "eval_error_type/recall": 0.5512884586583426, |
| "eval_error_type_detail/Auxiliary_Constructi/f1": 0.6666666666666666, |
| "eval_error_type_detail/Auxiliary_Constructi/precision": 0.6363636363636364, |
| "eval_error_type_detail/Auxiliary_Constructi/recall": 0.7, |
| "eval_error_type_detail/Bound_Constraint_Err/f1": 0.5314685314685313, |
| "eval_error_type_detail/Bound_Constraint_Err/precision": 0.5277777777777778, |
| "eval_error_type_detail/Bound_Constraint_Err/recall": 0.5352112676056338, |
| "eval_error_type_detail/Cardinality_Errors/f1": 0.6666666666666666, |
| "eval_error_type_detail/Cardinality_Errors/precision": 1.0, |
| "eval_error_type_detail/Cardinality_Errors/recall": 0.5, |
| "eval_error_type_detail/Coefficient/Constant/f1": 0.8019323671497584, |
| "eval_error_type_detail/Coefficient/Constant/precision": 0.83, |
| "eval_error_type_detail/Coefficient/Constant/recall": 0.7757009345794392, |
| "eval_error_type_detail/Conclusion_Errors/f1": 0.5346534653465347, |
| "eval_error_type_detail/Conclusion_Errors/precision": 0.5142857142857142, |
| "eval_error_type_detail/Conclusion_Errors/recall": 0.5567010309278351, |
| "eval_error_type_detail/Domain_Constraint_Er/f1": 0.35714285714285715, |
| "eval_error_type_detail/Domain_Constraint_Er/precision": 0.3409090909090909, |
| "eval_error_type_detail/Domain_Constraint_Er/recall": 0.375, |
| "eval_error_type_detail/Extremum_Concept_Err/f1": 0.7631578947368421, |
| "eval_error_type_detail/Extremum_Concept_Err/precision": 0.7435897435897436, |
| "eval_error_type_detail/Extremum_Concept_Err/recall": 0.7837837837837838, |
| "eval_error_type_detail/Function_Confusion/f1": 0.4444444444444444, |
| "eval_error_type_detail/Function_Confusion/precision": 0.5333333333333333, |
| "eval_error_type_detail/Function_Confusion/recall": 0.38095238095238093, |
| "eval_error_type_detail/Geometric_Relationsh/f1": 0.0, |
| "eval_error_type_detail/Geometric_Relationsh/precision": 0.0, |
| "eval_error_type_detail/Geometric_Relationsh/recall": 0.0, |
| "eval_error_type_detail/Incorrect_Premise/f1": 0.5641025641025642, |
| "eval_error_type_detail/Incorrect_Premise/precision": 0.5238095238095238, |
| "eval_error_type_detail/Incorrect_Premise/recall": 0.6111111111111112, |
| "eval_error_type_detail/Index/Subscript_Erro/f1": 0.6222222222222223, |
| "eval_error_type_detail/Index/Subscript_Erro/precision": 0.6363636363636364, |
| "eval_error_type_detail/Index/Subscript_Erro/recall": 0.6086956521739131, |
| "eval_error_type_detail/Infinity_Misinterpre/f1": 0.0, |
| "eval_error_type_detail/Infinity_Misinterpre/precision": 0.0, |
| "eval_error_type_detail/Infinity_Misinterpre/recall": 0.0, |
| "eval_error_type_detail/Integration/Differen/f1": 0.0, |
| "eval_error_type_detail/Integration/Differen/precision": 0.0, |
| "eval_error_type_detail/Integration/Differen/recall": 0.0, |
| "eval_error_type_detail/Logical_Connective_M/f1": 0.845771144278607, |
| "eval_error_type_detail/Logical_Connective_M/precision": 0.85, |
| "eval_error_type_detail/Logical_Connective_M/recall": 0.8415841584158416, |
| "eval_error_type_detail/Missing_Premise/f1": 0.7941176470588236, |
| "eval_error_type_detail/Missing_Premise/precision": 0.788961038961039, |
| "eval_error_type_detail/Missing_Premise/recall": 0.7993421052631579, |
| "eval_error_type_detail/Object_Type_Errors/f1": 0.6666666666666667, |
| "eval_error_type_detail/Object_Type_Errors/precision": 0.7647058823529411, |
| "eval_error_type_detail/Object_Type_Errors/recall": 0.5909090909090909, |
| "eval_error_type_detail/Operator_Confusion/f1": 0.634920634920635, |
| "eval_error_type_detail/Operator_Confusion/precision": 0.7407407407407407, |
| "eval_error_type_detail/Operator_Confusion/recall": 0.5555555555555556, |
| "eval_error_type_detail/Operator_Precedence_/f1": 0.9073170731707318, |
| "eval_error_type_detail/Operator_Precedence_/precision": 0.8773584905660378, |
| "eval_error_type_detail/Operator_Precedence_/recall": 0.9393939393939394, |
| "eval_error_type_detail/Partial_Order_Confus/f1": 0.8108108108108109, |
| "eval_error_type_detail/Partial_Order_Confus/precision": 0.84375, |
| "eval_error_type_detail/Partial_Order_Confus/recall": 0.7803468208092486, |
| "eval_error_type_detail/Positivity_Constrain/f1": 0.793536804308797, |
| "eval_error_type_detail/Positivity_Constrain/precision": 0.7809187279151943, |
| "eval_error_type_detail/Positivity_Constrain/recall": 0.8065693430656934, |
| "eval_error_type_detail/Quantifier_Strengthe/f1": 0.7148936170212765, |
| "eval_error_type_detail/Quantifier_Strengthe/precision": 0.7368421052631579, |
| "eval_error_type_detail/Quantifier_Strengthe/recall": 0.6942148760330579, |
| "eval_error_type_detail/Quantifier_Weakening/f1": 0.8196721311475409, |
| "eval_error_type_detail/Quantifier_Weakening/precision": 0.8503401360544217, |
| "eval_error_type_detail/Quantifier_Weakening/recall": 0.7911392405063291, |
| "eval_error_type_detail/Range_Error/f1": 0.6768060836501901, |
| "eval_error_type_detail/Range_Error/precision": 0.6793893129770993, |
| "eval_error_type_detail/Range_Error/recall": 0.6742424242424242, |
| "eval_error_type_detail/Range_Shift/f1": 0.761904761904762, |
| "eval_error_type_detail/Range_Shift/precision": 0.7096774193548387, |
| "eval_error_type_detail/Range_Shift/recall": 0.822429906542056, |
| "eval_error_type_detail/Redundant_Premise/f1": 0.7111111111111111, |
| "eval_error_type_detail/Redundant_Premise/precision": 0.6666666666666666, |
| "eval_error_type_detail/Redundant_Premise/recall": 0.7619047619047619, |
| "eval_error_type_detail/Truncation_Error/f1": 0.0, |
| "eval_error_type_detail/Truncation_Error/precision": 0.0, |
| "eval_error_type_detail/Truncation_Error/recall": 0.0, |
| "eval_error_type_detail/Variable_Constraint_/f1": 0.0, |
| "eval_error_type_detail/Variable_Constraint_/precision": 0.0, |
| "eval_error_type_detail/Variable_Constraint_/recall": 0.0, |
| "eval_invalid/count": 10, |
| "eval_invalid/rate": 0.0032552083333333335, |
| "eval_location_cascaded/binary_f1": 0.6974262397991211, |
| "eval_location_cascaded/binary_precision": 1.0, |
| "eval_location_cascaded/binary_recall": 0.535421686746988, |
| "eval_location_cascaded/match_rate": 0.535421686746988, |
| "eval_location_given_correct_type/binary_f1": 0.8435839028094153, |
| "eval_location_given_correct_type/binary_precision": 1.0, |
| "eval_location_given_correct_type/binary_recall": 0.7294812869336835, |
| "eval_location_given_correct_type/match_rate": 0.7294812869336835, |
| "eval_runtime": 3052.7449, |
| "eval_samples_per_second": 0.946, |
| "eval_steps_per_second": 0.002 |
| } |