FormalRx-4B / eval_results.json
hcWang942's picture
Add files using upload-large-folder tool
13add57 verified
{
"epoch": 15.0,
"eval_alignment/accuracy": 0.8828125,
"eval_alignment/f1": 0.7796817625458996,
"eval_alignment/precision": 0.7538461538461538,
"eval_alignment/recall": 0.8073510773130546,
"eval_correction/exact_match_rate": 0.3883273557222041,
"eval_correction/f1": 0.4883359253499223,
"eval_correction/llm_avg_score": 0.3883273557222041,
"eval_correction/precision": 0.8386752136752137,
"eval_correction/recall": 0.3444493198771391,
"eval_error_type/accuracy": 0.7375302663438257,
"eval_error_type/f1": 0.5588883765184088,
"eval_error_type/precision": 0.5768808510105404,
"eval_error_type/recall": 0.5512884586583426,
"eval_error_type_detail/Auxiliary_Constructi/f1": 0.6666666666666666,
"eval_error_type_detail/Auxiliary_Constructi/precision": 0.6363636363636364,
"eval_error_type_detail/Auxiliary_Constructi/recall": 0.7,
"eval_error_type_detail/Bound_Constraint_Err/f1": 0.5314685314685313,
"eval_error_type_detail/Bound_Constraint_Err/precision": 0.5277777777777778,
"eval_error_type_detail/Bound_Constraint_Err/recall": 0.5352112676056338,
"eval_error_type_detail/Cardinality_Errors/f1": 0.6666666666666666,
"eval_error_type_detail/Cardinality_Errors/precision": 1.0,
"eval_error_type_detail/Cardinality_Errors/recall": 0.5,
"eval_error_type_detail/Coefficient/Constant/f1": 0.8019323671497584,
"eval_error_type_detail/Coefficient/Constant/precision": 0.83,
"eval_error_type_detail/Coefficient/Constant/recall": 0.7757009345794392,
"eval_error_type_detail/Conclusion_Errors/f1": 0.5346534653465347,
"eval_error_type_detail/Conclusion_Errors/precision": 0.5142857142857142,
"eval_error_type_detail/Conclusion_Errors/recall": 0.5567010309278351,
"eval_error_type_detail/Domain_Constraint_Er/f1": 0.35714285714285715,
"eval_error_type_detail/Domain_Constraint_Er/precision": 0.3409090909090909,
"eval_error_type_detail/Domain_Constraint_Er/recall": 0.375,
"eval_error_type_detail/Extremum_Concept_Err/f1": 0.7631578947368421,
"eval_error_type_detail/Extremum_Concept_Err/precision": 0.7435897435897436,
"eval_error_type_detail/Extremum_Concept_Err/recall": 0.7837837837837838,
"eval_error_type_detail/Function_Confusion/f1": 0.4444444444444444,
"eval_error_type_detail/Function_Confusion/precision": 0.5333333333333333,
"eval_error_type_detail/Function_Confusion/recall": 0.38095238095238093,
"eval_error_type_detail/Geometric_Relationsh/f1": 0.0,
"eval_error_type_detail/Geometric_Relationsh/precision": 0.0,
"eval_error_type_detail/Geometric_Relationsh/recall": 0.0,
"eval_error_type_detail/Incorrect_Premise/f1": 0.5641025641025642,
"eval_error_type_detail/Incorrect_Premise/precision": 0.5238095238095238,
"eval_error_type_detail/Incorrect_Premise/recall": 0.6111111111111112,
"eval_error_type_detail/Index/Subscript_Erro/f1": 0.6222222222222223,
"eval_error_type_detail/Index/Subscript_Erro/precision": 0.6363636363636364,
"eval_error_type_detail/Index/Subscript_Erro/recall": 0.6086956521739131,
"eval_error_type_detail/Infinity_Misinterpre/f1": 0.0,
"eval_error_type_detail/Infinity_Misinterpre/precision": 0.0,
"eval_error_type_detail/Infinity_Misinterpre/recall": 0.0,
"eval_error_type_detail/Integration/Differen/f1": 0.0,
"eval_error_type_detail/Integration/Differen/precision": 0.0,
"eval_error_type_detail/Integration/Differen/recall": 0.0,
"eval_error_type_detail/Logical_Connective_M/f1": 0.845771144278607,
"eval_error_type_detail/Logical_Connective_M/precision": 0.85,
"eval_error_type_detail/Logical_Connective_M/recall": 0.8415841584158416,
"eval_error_type_detail/Missing_Premise/f1": 0.7941176470588236,
"eval_error_type_detail/Missing_Premise/precision": 0.788961038961039,
"eval_error_type_detail/Missing_Premise/recall": 0.7993421052631579,
"eval_error_type_detail/Object_Type_Errors/f1": 0.6666666666666667,
"eval_error_type_detail/Object_Type_Errors/precision": 0.7647058823529411,
"eval_error_type_detail/Object_Type_Errors/recall": 0.5909090909090909,
"eval_error_type_detail/Operator_Confusion/f1": 0.634920634920635,
"eval_error_type_detail/Operator_Confusion/precision": 0.7407407407407407,
"eval_error_type_detail/Operator_Confusion/recall": 0.5555555555555556,
"eval_error_type_detail/Operator_Precedence_/f1": 0.9073170731707318,
"eval_error_type_detail/Operator_Precedence_/precision": 0.8773584905660378,
"eval_error_type_detail/Operator_Precedence_/recall": 0.9393939393939394,
"eval_error_type_detail/Partial_Order_Confus/f1": 0.8108108108108109,
"eval_error_type_detail/Partial_Order_Confus/precision": 0.84375,
"eval_error_type_detail/Partial_Order_Confus/recall": 0.7803468208092486,
"eval_error_type_detail/Positivity_Constrain/f1": 0.793536804308797,
"eval_error_type_detail/Positivity_Constrain/precision": 0.7809187279151943,
"eval_error_type_detail/Positivity_Constrain/recall": 0.8065693430656934,
"eval_error_type_detail/Quantifier_Strengthe/f1": 0.7148936170212765,
"eval_error_type_detail/Quantifier_Strengthe/precision": 0.7368421052631579,
"eval_error_type_detail/Quantifier_Strengthe/recall": 0.6942148760330579,
"eval_error_type_detail/Quantifier_Weakening/f1": 0.8196721311475409,
"eval_error_type_detail/Quantifier_Weakening/precision": 0.8503401360544217,
"eval_error_type_detail/Quantifier_Weakening/recall": 0.7911392405063291,
"eval_error_type_detail/Range_Error/f1": 0.6768060836501901,
"eval_error_type_detail/Range_Error/precision": 0.6793893129770993,
"eval_error_type_detail/Range_Error/recall": 0.6742424242424242,
"eval_error_type_detail/Range_Shift/f1": 0.761904761904762,
"eval_error_type_detail/Range_Shift/precision": 0.7096774193548387,
"eval_error_type_detail/Range_Shift/recall": 0.822429906542056,
"eval_error_type_detail/Redundant_Premise/f1": 0.7111111111111111,
"eval_error_type_detail/Redundant_Premise/precision": 0.6666666666666666,
"eval_error_type_detail/Redundant_Premise/recall": 0.7619047619047619,
"eval_error_type_detail/Truncation_Error/f1": 0.0,
"eval_error_type_detail/Truncation_Error/precision": 0.0,
"eval_error_type_detail/Truncation_Error/recall": 0.0,
"eval_error_type_detail/Variable_Constraint_/f1": 0.0,
"eval_error_type_detail/Variable_Constraint_/precision": 0.0,
"eval_error_type_detail/Variable_Constraint_/recall": 0.0,
"eval_invalid/count": 10,
"eval_invalid/rate": 0.0032552083333333335,
"eval_location_cascaded/binary_f1": 0.6974262397991211,
"eval_location_cascaded/binary_precision": 1.0,
"eval_location_cascaded/binary_recall": 0.535421686746988,
"eval_location_cascaded/match_rate": 0.535421686746988,
"eval_location_given_correct_type/binary_f1": 0.8435839028094153,
"eval_location_given_correct_type/binary_precision": 1.0,
"eval_location_given_correct_type/binary_recall": 0.7294812869336835,
"eval_location_given_correct_type/match_rate": 0.7294812869336835,
"eval_runtime": 3052.7449,
"eval_samples_per_second": 0.946,
"eval_steps_per_second": 0.002
}