OmniScore-deberta-v3 / metrics_final.json
Firoj's picture
Upload trained DeBERTa-v3 OmniScore model artifacts
fed9f16 verified
{
"mse_overall": 1.4024023678171664,
"mse_informativeness": 2.461441272473649,
"mse_clarity": 1.2284622644467016,
"mse_plausibility": 1.0879825710281692,
"mse_faithfulness": 0.8317233633201462,
"rmse_overall": 1.158077748242096,
"rmse_informativeness": 1.5688981077411144,
"rmse_clarity": 1.1083601690996936,
"rmse_plausibility": 1.0430640301669736,
"rmse_faithfulness": 0.9119886859606023,
"mae_overall": 0.899240226476647,
"mae_informativeness": 1.2259089428195094,
"mae_clarity": 0.8495489541516034,
"mae_plausibility": 0.7888888825356873,
"mae_faithfulness": 0.7326141263997884,
"pearson_informativeness": 0.011997820511715235,
"pearson_pval_informativeness": 0.11588154802545263,
"pearson_clarity": 0.12849459314220896,
"pearson_pval_clarity": 3.890738005032354e-64,
"pearson_plausibility": 0.11834201691919606,
"pearson_pval_plausibility": 1.304550908503357e-54,
"pearson_faithfulness": 0.18031506133750783,
"pearson_pval_faithfulness": 1.8522469152476205e-125,
"pearson_overall": 0.10978737297765703,
"spearman_informativeness": 0.014049690666530525,
"spearman_pval_informativeness": 0.06558977589044894,
"spearman_clarity": 0.07886223322374226,
"spearman_pval_clarity": 4.161494147791507e-25,
"spearman_plausibility": 0.028879234142028146,
"spearman_pval_plausibility": 0.0001535624767660131,
"spearman_faithfulness": 0.13269205496372913,
"spearman_pval_faithfulness": 2.6118969073270385e-68,
"spearman_overall": 0.06362080324900751,
"r2_informativeness": -2.7357081705124537,
"r2_clarity": -0.6323411816544322,
"r2_plausibility": -0.7522394899465792,
"r2_faithfulness": -0.4469073644134465,
"r2_overall": -1.141799051631728,
"accuracy_0.5_overall": 0.3543085880640466,
"accuracy_0.5_informativeness": 0.3000873362445415,
"accuracy_0.5_clarity": 0.35656477438136824,
"accuracy_0.5_plausibility": 0.3750218340611354,
"accuracy_0.5_faithfulness": 0.3855604075691412,
"accuracy_1.0_overall": 0.7163755458515284,
"accuracy_1.0_informativeness": 0.4880349344978166,
"accuracy_1.0_clarity": 0.7710625909752548,
"accuracy_1.0_plausibility": 0.8298107714701601,
"accuracy_1.0_faithfulness": 0.7765938864628821,
"exact_match_overall": 0.3543085880640466,
"exact_match_informativeness": 0.3000873362445415,
"exact_match_clarity": 0.35656477438136824,
"exact_match_plausibility": 0.3750218340611354,
"exact_match_faithfulness": 0.3855604075691412,
"exact_match_all_scores": 0.029286754002911208,
"max_error_overall": 3.9264787435531616,
"max_error_informativeness": 3.946486473083496,
"max_error_clarity": 3.9604382514953613,
"max_error_plausibility": 3.9576148986816406,
"max_error_faithfulness": 3.8413753509521484,
"bias_overall": 0.11064034007522217,
"bias_informativeness": -0.8433933716916968,
"bias_clarity": 0.6641923878737759,
"bias_plausibility": 0.6784779361935855,
"bias_faithfulness": -0.05671559207477597,
"pred_mean": 4.212707297862704,
"pred_std": 0.9062484396734384,
"pred_min": 1.2022016048431396,
"pred_max": 4.963072776794434,
"label_mean": 4.102066957787482,
"label_std": 0.8079869363161494,
"label_min": 1.0,
"label_max": 5.0,
"num_samples": 17175
}