jameVee's picture
Add trained hallucination-detector classifiers
131239b verified
{
"lexical_features": [
"num_spans",
"max_span_score",
"mean_span_score",
"span_char_fraction",
"max_span_len"
],
"lettuce_features": [
"num_spans",
"max_span_score",
"mean_span_score",
"span_char_fraction",
"max_span_len"
],
"lookback_features": [
"mean_ratio",
"min_ratio",
"frac_low_03",
"frac_low_02",
"std_ratio",
"bottom3_mean",
"longest_low_streak",
"mean_ratio_numeric",
"frac_low_numeric",
"num_spans",
"max_span_score",
"mean_span_score",
"span_char_fraction",
"max_span_len"
],
"seed": 1241,
"split": {
"train": 3308,
"test": 828
},
"backbone_lettuce": "KRLabsOrg/lettucedect-base-modernbert-en-v1",
"backbone_lookback": "Qwen/Qwen2.5-0.5B",
"source_dataset": "jameVee/ToolACE-Hallucination",
"hallucination_types": [
"missing_tool",
"overgeneration",
"tool_output_contradiction"
],
"metrics_test": {
"lexical_span_verifier": {
"accuracy": 0.5664251207729468,
"f1": 0.5232403718459495,
"auroc": 0.6025060625811036,
"span_f1": 0.052821437773769644
},
"lettuce_span_supervised": {
"accuracy": 0.677536231884058,
"f1": 0.6317241379310344,
"auroc": 0.7214236643238388,
"span_f1": 0.20837031474021983
},
"lookback_span_supervised": {
"accuracy": 0.4830917874396135,
"f1": 0.5532359081419624,
"auroc": 0.5116260635205834,
"span_f1": 0.0
},
"soft_vote_ensemble": {
"accuracy": 0.6763285024154589,
"f1": 0.6598984771573604,
"auroc": 0.7214089849507066,
"span_f1": 0.17918399775669488
}
}
}