{ "lexical_features": [ "num_spans", "max_span_score", "mean_span_score", "span_char_fraction", "max_span_len" ], "lettuce_features": [ "num_spans", "max_span_score", "mean_span_score", "span_char_fraction", "max_span_len" ], "lookback_features": [ "mean_ratio", "min_ratio", "frac_low_03", "frac_low_02", "std_ratio", "bottom3_mean", "longest_low_streak", "mean_ratio_numeric", "frac_low_numeric", "num_spans", "max_span_score", "mean_span_score", "span_char_fraction", "max_span_len" ], "seed": 1241, "split": { "train": 3308, "test": 828 }, "backbone_lettuce": "KRLabsOrg/lettucedect-base-modernbert-en-v1", "backbone_lookback": "Qwen/Qwen2.5-0.5B", "source_dataset": "jameVee/ToolACE-Hallucination", "hallucination_types": [ "missing_tool", "overgeneration", "tool_output_contradiction" ], "metrics_test": { "lexical_span_verifier": { "accuracy": 0.5664251207729468, "f1": 0.5232403718459495, "auroc": 0.6025060625811036, "span_f1": 0.052821437773769644 }, "lettuce_span_supervised": { "accuracy": 0.677536231884058, "f1": 0.6317241379310344, "auroc": 0.7214236643238388, "span_f1": 0.20837031474021983 }, "lookback_span_supervised": { "accuracy": 0.4830917874396135, "f1": 0.5532359081419624, "auroc": 0.5116260635205834, "span_f1": 0.0 }, "soft_vote_ensemble": { "accuracy": 0.6763285024154589, "f1": 0.6598984771573604, "auroc": 0.7214089849507066, "span_f1": 0.17918399775669488 } } }