Token Classification
Transformers
Safetensors
English
deberta-v2
hallucination-detection
span-detection
tool-use
deberta-v3
ragtruth
Instructions to use Ali-Bhai/deberta-tool-hallucination-span-detector with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use Ali-Bhai/deberta-tool-hallucination-span-detector with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("token-classification", model="Ali-Bhai/deberta-tool-hallucination-span-detector")

# Load model directly
from transformers import AutoTokenizer, AutoModelForTokenClassification

tokenizer = AutoTokenizer.from_pretrained("Ali-Bhai/deberta-tool-hallucination-span-detector")
model = AutoModelForTokenClassification.from_pretrained("Ali-Bhai/deberta-tool-hallucination-span-detector")
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "model_name": "span_deberta_v3_small_sliding_windows", | |
| "base_model_name": "microsoft/deberta-v3-small", | |
| "best_checkpoint_dir": "/content/drive/MyDrive/TLLM/03_models/span_deberta_v3_small_sliding_windows/best_checkpoint", | |
| "last_checkpoint_dir": "/content/drive/MyDrive/TLLM/03_models/span_deberta_v3_small_sliding_windows/last_checkpoint", | |
| "best_epoch": 8, | |
| "selected_config": { | |
| "threshold": 0.5, | |
| "min_span_chars": 1, | |
| "min_span_tokens": 1, | |
| "merge_gap_chars": 1, | |
| "strip_predicted_span_whitespace": true, | |
| "drop_spans_without_alnum": true, | |
| "score_name": "sum_non_O_probability" | |
| }, | |
| "labels": [ | |
| "O", | |
| "tool_output_conflict", | |
| "overgeneration", | |
| "missing_tool_action_recommendation" | |
| ], | |
| "label2id": { | |
| "O": 0, | |
| "tool_output_conflict": 1, | |
| "overgeneration": 2, | |
| "missing_tool_action_recommendation": 3 | |
| }, | |
| "id2label": { | |
| "0": "O", | |
| "1": "tool_output_conflict", | |
| "2": "overgeneration", | |
| "3": "missing_tool_action_recommendation" | |
| }, | |
| "final_validation_token_metrics": { | |
| "O": { | |
| "precision": 0.9997869091167383, | |
| "recall": 0.9987582487759881, | |
| "f1": 0.9992723142182701, | |
| "support": 28186, | |
| "tp": 28151, | |
| "fp": 6, | |
| "fn": 35 | |
| }, | |
| "tool_output_conflict": { | |
| "precision": 0.7666666666666667, | |
| "recall": 0.9504132231404959, | |
| "f1": 0.8487084870848709, | |
| "support": 121, | |
| "tp": 115, | |
| "fp": 35, | |
| "fn": 6 | |
| }, | |
| "overgeneration": { | |
| "precision": 1.0, | |
| "recall": 1.0, | |
| "f1": 1.0, | |
| "support": 428, | |
| "tp": 428, | |
| "fp": 0, | |
| "fn": 0 | |
| }, | |
| "missing_tool_action_recommendation": { | |
| "precision": 1.0, | |
| "recall": 1.0, | |
| "f1": 1.0, | |
| "support": 892, | |
| "tp": 892, | |
| "fp": 0, | |
| "fn": 0 | |
| }, | |
| "accuracy": 0.9986161271812873, | |
| "macro_f1": 0.9619952003257852, | |
| "weighted_f1": 0.9986898158940638, | |
| "support": 29627 | |
| }, | |
| "final_test_token_metrics": { | |
| "O": { | |
| "precision": 0.9994592359321818, | |
| "recall": 0.9951225692025084, | |
| "f1": 0.9972861881258828, | |
| "support": 31574, | |
| "tp": 31420, | |
| "fp": 17, | |
| "fn": 154 | |
| }, | |
| "tool_output_conflict": { | |
| "precision": 0.4188679245283019, | |
| "recall": 0.8671875, | |
| "f1": 0.564885496183206, | |
| "support": 128, | |
| "tp": 111, | |
| "fp": 154, | |
| "fn": 17 | |
| }, | |
| "overgeneration": { | |
| "precision": 1.0, | |
| "recall": 1.0, | |
| "f1": 1.0, | |
| "support": 406, | |
| "tp": 406, | |
| "fp": 0, | |
| "fn": 0 | |
| }, | |
| "missing_tool_action_recommendation": { | |
| "precision": 1.0, | |
| "recall": 1.0, | |
| "f1": 1.0, | |
| "support": 925, | |
| "tp": 925, | |
| "fp": 0, | |
| "fn": 0 | |
| }, | |
| "accuracy": 0.9948233584597221, | |
| "macro_f1": 0.8905429210772722, | |
| "weighted_f1": 0.9957200208094352, | |
| "support": 33033 | |
| }, | |
| "final_validation_metrics": { | |
| "config": { | |
| "threshold": 0.5, | |
| "min_span_chars": 1, | |
| "min_span_tokens": 1, | |
| "merge_gap_chars": 1, | |
| "strip_predicted_span_whitespace": true, | |
| "drop_spans_without_alnum": true, | |
| "score_name": "sum_non_O_probability" | |
| }, | |
| "num_rows": 214, | |
| "row_multiclass_metrics": { | |
| "clean": { | |
| "precision": 1.0, | |
| "recall": 0.9611650485436893, | |
| "f1": 0.9801980198019802, | |
| "support": 103, | |
| "tp": 99, | |
| "fp": 0, | |
| "fn": 4 | |
| }, | |
| "tool_output_conflict": { | |
| "precision": 0.9024390243902439, | |
| "recall": 1.0, | |
| "f1": 0.9487179487179488, | |
| "support": 37, | |
| "tp": 37, | |
| "fp": 4, | |
| "fn": 0 | |
| }, | |
| "overgeneration": { | |
| "precision": 1.0, | |
| "recall": 1.0, | |
| "f1": 1.0, | |
| "support": 37, | |
| "tp": 37, | |
| "fp": 0, | |
| "fn": 0 | |
| }, | |
| "missing_tool_action_recommendation": { | |
| "precision": 1.0, | |
| "recall": 1.0, | |
| "f1": 1.0, | |
| "support": 37, | |
| "tp": 37, | |
| "fp": 0, | |
| "fn": 0 | |
| }, | |
| "accuracy": 0.9813084112149533, | |
| "macro_f1": 0.9822289921299823, | |
| "weighted_f1": 0.9816026174867667, | |
| "support": 214 | |
| }, | |
| "binary_example_metrics": { | |
| "accuracy": 0.9813084112149533, | |
| "precision": 0.9652173913043478, | |
| "recall": 1.0, | |
| "f1": 0.9823008849557522, | |
| "tp": 111, | |
| "fp": 4, | |
| "fn": 0, | |
| "tn": 99 | |
| }, | |
| "exact_span_metrics": { | |
| "precision": 0.8333333333333334, | |
| "recall": 0.9009009009009009, | |
| "f1": 0.8658008658008659, | |
| "matched": 100, | |
| "gold_total": 111, | |
| "pred_total": 120 | |
| }, | |
| "overlap_span_metrics_iou_0_01": { | |
| "precision": 0.925, | |
| "recall": 1.0, | |
| "f1": 0.961038961038961, | |
| "matched": 111, | |
| "gold_total": 111, | |
| "pred_total": 120, | |
| "iou_threshold": 0.01 | |
| }, | |
| "overlap_span_metrics_iou_0_50": { | |
| "precision": 0.9083333333333333, | |
| "recall": 0.9819819819819819, | |
| "f1": 0.9437229437229437, | |
| "matched": 109, | |
| "gold_total": 111, | |
| "pred_total": 120, | |
| "iou_threshold": 0.5 | |
| }, | |
| "char_micro_metrics": { | |
| "precision": 0.9916501556750636, | |
| "recall": 0.9984326018808778, | |
| "f1": 0.9950298210735586, | |
| "overlap_chars": 7007, | |
| "gold_chars": 7018, | |
| "pred_chars": 7066 | |
| }, | |
| "per_type_char_micro_metrics": { | |
| "clean": { | |
| "precision": 0.0, | |
| "recall": 0.0, | |
| "f1": 0.0, | |
| "overlap_chars": 0, | |
| "gold_chars": 0, | |
| "pred_chars": 18 | |
| }, | |
| "overgeneration": { | |
| "precision": 0.9948006932409013, | |
| "recall": 1.0, | |
| "f1": 0.9973935708079931, | |
| "overlap_chars": 2296, | |
| "gold_chars": 2296, | |
| "pred_chars": 2308 | |
| }, | |
| "tool_output_conflict": { | |
| "precision": 0.9037656903765691, | |
| "recall": 0.9515418502202643, | |
| "f1": 0.927038626609442, | |
| "overlap_chars": 216, | |
| "gold_chars": 227, | |
| "pred_chars": 239 | |
| }, | |
| "missing_tool_action_recommendation": { | |
| "precision": 0.998666962897134, | |
| "recall": 1.0, | |
| "f1": 0.9993330369052912, | |
| "overlap_chars": 4495, | |
| "gold_chars": 4495, | |
| "pred_chars": 4501 | |
| } | |
| }, | |
| "num_gold_spans": 111, | |
| "num_predicted_spans": 120 | |
| }, | |
| "final_test_metrics": { | |
| "config": { | |
| "threshold": 0.5, | |
| "min_span_chars": 1, | |
| "min_span_tokens": 1, | |
| "merge_gap_chars": 1, | |
| "strip_predicted_span_whitespace": true, | |
| "drop_spans_without_alnum": true, | |
| "score_name": "sum_non_O_probability" | |
| }, | |
| "num_rows": 207, | |
| "row_multiclass_metrics": { | |
| "clean": { | |
| "precision": 0.9777777777777777, | |
| "recall": 0.9166666666666666, | |
| "f1": 0.946236559139785, | |
| "support": 96, | |
| "tp": 88, | |
| "fp": 2, | |
| "fn": 8 | |
| }, | |
| "tool_output_conflict": { | |
| "precision": 0.813953488372093, | |
| "recall": 0.9459459459459459, | |
| "f1": 0.875, | |
| "support": 37, | |
| "tp": 35, | |
| "fp": 8, | |
| "fn": 2 | |
| }, | |
| "overgeneration": { | |
| "precision": 1.0, | |
| "recall": 1.0, | |
| "f1": 1.0, | |
| "support": 37, | |
| "tp": 37, | |
| "fp": 0, | |
| "fn": 0 | |
| }, | |
| "missing_tool_action_recommendation": { | |
| "precision": 1.0, | |
| "recall": 1.0, | |
| "f1": 1.0, | |
| "support": 37, | |
| "tp": 37, | |
| "fp": 0, | |
| "fn": 0 | |
| }, | |
| "accuracy": 0.9516908212560387, | |
| "macro_f1": 0.9553091397849462, | |
| "weighted_f1": 0.9527232351566153, | |
| "support": 207 | |
| }, | |
| "binary_example_metrics": { | |
| "accuracy": 0.9516908212560387, | |
| "precision": 0.9316239316239316, | |
| "recall": 0.9819819819819819, | |
| "f1": 0.9561403508771931, | |
| "tp": 109, | |
| "fp": 8, | |
| "fn": 2, | |
| "tn": 88 | |
| }, | |
| "exact_span_metrics": { | |
| "precision": 0.7357142857142858, | |
| "recall": 0.9279279279279279, | |
| "f1": 0.8207171314741037, | |
| "matched": 103, | |
| "gold_total": 111, | |
| "pred_total": 140 | |
| }, | |
| "overlap_span_metrics_iou_0_01": { | |
| "precision": 0.7785714285714286, | |
| "recall": 0.9819819819819819, | |
| "f1": 0.8685258964143425, | |
| "matched": 109, | |
| "gold_total": 111, | |
| "pred_total": 140, | |
| "iou_threshold": 0.01 | |
| }, | |
| "overlap_span_metrics_iou_0_50": { | |
| "precision": 0.75, | |
| "recall": 0.9459459459459459, | |
| "f1": 0.8366533864541832, | |
| "matched": 105, | |
| "gold_total": 111, | |
| "pred_total": 140, | |
| "iou_threshold": 0.5 | |
| }, | |
| "char_micro_metrics": { | |
| "precision": 0.971356003950896, | |
| "recall": 0.9955169920462762, | |
| "f1": 0.9832881016997572, | |
| "overlap_chars": 6884, | |
| "gold_chars": 6915, | |
| "pred_chars": 7087 | |
| }, | |
| "per_type_char_micro_metrics": { | |
| "clean": { | |
| "precision": 0.0, | |
| "recall": 0.0, | |
| "f1": 0.0, | |
| "overlap_chars": 0, | |
| "gold_chars": 0, | |
| "pred_chars": 72 | |
| }, | |
| "missing_tool_action_recommendation": { | |
| "precision": 0.9879728843210146, | |
| "recall": 1.0, | |
| "f1": 0.9939500604993949, | |
| "overlap_chars": 4518, | |
| "gold_chars": 4518, | |
| "pred_chars": 4573 | |
| }, | |
| "tool_output_conflict": { | |
| "precision": 0.7251908396946565, | |
| "recall": 0.8597285067873304, | |
| "f1": 0.7867494824016563, | |
| "overlap_chars": 190, | |
| "gold_chars": 221, | |
| "pred_chars": 262 | |
| }, | |
| "overgeneration": { | |
| "precision": 0.998165137614679, | |
| "recall": 1.0, | |
| "f1": 0.9990817263544537, | |
| "overlap_chars": 2176, | |
| "gold_chars": 2176, | |
| "pred_chars": 2180 | |
| } | |
| }, | |
| "num_gold_spans": 111, | |
| "num_predicted_spans": 140 | |
| } | |
| } |