{ "model_name": "span_deberta_v3_small_sliding_windows", "base_model_name": "microsoft/deberta-v3-small", "best_checkpoint_dir": "/content/drive/MyDrive/TLLM/03_models/span_deberta_v3_small_sliding_windows/best_checkpoint", "last_checkpoint_dir": "/content/drive/MyDrive/TLLM/03_models/span_deberta_v3_small_sliding_windows/last_checkpoint", "best_epoch": 8, "selected_config": { "threshold": 0.5, "min_span_chars": 1, "min_span_tokens": 1, "merge_gap_chars": 1, "strip_predicted_span_whitespace": true, "drop_spans_without_alnum": true, "score_name": "sum_non_O_probability" }, "labels": [ "O", "tool_output_conflict", "overgeneration", "missing_tool_action_recommendation" ], "label2id": { "O": 0, "tool_output_conflict": 1, "overgeneration": 2, "missing_tool_action_recommendation": 3 }, "id2label": { "0": "O", "1": "tool_output_conflict", "2": "overgeneration", "3": "missing_tool_action_recommendation" }, "final_validation_token_metrics": { "O": { "precision": 0.9997869091167383, "recall": 0.9987582487759881, "f1": 0.9992723142182701, "support": 28186, "tp": 28151, "fp": 6, "fn": 35 }, "tool_output_conflict": { "precision": 0.7666666666666667, "recall": 0.9504132231404959, "f1": 0.8487084870848709, "support": 121, "tp": 115, "fp": 35, "fn": 6 }, "overgeneration": { "precision": 1.0, "recall": 1.0, "f1": 1.0, "support": 428, "tp": 428, "fp": 0, "fn": 0 }, "missing_tool_action_recommendation": { "precision": 1.0, "recall": 1.0, "f1": 1.0, "support": 892, "tp": 892, "fp": 0, "fn": 0 }, "accuracy": 0.9986161271812873, "macro_f1": 0.9619952003257852, "weighted_f1": 0.9986898158940638, "support": 29627 }, "final_test_token_metrics": { "O": { "precision": 0.9994592359321818, "recall": 0.9951225692025084, "f1": 0.9972861881258828, "support": 31574, "tp": 31420, "fp": 17, "fn": 154 }, "tool_output_conflict": { "precision": 0.4188679245283019, "recall": 0.8671875, "f1": 0.564885496183206, "support": 128, "tp": 111, "fp": 154, "fn": 17 }, "overgeneration": { "precision": 1.0, "recall": 1.0, "f1": 1.0, "support": 406, "tp": 406, "fp": 0, "fn": 0 }, "missing_tool_action_recommendation": { "precision": 1.0, "recall": 1.0, "f1": 1.0, "support": 925, "tp": 925, "fp": 0, "fn": 0 }, "accuracy": 0.9948233584597221, "macro_f1": 0.8905429210772722, "weighted_f1": 0.9957200208094352, "support": 33033 }, "final_validation_metrics": { "config": { "threshold": 0.5, "min_span_chars": 1, "min_span_tokens": 1, "merge_gap_chars": 1, "strip_predicted_span_whitespace": true, "drop_spans_without_alnum": true, "score_name": "sum_non_O_probability" }, "num_rows": 214, "row_multiclass_metrics": { "clean": { "precision": 1.0, "recall": 0.9611650485436893, "f1": 0.9801980198019802, "support": 103, "tp": 99, "fp": 0, "fn": 4 }, "tool_output_conflict": { "precision": 0.9024390243902439, "recall": 1.0, "f1": 0.9487179487179488, "support": 37, "tp": 37, "fp": 4, "fn": 0 }, "overgeneration": { "precision": 1.0, "recall": 1.0, "f1": 1.0, "support": 37, "tp": 37, "fp": 0, "fn": 0 }, "missing_tool_action_recommendation": { "precision": 1.0, "recall": 1.0, "f1": 1.0, "support": 37, "tp": 37, "fp": 0, "fn": 0 }, "accuracy": 0.9813084112149533, "macro_f1": 0.9822289921299823, "weighted_f1": 0.9816026174867667, "support": 214 }, "binary_example_metrics": { "accuracy": 0.9813084112149533, "precision": 0.9652173913043478, "recall": 1.0, "f1": 0.9823008849557522, "tp": 111, "fp": 4, "fn": 0, "tn": 99 }, "exact_span_metrics": { "precision": 0.8333333333333334, "recall": 0.9009009009009009, "f1": 0.8658008658008659, "matched": 100, "gold_total": 111, "pred_total": 120 }, "overlap_span_metrics_iou_0_01": { "precision": 0.925, "recall": 1.0, "f1": 0.961038961038961, "matched": 111, "gold_total": 111, "pred_total": 120, "iou_threshold": 0.01 }, "overlap_span_metrics_iou_0_50": { "precision": 0.9083333333333333, "recall": 0.9819819819819819, "f1": 0.9437229437229437, "matched": 109, "gold_total": 111, "pred_total": 120, "iou_threshold": 0.5 }, "char_micro_metrics": { "precision": 0.9916501556750636, "recall": 0.9984326018808778, "f1": 0.9950298210735586, "overlap_chars": 7007, "gold_chars": 7018, "pred_chars": 7066 }, "per_type_char_micro_metrics": { "clean": { "precision": 0.0, "recall": 0.0, "f1": 0.0, "overlap_chars": 0, "gold_chars": 0, "pred_chars": 18 }, "overgeneration": { "precision": 0.9948006932409013, "recall": 1.0, "f1": 0.9973935708079931, "overlap_chars": 2296, "gold_chars": 2296, "pred_chars": 2308 }, "tool_output_conflict": { "precision": 0.9037656903765691, "recall": 0.9515418502202643, "f1": 0.927038626609442, "overlap_chars": 216, "gold_chars": 227, "pred_chars": 239 }, "missing_tool_action_recommendation": { "precision": 0.998666962897134, "recall": 1.0, "f1": 0.9993330369052912, "overlap_chars": 4495, "gold_chars": 4495, "pred_chars": 4501 } }, "num_gold_spans": 111, "num_predicted_spans": 120 }, "final_test_metrics": { "config": { "threshold": 0.5, "min_span_chars": 1, "min_span_tokens": 1, "merge_gap_chars": 1, "strip_predicted_span_whitespace": true, "drop_spans_without_alnum": true, "score_name": "sum_non_O_probability" }, "num_rows": 207, "row_multiclass_metrics": { "clean": { "precision": 0.9777777777777777, "recall": 0.9166666666666666, "f1": 0.946236559139785, "support": 96, "tp": 88, "fp": 2, "fn": 8 }, "tool_output_conflict": { "precision": 0.813953488372093, "recall": 0.9459459459459459, "f1": 0.875, "support": 37, "tp": 35, "fp": 8, "fn": 2 }, "overgeneration": { "precision": 1.0, "recall": 1.0, "f1": 1.0, "support": 37, "tp": 37, "fp": 0, "fn": 0 }, "missing_tool_action_recommendation": { "precision": 1.0, "recall": 1.0, "f1": 1.0, "support": 37, "tp": 37, "fp": 0, "fn": 0 }, "accuracy": 0.9516908212560387, "macro_f1": 0.9553091397849462, "weighted_f1": 0.9527232351566153, "support": 207 }, "binary_example_metrics": { "accuracy": 0.9516908212560387, "precision": 0.9316239316239316, "recall": 0.9819819819819819, "f1": 0.9561403508771931, "tp": 109, "fp": 8, "fn": 2, "tn": 88 }, "exact_span_metrics": { "precision": 0.7357142857142858, "recall": 0.9279279279279279, "f1": 0.8207171314741037, "matched": 103, "gold_total": 111, "pred_total": 140 }, "overlap_span_metrics_iou_0_01": { "precision": 0.7785714285714286, "recall": 0.9819819819819819, "f1": 0.8685258964143425, "matched": 109, "gold_total": 111, "pred_total": 140, "iou_threshold": 0.01 }, "overlap_span_metrics_iou_0_50": { "precision": 0.75, "recall": 0.9459459459459459, "f1": 0.8366533864541832, "matched": 105, "gold_total": 111, "pred_total": 140, "iou_threshold": 0.5 }, "char_micro_metrics": { "precision": 0.971356003950896, "recall": 0.9955169920462762, "f1": 0.9832881016997572, "overlap_chars": 6884, "gold_chars": 6915, "pred_chars": 7087 }, "per_type_char_micro_metrics": { "clean": { "precision": 0.0, "recall": 0.0, "f1": 0.0, "overlap_chars": 0, "gold_chars": 0, "pred_chars": 72 }, "missing_tool_action_recommendation": { "precision": 0.9879728843210146, "recall": 1.0, "f1": 0.9939500604993949, "overlap_chars": 4518, "gold_chars": 4518, "pred_chars": 4573 }, "tool_output_conflict": { "precision": 0.7251908396946565, "recall": 0.8597285067873304, "f1": 0.7867494824016563, "overlap_chars": 190, "gold_chars": 221, "pred_chars": 262 }, "overgeneration": { "precision": 0.998165137614679, "recall": 1.0, "f1": 0.9990817263544537, "overlap_chars": 2176, "gold_chars": 2176, "pred_chars": 2180 } }, "num_gold_spans": 111, "num_predicted_spans": 140 } }