| { | |
| "per_head": { | |
| "relation_to_previous": { | |
| "accuracy": 0.8768, | |
| "macro_f1": 0.7893, | |
| "per_label": { | |
| "new": { | |
| "precision": 1.0, | |
| "recall": 1.0, | |
| "f1": 1.0, | |
| "support": 102 | |
| }, | |
| "follow_up": { | |
| "precision": 0.8764, | |
| "recall": 0.9176, | |
| "f1": 0.8966, | |
| "support": 85 | |
| }, | |
| "correction": { | |
| "precision": 0.6786, | |
| "recall": 0.6552, | |
| "f1": 0.6667, | |
| "support": 29 | |
| }, | |
| "confirmation": { | |
| "precision": 0.8235, | |
| "recall": 0.7, | |
| "f1": 0.7568, | |
| "support": 20 | |
| }, | |
| "cancellation": { | |
| "precision": 0.7143, | |
| "recall": 0.4762, | |
| "f1": 0.5714, | |
| "support": 21 | |
| }, | |
| "closure": { | |
| "precision": 0.7308, | |
| "recall": 1.0, | |
| "f1": 0.8444, | |
| "support": 19 | |
| } | |
| }, | |
| "confusion_matrix": [ | |
| [ | |
| 102, | |
| 0, | |
| 0, | |
| 0, | |
| 0, | |
| 0 | |
| ], | |
| [ | |
| 0, | |
| 78, | |
| 3, | |
| 0, | |
| 2, | |
| 2 | |
| ], | |
| [ | |
| 0, | |
| 5, | |
| 19, | |
| 2, | |
| 2, | |
| 1 | |
| ], | |
| [ | |
| 0, | |
| 3, | |
| 0, | |
| 14, | |
| 0, | |
| 3 | |
| ], | |
| [ | |
| 0, | |
| 3, | |
| 6, | |
| 1, | |
| 10, | |
| 1 | |
| ], | |
| [ | |
| 0, | |
| 0, | |
| 0, | |
| 0, | |
| 0, | |
| 19 | |
| ] | |
| ] | |
| }, | |
| "actionability": { | |
| "accuracy": 0.7101, | |
| "macro_f1": 0.6834, | |
| "per_label": { | |
| "none": { | |
| "precision": 0.6786, | |
| "recall": 0.623, | |
| "f1": 0.6496, | |
| "support": 61 | |
| }, | |
| "review": { | |
| "precision": 0.6571, | |
| "recall": 0.5823, | |
| "f1": 0.6174, | |
| "support": 79 | |
| }, | |
| "act": { | |
| "precision": 0.7467, | |
| "recall": 0.8235, | |
| "f1": 0.7832, | |
| "support": 136 | |
| } | |
| }, | |
| "confusion_matrix": [ | |
| [ | |
| 38, | |
| 13, | |
| 10 | |
| ], | |
| [ | |
| 5, | |
| 46, | |
| 28 | |
| ], | |
| [ | |
| 13, | |
| 11, | |
| 112 | |
| ] | |
| ] | |
| }, | |
| "retention": { | |
| "accuracy": 0.7029, | |
| "macro_f1": 0.6849, | |
| "per_label": { | |
| "ephemeral": { | |
| "precision": 0.6628, | |
| "recall": 0.6404, | |
| "f1": 0.6514, | |
| "support": 89 | |
| }, | |
| "useful": { | |
| "precision": 0.7025, | |
| "recall": 0.7929, | |
| "f1": 0.745, | |
| "support": 140 | |
| }, | |
| "remember": { | |
| "precision": 0.8125, | |
| "recall": 0.5532, | |
| "f1": 0.6582, | |
| "support": 47 | |
| } | |
| }, | |
| "confusion_matrix": [ | |
| [ | |
| 57, | |
| 31, | |
| 1 | |
| ], | |
| [ | |
| 24, | |
| 111, | |
| 5 | |
| ], | |
| [ | |
| 5, | |
| 16, | |
| 26 | |
| ] | |
| ] | |
| }, | |
| "urgency": { | |
| "accuracy": 0.6449, | |
| "macro_f1": 0.5777, | |
| "per_label": { | |
| "low": { | |
| "precision": 0.7536, | |
| "recall": 0.7647, | |
| "f1": 0.7591, | |
| "support": 136 | |
| }, | |
| "medium": { | |
| "precision": 0.5357, | |
| "recall": 0.6316, | |
| "f1": 0.5797, | |
| "support": 95 | |
| }, | |
| "high": { | |
| "precision": 0.5385, | |
| "recall": 0.3111, | |
| "f1": 0.3944, | |
| "support": 45 | |
| } | |
| }, | |
| "confusion_matrix": [ | |
| [ | |
| 104, | |
| 28, | |
| 4 | |
| ], | |
| [ | |
| 27, | |
| 60, | |
| 8 | |
| ], | |
| [ | |
| 7, | |
| 24, | |
| 14 | |
| ] | |
| ] | |
| } | |
| }, | |
| "overall": { | |
| "exact_match": 0.3406, | |
| "macro_average_f1": 0.6838, | |
| "automation_safe_accuracy": 0.6222, | |
| "automation_safe_coverage": 0.163, | |
| "confidence_threshold": 0.8, | |
| "confidence_calibration": { | |
| "ece": 0.395419, | |
| "bins": [ | |
| { | |
| "range": [ | |
| 0.5, | |
| 0.6 | |
| ], | |
| "count": 6, | |
| "avg_confidence": 0.5757, | |
| "accuracy": 0.3333 | |
| }, | |
| { | |
| "range": [ | |
| 0.6, | |
| 0.7 | |
| ], | |
| "count": 86, | |
| "avg_confidence": 0.6646, | |
| "accuracy": 0.2326 | |
| }, | |
| { | |
| "range": [ | |
| 0.7, | |
| 0.8 | |
| ], | |
| "count": 139, | |
| "avg_confidence": 0.7484, | |
| "accuracy": 0.3165 | |
| }, | |
| { | |
| "range": [ | |
| 0.8, | |
| 0.9 | |
| ], | |
| "count": 42, | |
| "avg_confidence": 0.8384, | |
| "accuracy": 0.5952 | |
| }, | |
| { | |
| "range": [ | |
| 0.9, | |
| 1.0 | |
| ], | |
| "count": 3, | |
| "avg_confidence": 0.9038, | |
| "accuracy": 1.0 | |
| } | |
| ] | |
| } | |
| }, | |
| "temperature_scaling": { | |
| "method": "per_head_temperature_scaling", | |
| "per_head": { | |
| "relation_to_previous": 0.630957, | |
| "actionability": 0.891251, | |
| "retention": 0.944061, | |
| "urgency": 0.891251 | |
| } | |
| } | |
| } |