{ "epochs": [ { "per_head": { "relation_to_previous": { "accuracy": 0.6775, "macro_f1": 0.2761, "per_label": { "new": { "precision": 1.0, "recall": 1.0, "f1": 1.0, "support": 102 }, "follow_up": { "precision": 0.4885, "recall": 1.0, "f1": 0.6564, "support": 85 }, "correction": { "precision": 0.0, "recall": 0.0, "f1": 0.0, "support": 29 }, "confirmation": { "precision": 0.0, "recall": 0.0, "f1": 0.0, "support": 20 }, "cancellation": { "precision": 0.0, "recall": 0.0, "f1": 0.0, "support": 21 }, "closure": { "precision": 0.0, "recall": 0.0, "f1": 0.0, "support": 19 } }, "confusion_matrix": [ [ 102, 0, 0, 0, 0, 0 ], [ 0, 85, 0, 0, 0, 0 ], [ 0, 29, 0, 0, 0, 0 ], [ 0, 20, 0, 0, 0, 0 ], [ 0, 21, 0, 0, 0, 0 ], [ 0, 19, 0, 0, 0, 0 ] ] }, "actionability": { "accuracy": 0.4891, "macro_f1": 0.219, "per_label": { "none": { "precision": 0.0, "recall": 0.0, "f1": 0.0, "support": 61 }, "review": { "precision": 0.0, "recall": 0.0, "f1": 0.0, "support": 79 }, "act": { "precision": 0.4909, "recall": 0.9926, "f1": 0.6569, "support": 136 } }, "confusion_matrix": [ [ 0, 0, 61 ], [ 0, 0, 79 ], [ 1, 0, 135 ] ] }, "retention": { "accuracy": 0.5181, "macro_f1": 0.2477, "per_label": { "ephemeral": { "precision": 1.0, "recall": 0.0337, "f1": 0.0652, "support": 89 }, "useful": { "precision": 0.5128, "recall": 1.0, "f1": 0.678, "support": 140 }, "remember": { "precision": 0.0, "recall": 0.0, "f1": 0.0, "support": 47 } }, "confusion_matrix": [ [ 3, 86, 0 ], [ 0, 140, 0 ], [ 0, 47, 0 ] ] }, "urgency": { "accuracy": 0.4928, "macro_f1": 0.2201, "per_label": { "low": { "precision": 0.4928, "recall": 1.0, "f1": 0.6602, "support": 136 }, "medium": { "precision": 0.0, "recall": 0.0, "f1": 0.0, "support": 95 }, "high": { "precision": 0.0, "recall": 0.0, "f1": 0.0, "support": 45 } }, "confusion_matrix": [ [ 136, 0, 0 ], [ 95, 0, 0 ], [ 45, 0, 0 ] ] } }, "overall": { "exact_match": 0.0616, "macro_average_f1": 0.2407, "automation_safe_accuracy": 0.0, "automation_safe_coverage": 0.0, "confidence_threshold": 0.8, "confidence_calibration": { "ece": 0.463661, "bins": [ { "range": [ 0.4, 0.5 ], "count": 58, "avg_confidence": 0.481, "accuracy": 0.0517 }, { "range": [ 0.5, 0.6 ], "count": 218, "avg_confidence": 0.537, "accuracy": 0.0642 } ] } }, "training": { "epoch": 1, "loss": 4.6568 } }, { "per_head": { "relation_to_previous": { "accuracy": 0.7283, "macro_f1": 0.4422, "per_label": { "new": { "precision": 1.0, "recall": 1.0, "f1": 1.0, "support": 102 }, "follow_up": { "precision": 0.6124, "recall": 0.9294, "f1": 0.7383, "support": 85 }, "correction": { "precision": 0.0, "recall": 0.0, "f1": 0.0, "support": 29 }, "confirmation": { "precision": 1.0, "recall": 0.2, "f1": 0.3333, "support": 20 }, "cancellation": { "precision": 0.0, "recall": 0.0, "f1": 0.0, "support": 21 }, "closure": { "precision": 0.4444, "recall": 0.8421, "f1": 0.5818, "support": 19 } }, "confusion_matrix": [ [ 102, 0, 0, 0, 0, 0 ], [ 0, 79, 0, 0, 0, 6 ], [ 0, 24, 0, 0, 0, 5 ], [ 0, 8, 3, 4, 0, 5 ], [ 0, 15, 2, 0, 0, 4 ], [ 0, 3, 0, 0, 0, 16 ] ] }, "actionability": { "accuracy": 0.5688, "macro_f1": 0.5548, "per_label": { "none": { "precision": 0.5303, "recall": 0.5738, "f1": 0.5512, "support": 61 }, "review": { "precision": 0.4124, "recall": 0.5063, "f1": 0.4545, "support": 79 }, "act": { "precision": 0.7257, "recall": 0.6029, "f1": 0.6586, "support": 136 } }, "confusion_matrix": [ [ 35, 18, 8 ], [ 16, 40, 23 ], [ 15, 39, 82 ] ] }, "retention": { "accuracy": 0.5254, "macro_f1": 0.3733, "per_label": { "ephemeral": { "precision": 0.4845, "recall": 0.5281, "f1": 0.5054, "support": 89 }, "useful": { "precision": 0.5475, "recall": 0.7, "f1": 0.6144, "support": 140 }, "remember": { "precision": 0.0, "recall": 0.0, "f1": 0.0, "support": 47 } }, "confusion_matrix": [ [ 47, 42, 0 ], [ 42, 98, 0 ], [ 8, 39, 0 ] ] }, "urgency": { "accuracy": 0.5362, "macro_f1": 0.3555, "per_label": { "low": { "precision": 0.5604, "recall": 0.8529, "f1": 0.6764, "support": 136 }, "medium": { "precision": 0.4638, "recall": 0.3368, "f1": 0.3902, "support": 95 }, "high": { "precision": 0.0, "recall": 0.0, "f1": 0.0, "support": 45 } }, "confusion_matrix": [ [ 116, 20, 0 ], [ 63, 32, 0 ], [ 28, 17, 0 ] ] } }, "overall": { "exact_match": 0.1123, "macro_average_f1": 0.4314, "automation_safe_accuracy": 0.0, "automation_safe_coverage": 0.0, "confidence_threshold": 0.8, "confidence_calibration": { "ece": 0.445582, "bins": [ { "range": [ 0.4, 0.5 ], "count": 44, "avg_confidence": 0.4826, "accuracy": 0.0455 }, { "range": [ 0.5, 0.6 ], "count": 208, "avg_confidence": 0.5677, "accuracy": 0.125 }, { "range": [ 0.6, 0.7 ], "count": 24, "avg_confidence": 0.6107, "accuracy": 0.125 } ] } }, "training": { "epoch": 2, "loss": 3.7776 } }, { "per_head": { "relation_to_previous": { "accuracy": 0.808, "macro_f1": 0.6475, "per_label": { "new": { "precision": 1.0, "recall": 1.0, "f1": 1.0, "support": 102 }, "follow_up": { "precision": 0.7009, "recall": 0.9647, "f1": 0.8119, "support": 85 }, "correction": { "precision": 0.5294, "recall": 0.3103, "f1": 0.3913, "support": 29 }, "confirmation": { "precision": 0.9231, "recall": 0.6, "f1": 0.7273, "support": 20 }, "cancellation": { "precision": 0.75, "recall": 0.1429, "f1": 0.24, "support": 21 }, "closure": { "precision": 0.6522, "recall": 0.7895, "f1": 0.7143, "support": 19 } }, "confusion_matrix": [ [ 102, 0, 0, 0, 0, 0 ], [ 0, 82, 1, 0, 0, 2 ], [ 0, 16, 9, 1, 1, 2 ], [ 0, 4, 0, 12, 0, 4 ], [ 0, 11, 7, 0, 3, 0 ], [ 0, 4, 0, 0, 0, 15 ] ] }, "actionability": { "accuracy": 0.6304, "macro_f1": 0.5798, "per_label": { "none": { "precision": 0.6591, "recall": 0.4754, "f1": 0.5524, "support": 61 }, "review": { "precision": 0.5882, "recall": 0.3797, "f1": 0.4615, "support": 79 }, "act": { "precision": 0.6354, "recall": 0.8456, "f1": 0.7256, "support": 136 } }, "confusion_matrix": [ [ 29, 7, 25 ], [ 8, 30, 41 ], [ 7, 14, 115 ] ] }, "retention": { "accuracy": 0.6703, "macro_f1": 0.6498, "per_label": { "ephemeral": { "precision": 0.661, "recall": 0.4382, "f1": 0.527, "support": 89 }, "useful": { "precision": 0.6398, "recall": 0.85, "f1": 0.7301, "support": 140 }, "remember": { "precision": 0.871, "recall": 0.5745, "f1": 0.6923, "support": 47 } }, "confusion_matrix": [ [ 39, 49, 1 ], [ 18, 119, 3 ], [ 2, 18, 27 ] ] }, "urgency": { "accuracy": 0.5688, "macro_f1": 0.4235, "per_label": { "low": { "precision": 0.694, "recall": 0.6838, "f1": 0.6889, "support": 136 }, "medium": { "precision": 0.4565, "recall": 0.6632, "f1": 0.5408, "support": 95 }, "high": { "precision": 0.25, "recall": 0.0222, "f1": 0.0408, "support": 45 } }, "confusion_matrix": [ [ 93, 42, 1 ], [ 30, 63, 2 ], [ 11, 33, 1 ] ] } }, "overall": { "exact_match": 0.2101, "macro_average_f1": 0.5752, "automation_safe_accuracy": 0.0, "automation_safe_coverage": 0.0, "confidence_threshold": 0.8, "confidence_calibration": { "ece": 0.396414, "bins": [ { "range": [ 0.4, 0.5 ], "count": 10, "avg_confidence": 0.4772, "accuracy": 0.2 }, { "range": [ 0.5, 0.6 ], "count": 128, "avg_confidence": 0.5669, "accuracy": 0.1484 }, { "range": [ 0.6, 0.7 ], "count": 129, "avg_confidence": 0.64, "accuracy": 0.2326 }, { "range": [ 0.7, 0.8 ], "count": 9, "avg_confidence": 0.7196, "accuracy": 0.7778 } ] } }, "training": { "epoch": 3, "loss": 3.3415 } }, { "per_head": { "relation_to_previous": { "accuracy": 0.8333, "macro_f1": 0.694, "per_label": { "new": { "precision": 1.0, "recall": 1.0, "f1": 1.0, "support": 102 }, "follow_up": { "precision": 0.8571, "recall": 0.9176, "f1": 0.8864, "support": 85 }, "correction": { "precision": 0.5652, "recall": 0.4483, "f1": 0.5, "support": 29 }, "confirmation": { "precision": 0.8, "recall": 0.6, "f1": 0.6857, "support": 20 }, "cancellation": { "precision": 0.5455, "recall": 0.2857, "f1": 0.375, "support": 21 }, "closure": { "precision": 0.5588, "recall": 1.0, "f1": 0.717, "support": 19 } }, "confusion_matrix": [ [ 102, 0, 0, 0, 0, 0 ], [ 0, 78, 3, 0, 1, 3 ], [ 0, 7, 13, 1, 4, 4 ], [ 0, 2, 0, 12, 0, 6 ], [ 0, 4, 7, 2, 6, 2 ], [ 0, 0, 0, 0, 0, 19 ] ] }, "actionability": { "accuracy": 0.6486, "macro_f1": 0.6252, "per_label": { "none": { "precision": 0.5634, "recall": 0.6557, "f1": 0.6061, "support": 61 }, "review": { "precision": 0.5882, "recall": 0.5063, "f1": 0.5442, "support": 79 }, "act": { "precision": 0.7226, "recall": 0.7279, "f1": 0.7253, "support": 136 } }, "confusion_matrix": [ [ 40, 8, 13 ], [ 14, 40, 25 ], [ 17, 20, 99 ] ] }, "retention": { "accuracy": 0.6703, "macro_f1": 0.6542, "per_label": { "ephemeral": { "precision": 0.6067, "recall": 0.6067, "f1": 0.6067, "support": 89 }, "useful": { "precision": 0.673, "recall": 0.7643, "f1": 0.7157, "support": 140 }, "remember": { "precision": 0.8571, "recall": 0.5106, "f1": 0.64, "support": 47 } }, "confusion_matrix": [ [ 54, 35, 0 ], [ 29, 107, 4 ], [ 6, 17, 24 ] ] }, "urgency": { "accuracy": 0.5906, "macro_f1": 0.4633, "per_label": { "low": { "precision": 0.6477, "recall": 0.8382, "f1": 0.7308, "support": 136 }, "medium": { "precision": 0.4783, "recall": 0.4632, "f1": 0.4706, "support": 95 }, "high": { "precision": 0.625, "recall": 0.1111, "f1": 0.1887, "support": 45 } }, "confusion_matrix": [ [ 114, 21, 1 ], [ 49, 44, 2 ], [ 13, 27, 5 ] ] } }, "overall": { "exact_match": 0.2319, "macro_average_f1": 0.6092, "automation_safe_accuracy": 0.0, "automation_safe_coverage": 0.0, "confidence_threshold": 0.8, "confidence_calibration": { "ece": 0.402291, "bins": [ { "range": [ 0.4, 0.5 ], "count": 4, "avg_confidence": 0.4762, "accuracy": 0.0 }, { "range": [ 0.5, 0.6 ], "count": 74, "avg_confidence": 0.563, "accuracy": 0.1486 }, { "range": [ 0.6, 0.7 ], "count": 159, "avg_confidence": 0.6473, "accuracy": 0.2138 }, { "range": [ 0.7, 0.8 ], "count": 39, "avg_confidence": 0.7317, "accuracy": 0.4872 } ] } }, "training": { "epoch": 4, "loss": 2.9715 } }, { "per_head": { "relation_to_previous": { "accuracy": 0.8442, "macro_f1": 0.7157, "per_label": { "new": { "precision": 1.0, "recall": 1.0, "f1": 1.0, "support": 102 }, "follow_up": { "precision": 0.8571, "recall": 0.9176, "f1": 0.8864, "support": 85 }, "correction": { "precision": 0.5926, "recall": 0.5517, "f1": 0.5714, "support": 29 }, "confirmation": { "precision": 0.8571, "recall": 0.6, "f1": 0.7059, "support": 20 }, "cancellation": { "precision": 0.6667, "recall": 0.2857, "f1": 0.4, "support": 21 }, "closure": { "precision": 0.5758, "recall": 1.0, "f1": 0.7308, "support": 19 } }, "confusion_matrix": [ [ 102, 0, 0, 0, 0, 0 ], [ 0, 78, 3, 0, 1, 3 ], [ 0, 7, 16, 1, 2, 3 ], [ 0, 2, 0, 12, 0, 6 ], [ 0, 4, 8, 1, 6, 2 ], [ 0, 0, 0, 0, 0, 19 ] ] }, "actionability": { "accuracy": 0.6558, "macro_f1": 0.6342, "per_label": { "none": { "precision": 0.5972, "recall": 0.7049, "f1": 0.6466, "support": 61 }, "review": { "precision": 0.5493, "recall": 0.4937, "f1": 0.52, "support": 79 }, "act": { "precision": 0.7444, "recall": 0.7279, "f1": 0.7361, "support": 136 } }, "confusion_matrix": [ [ 43, 12, 6 ], [ 12, 39, 28 ], [ 17, 20, 99 ] ] }, "retention": { "accuracy": 0.6703, "macro_f1": 0.6666, "per_label": { "ephemeral": { "precision": 0.5816, "recall": 0.6404, "f1": 0.6096, "support": 89 }, "useful": { "precision": 0.6846, "recall": 0.7286, "f1": 0.7059, "support": 140 }, "remember": { "precision": 0.8966, "recall": 0.5532, "f1": 0.6842, "support": 47 } }, "confusion_matrix": [ [ 57, 32, 0 ], [ 35, 102, 3 ], [ 6, 15, 26 ] ] }, "urgency": { "accuracy": 0.5978, "macro_f1": 0.4845, "per_label": { "low": { "precision": 0.6948, "recall": 0.7868, "f1": 0.7379, "support": 136 }, "medium": { "precision": 0.4815, "recall": 0.5474, "f1": 0.5123, "support": 95 }, "high": { "precision": 0.4286, "recall": 0.1333, "f1": 0.2034, "support": 45 } }, "confusion_matrix": [ [ 107, 27, 2 ], [ 37, 52, 6 ], [ 10, 29, 6 ] ] } }, "overall": { "exact_match": 0.25, "macro_average_f1": 0.6252, "automation_safe_accuracy": 0.8, "automation_safe_coverage": 0.0181, "confidence_threshold": 0.8, "confidence_calibration": { "ece": 0.407114, "bins": [ { "range": [ 0.4, 0.5 ], "count": 1, "avg_confidence": 0.4972, "accuracy": 0.0 }, { "range": [ 0.5, 0.6 ], "count": 55, "avg_confidence": 0.5704, "accuracy": 0.1818 }, { "range": [ 0.6, 0.7 ], "count": 143, "avg_confidence": 0.6475, "accuracy": 0.1538 }, { "range": [ 0.7, 0.8 ], "count": 72, "avg_confidence": 0.7343, "accuracy": 0.4583 }, { "range": [ 0.8, 0.9 ], "count": 5, "avg_confidence": 0.8067, "accuracy": 0.8 } ] } }, "training": { "epoch": 5, "loss": 2.7301 } }, { "per_head": { "relation_to_previous": { "accuracy": 0.8587, "macro_f1": 0.7646, "per_label": { "new": { "precision": 1.0, "recall": 1.0, "f1": 1.0, "support": 102 }, "follow_up": { "precision": 0.7921, "recall": 0.9412, "f1": 0.8602, "support": 85 }, "correction": { "precision": 0.64, "recall": 0.5517, "f1": 0.5926, "support": 29 }, "confirmation": { "precision": 0.8125, "recall": 0.65, "f1": 0.7222, "support": 20 }, "cancellation": { "precision": 0.8182, "recall": 0.4286, "f1": 0.5625, "support": 21 }, "closure": { "precision": 0.8095, "recall": 0.8947, "f1": 0.85, "support": 19 } }, "confusion_matrix": [ [ 102, 0, 0, 0, 0, 0 ], [ 0, 80, 4, 0, 0, 1 ], [ 0, 8, 16, 2, 2, 1 ], [ 0, 4, 1, 13, 0, 2 ], [ 0, 7, 4, 1, 9, 0 ], [ 0, 2, 0, 0, 0, 17 ] ] }, "actionability": { "accuracy": 0.6884, "macro_f1": 0.6666, "per_label": { "none": { "precision": 0.6333, "recall": 0.623, "f1": 0.6281, "support": 61 }, "review": { "precision": 0.5976, "recall": 0.6203, "f1": 0.6087, "support": 79 }, "act": { "precision": 0.7687, "recall": 0.7574, "f1": 0.763, "support": 136 } }, "confusion_matrix": [ [ 38, 14, 9 ], [ 8, 49, 22 ], [ 14, 19, 103 ] ] }, "retention": { "accuracy": 0.6703, "macro_f1": 0.6452, "per_label": { "ephemeral": { "precision": 0.6, "recall": 0.6067, "f1": 0.6034, "support": 89 }, "useful": { "precision": 0.6707, "recall": 0.7857, "f1": 0.7237, "support": 140 }, "remember": { "precision": 0.9545, "recall": 0.4468, "f1": 0.6087, "support": 47 } }, "confusion_matrix": [ [ 54, 35, 0 ], [ 29, 110, 1 ], [ 7, 19, 21 ] ] }, "urgency": { "accuracy": 0.6196, "macro_f1": 0.5411, "per_label": { "low": { "precision": 0.7576, "recall": 0.7353, "f1": 0.7463, "support": 136 }, "medium": { "precision": 0.5, "recall": 0.6316, "f1": 0.5581, "support": 95 }, "high": { "precision": 0.4583, "recall": 0.2444, "f1": 0.3188, "support": 45 } }, "confusion_matrix": [ [ 100, 32, 4 ], [ 26, 60, 9 ], [ 6, 28, 11 ] ] } }, "overall": { "exact_match": 0.308, "macro_average_f1": 0.6544, "automation_safe_accuracy": 1.0, "automation_safe_coverage": 0.0072, "confidence_threshold": 0.8, "confidence_calibration": { "ece": 0.360595, "bins": [ { "range": [ 0.5, 0.6 ], "count": 43, "avg_confidence": 0.5795, "accuracy": 0.1628 }, { "range": [ 0.6, 0.7 ], "count": 154, "avg_confidence": 0.6525, "accuracy": 0.2338 }, { "range": [ 0.7, 0.8 ], "count": 77, "avg_confidence": 0.737, "accuracy": 0.5195 }, { "range": [ 0.8, 0.9 ], "count": 2, "avg_confidence": 0.8115, "accuracy": 1.0 } ] } }, "training": { "epoch": 6, "loss": 2.5877 } }, { "per_head": { "relation_to_previous": { "accuracy": 0.8659, "macro_f1": 0.7757, "per_label": { "new": { "precision": 1.0, "recall": 1.0, "f1": 1.0, "support": 102 }, "follow_up": { "precision": 0.8387, "recall": 0.9176, "f1": 0.8764, "support": 85 }, "correction": { "precision": 0.6296, "recall": 0.5862, "f1": 0.6071, "support": 29 }, "confirmation": { "precision": 0.875, "recall": 0.7, "f1": 0.7778, "support": 20 }, "cancellation": { "precision": 0.6923, "recall": 0.4286, "f1": 0.5294, "support": 21 }, "closure": { "precision": 0.76, "recall": 1.0, "f1": 0.8636, "support": 19 } }, "confusion_matrix": [ [ 102, 0, 0, 0, 0, 0 ], [ 0, 78, 3, 0, 2, 2 ], [ 0, 8, 17, 1, 2, 1 ], [ 0, 4, 0, 14, 0, 2 ], [ 0, 3, 7, 1, 9, 1 ], [ 0, 0, 0, 0, 0, 19 ] ] }, "actionability": { "accuracy": 0.6812, "macro_f1": 0.6558, "per_label": { "none": { "precision": 0.6032, "recall": 0.623, "f1": 0.6129, "support": 61 }, "review": { "precision": 0.5974, "recall": 0.5823, "f1": 0.5897, "support": 79 }, "act": { "precision": 0.7647, "recall": 0.7647, "f1": 0.7647, "support": 136 } }, "confusion_matrix": [ [ 38, 15, 8 ], [ 9, 46, 24 ], [ 16, 16, 104 ] ] }, "retention": { "accuracy": 0.6848, "macro_f1": 0.6739, "per_label": { "ephemeral": { "precision": 0.6235, "recall": 0.5955, "f1": 0.6092, "support": 89 }, "useful": { "precision": 0.6855, "recall": 0.7786, "f1": 0.7291, "support": 140 }, "remember": { "precision": 0.8438, "recall": 0.5745, "f1": 0.6835, "support": 47 } }, "confusion_matrix": [ [ 53, 35, 1 ], [ 27, 109, 4 ], [ 5, 15, 27 ] ] }, "urgency": { "accuracy": 0.6449, "macro_f1": 0.5761, "per_label": { "low": { "precision": 0.75, "recall": 0.7721, "f1": 0.7609, "support": 136 }, "medium": { "precision": 0.5413, "recall": 0.6211, "f1": 0.5784, "support": 95 }, "high": { "precision": 0.5185, "recall": 0.3111, "f1": 0.3889, "support": 45 } }, "confusion_matrix": [ [ 105, 27, 4 ], [ 27, 59, 9 ], [ 8, 23, 14 ] ] } }, "overall": { "exact_match": 0.3116, "macro_average_f1": 0.6704, "automation_safe_accuracy": 0.8, "automation_safe_coverage": 0.0362, "confidence_threshold": 0.8, "confidence_calibration": { "ece": 0.370519, "bins": [ { "range": [ 0.5, 0.6 ], "count": 24, "avg_confidence": 0.5696, "accuracy": 0.25 }, { "range": [ 0.6, 0.7 ], "count": 141, "avg_confidence": 0.6522, "accuracy": 0.2057 }, { "range": [ 0.7, 0.8 ], "count": 101, "avg_confidence": 0.7378, "accuracy": 0.4257 }, { "range": [ 0.8, 0.9 ], "count": 10, "avg_confidence": 0.8123, "accuracy": 0.8 } ] } }, "training": { "epoch": 7, "loss": 2.4515 } }, { "per_head": { "relation_to_previous": { "accuracy": 0.8768, "macro_f1": 0.7893, "per_label": { "new": { "precision": 1.0, "recall": 1.0, "f1": 1.0, "support": 102 }, "follow_up": { "precision": 0.8764, "recall": 0.9176, "f1": 0.8966, "support": 85 }, "correction": { "precision": 0.6786, "recall": 0.6552, "f1": 0.6667, "support": 29 }, "confirmation": { "precision": 0.8235, "recall": 0.7, "f1": 0.7568, "support": 20 }, "cancellation": { "precision": 0.7143, "recall": 0.4762, "f1": 0.5714, "support": 21 }, "closure": { "precision": 0.7308, "recall": 1.0, "f1": 0.8444, "support": 19 } }, "confusion_matrix": [ [ 102, 0, 0, 0, 0, 0 ], [ 0, 78, 3, 0, 2, 2 ], [ 0, 5, 19, 2, 2, 1 ], [ 0, 3, 0, 14, 0, 3 ], [ 0, 3, 6, 1, 10, 1 ], [ 0, 0, 0, 0, 0, 19 ] ] }, "actionability": { "accuracy": 0.7101, "macro_f1": 0.6834, "per_label": { "none": { "precision": 0.6786, "recall": 0.623, "f1": 0.6496, "support": 61 }, "review": { "precision": 0.6571, "recall": 0.5823, "f1": 0.6174, "support": 79 }, "act": { "precision": 0.7467, "recall": 0.8235, "f1": 0.7832, "support": 136 } }, "confusion_matrix": [ [ 38, 13, 10 ], [ 5, 46, 28 ], [ 13, 11, 112 ] ] }, "retention": { "accuracy": 0.7029, "macro_f1": 0.6849, "per_label": { "ephemeral": { "precision": 0.6628, "recall": 0.6404, "f1": 0.6514, "support": 89 }, "useful": { "precision": 0.7025, "recall": 0.7929, "f1": 0.745, "support": 140 }, "remember": { "precision": 0.8125, "recall": 0.5532, "f1": 0.6582, "support": 47 } }, "confusion_matrix": [ [ 57, 31, 1 ], [ 24, 111, 5 ], [ 5, 16, 26 ] ] }, "urgency": { "accuracy": 0.6449, "macro_f1": 0.5777, "per_label": { "low": { "precision": 0.7536, "recall": 0.7647, "f1": 0.7591, "support": 136 }, "medium": { "precision": 0.5357, "recall": 0.6316, "f1": 0.5797, "support": 95 }, "high": { "precision": 0.5385, "recall": 0.3111, "f1": 0.3944, "support": 45 } }, "confusion_matrix": [ [ 104, 28, 4 ], [ 27, 60, 8 ], [ 7, 24, 14 ] ] } }, "overall": { "exact_match": 0.3406, "macro_average_f1": 0.6838, "automation_safe_accuracy": 0.8, "automation_safe_coverage": 0.0543, "confidence_threshold": 0.8, "confidence_calibration": { "ece": 0.347199, "bins": [ { "range": [ 0.5, 0.6 ], "count": 23, "avg_confidence": 0.572, "accuracy": 0.2174 }, { "range": [ 0.6, 0.7 ], "count": 134, "avg_confidence": 0.6518, "accuracy": 0.2239 }, { "range": [ 0.7, 0.8 ], "count": 104, "avg_confidence": 0.7415, "accuracy": 0.4519 }, { "range": [ 0.8, 0.9 ], "count": 15, "avg_confidence": 0.8136, "accuracy": 0.8 } ] } }, "training": { "epoch": 8, "loss": 2.3349 } }, { "per_head": { "relation_to_previous": { "accuracy": 0.8841, "macro_f1": 0.8031, "per_label": { "new": { "precision": 1.0, "recall": 1.0, "f1": 1.0, "support": 102 }, "follow_up": { "precision": 0.8764, "recall": 0.9176, "f1": 0.8966, "support": 85 }, "correction": { "precision": 0.7, "recall": 0.7241, "f1": 0.7119, "support": 29 }, "confirmation": { "precision": 0.875, "recall": 0.7, "f1": 0.7778, "support": 20 }, "cancellation": { "precision": 0.7692, "recall": 0.4762, "f1": 0.5882, "support": 21 }, "closure": { "precision": 0.7308, "recall": 1.0, "f1": 0.8444, "support": 19 } }, "confusion_matrix": [ [ 102, 0, 0, 0, 0, 0 ], [ 0, 78, 3, 0, 2, 2 ], [ 0, 5, 21, 1, 1, 1 ], [ 0, 3, 0, 14, 0, 3 ], [ 0, 3, 6, 1, 10, 1 ], [ 0, 0, 0, 0, 0, 19 ] ] }, "actionability": { "accuracy": 0.6993, "macro_f1": 0.6742, "per_label": { "none": { "precision": 0.629, "recall": 0.6393, "f1": 0.6341, "support": 61 }, "review": { "precision": 0.6267, "recall": 0.5949, "f1": 0.6104, "support": 79 }, "act": { "precision": 0.7698, "recall": 0.7868, "f1": 0.7782, "support": 136 } }, "confusion_matrix": [ [ 39, 14, 8 ], [ 8, 47, 24 ], [ 15, 14, 107 ] ] }, "retention": { "accuracy": 0.6739, "macro_f1": 0.6684, "per_label": { "ephemeral": { "precision": 0.5842, "recall": 0.6629, "f1": 0.6211, "support": 89 }, "useful": { "precision": 0.7042, "recall": 0.7143, "f1": 0.7092, "support": 140 }, "remember": { "precision": 0.8182, "recall": 0.5745, "f1": 0.675, "support": 47 } }, "confusion_matrix": [ [ 59, 29, 1 ], [ 35, 100, 5 ], [ 7, 13, 27 ] ] }, "urgency": { "accuracy": 0.6413, "macro_f1": 0.5729, "per_label": { "low": { "precision": 0.7413, "recall": 0.7794, "f1": 0.7599, "support": 136 }, "medium": { "precision": 0.5327, "recall": 0.6, "f1": 0.5644, "support": 95 }, "high": { "precision": 0.5385, "recall": 0.3111, "f1": 0.3944, "support": 45 } }, "confusion_matrix": [ [ 106, 26, 4 ], [ 30, 57, 8 ], [ 7, 24, 14 ] ] } }, "overall": { "exact_match": 0.3188, "macro_average_f1": 0.6797, "automation_safe_accuracy": 0.6818, "automation_safe_coverage": 0.0797, "confidence_threshold": 0.8, "confidence_calibration": { "ece": 0.370942, "bins": [ { "range": [ 0.5, 0.6 ], "count": 28, "avg_confidence": 0.5705, "accuracy": 0.1071 }, { "range": [ 0.6, 0.7 ], "count": 129, "avg_confidence": 0.6544, "accuracy": 0.2713 }, { "range": [ 0.7, 0.8 ], "count": 97, "avg_confidence": 0.7429, "accuracy": 0.3608 }, { "range": [ 0.8, 0.9 ], "count": 22, "avg_confidence": 0.815, "accuracy": 0.6818 } ] } }, "training": { "epoch": 9, "loss": 2.2636 } }, { "per_head": { "relation_to_previous": { "accuracy": 0.8804, "macro_f1": 0.7966, "per_label": { "new": { "precision": 1.0, "recall": 1.0, "f1": 1.0, "support": 102 }, "follow_up": { "precision": 0.8764, "recall": 0.9176, "f1": 0.8966, "support": 85 }, "correction": { "precision": 0.6897, "recall": 0.6897, "f1": 0.6897, "support": 29 }, "confirmation": { "precision": 0.875, "recall": 0.7, "f1": 0.7778, "support": 20 }, "cancellation": { "precision": 0.7143, "recall": 0.4762, "f1": 0.5714, "support": 21 }, "closure": { "precision": 0.7308, "recall": 1.0, "f1": 0.8444, "support": 19 } }, "confusion_matrix": [ [ 102, 0, 0, 0, 0, 0 ], [ 0, 78, 3, 0, 2, 2 ], [ 0, 5, 20, 1, 2, 1 ], [ 0, 3, 0, 14, 0, 3 ], [ 0, 3, 6, 1, 10, 1 ], [ 0, 0, 0, 0, 0, 19 ] ] }, "actionability": { "accuracy": 0.7174, "macro_f1": 0.697, "per_label": { "none": { "precision": 0.6557, "recall": 0.6557, "f1": 0.6557, "support": 61 }, "review": { "precision": 0.642, "recall": 0.6582, "f1": 0.65, "support": 79 }, "act": { "precision": 0.791, "recall": 0.7794, "f1": 0.7852, "support": 136 } }, "confusion_matrix": [ [ 40, 14, 7 ], [ 6, 52, 21 ], [ 15, 15, 106 ] ] }, "retention": { "accuracy": 0.6848, "macro_f1": 0.6687, "per_label": { "ephemeral": { "precision": 0.6222, "recall": 0.6292, "f1": 0.6257, "support": 89 }, "useful": { "precision": 0.6993, "recall": 0.7643, "f1": 0.7304, "support": 140 }, "remember": { "precision": 0.7879, "recall": 0.5532, "f1": 0.65, "support": 47 } }, "confusion_matrix": [ [ 56, 32, 1 ], [ 27, 107, 6 ], [ 7, 14, 26 ] ] }, "urgency": { "accuracy": 0.6304, "macro_f1": 0.5648, "per_label": { "low": { "precision": 0.7324, "recall": 0.7647, "f1": 0.7482, "support": 136 }, "medium": { "precision": 0.5185, "recall": 0.5895, "f1": 0.5517, "support": 95 }, "high": { "precision": 0.5385, "recall": 0.3111, "f1": 0.3944, "support": 45 } }, "confusion_matrix": [ [ 104, 28, 4 ], [ 31, 56, 8 ], [ 7, 24, 14 ] ] } }, "overall": { "exact_match": 0.337, "macro_average_f1": 0.6818, "automation_safe_accuracy": 0.7, "automation_safe_coverage": 0.0725, "confidence_threshold": 0.8, "confidence_calibration": { "ece": 0.357574, "bins": [ { "range": [ 0.5, 0.6 ], "count": 22, "avg_confidence": 0.5729, "accuracy": 0.1364 }, { "range": [ 0.6, 0.7 ], "count": 128, "avg_confidence": 0.6553, "accuracy": 0.2344 }, { "range": [ 0.7, 0.8 ], "count": 106, "avg_confidence": 0.7441, "accuracy": 0.434 }, { "range": [ 0.8, 0.9 ], "count": 20, "avg_confidence": 0.817, "accuracy": 0.7 } ] } }, "training": { "epoch": 10, "loss": 2.2342 } } ], "best_macro_average_f1": 0.6838 }