tiny-router-checkpoint / history.json
Dexifried's picture
Tiny-router checkpoint (encoder=microsoft/MiniLM-L12-H384-uncased, epochs=10)
db34ed6 verified
{
"epochs": [
{
"per_head": {
"relation_to_previous": {
"accuracy": 0.6775,
"macro_f1": 0.2761,
"per_label": {
"new": {
"precision": 1.0,
"recall": 1.0,
"f1": 1.0,
"support": 102
},
"follow_up": {
"precision": 0.4885,
"recall": 1.0,
"f1": 0.6564,
"support": 85
},
"correction": {
"precision": 0.0,
"recall": 0.0,
"f1": 0.0,
"support": 29
},
"confirmation": {
"precision": 0.0,
"recall": 0.0,
"f1": 0.0,
"support": 20
},
"cancellation": {
"precision": 0.0,
"recall": 0.0,
"f1": 0.0,
"support": 21
},
"closure": {
"precision": 0.0,
"recall": 0.0,
"f1": 0.0,
"support": 19
}
},
"confusion_matrix": [
[
102,
0,
0,
0,
0,
0
],
[
0,
85,
0,
0,
0,
0
],
[
0,
29,
0,
0,
0,
0
],
[
0,
20,
0,
0,
0,
0
],
[
0,
21,
0,
0,
0,
0
],
[
0,
19,
0,
0,
0,
0
]
]
},
"actionability": {
"accuracy": 0.4891,
"macro_f1": 0.219,
"per_label": {
"none": {
"precision": 0.0,
"recall": 0.0,
"f1": 0.0,
"support": 61
},
"review": {
"precision": 0.0,
"recall": 0.0,
"f1": 0.0,
"support": 79
},
"act": {
"precision": 0.4909,
"recall": 0.9926,
"f1": 0.6569,
"support": 136
}
},
"confusion_matrix": [
[
0,
0,
61
],
[
0,
0,
79
],
[
1,
0,
135
]
]
},
"retention": {
"accuracy": 0.5181,
"macro_f1": 0.2477,
"per_label": {
"ephemeral": {
"precision": 1.0,
"recall": 0.0337,
"f1": 0.0652,
"support": 89
},
"useful": {
"precision": 0.5128,
"recall": 1.0,
"f1": 0.678,
"support": 140
},
"remember": {
"precision": 0.0,
"recall": 0.0,
"f1": 0.0,
"support": 47
}
},
"confusion_matrix": [
[
3,
86,
0
],
[
0,
140,
0
],
[
0,
47,
0
]
]
},
"urgency": {
"accuracy": 0.4928,
"macro_f1": 0.2201,
"per_label": {
"low": {
"precision": 0.4928,
"recall": 1.0,
"f1": 0.6602,
"support": 136
},
"medium": {
"precision": 0.0,
"recall": 0.0,
"f1": 0.0,
"support": 95
},
"high": {
"precision": 0.0,
"recall": 0.0,
"f1": 0.0,
"support": 45
}
},
"confusion_matrix": [
[
136,
0,
0
],
[
95,
0,
0
],
[
45,
0,
0
]
]
}
},
"overall": {
"exact_match": 0.0616,
"macro_average_f1": 0.2407,
"automation_safe_accuracy": 0.0,
"automation_safe_coverage": 0.0,
"confidence_threshold": 0.8,
"confidence_calibration": {
"ece": 0.463661,
"bins": [
{
"range": [
0.4,
0.5
],
"count": 58,
"avg_confidence": 0.481,
"accuracy": 0.0517
},
{
"range": [
0.5,
0.6
],
"count": 218,
"avg_confidence": 0.537,
"accuracy": 0.0642
}
]
}
},
"training": {
"epoch": 1,
"loss": 4.6568
}
},
{
"per_head": {
"relation_to_previous": {
"accuracy": 0.7283,
"macro_f1": 0.4422,
"per_label": {
"new": {
"precision": 1.0,
"recall": 1.0,
"f1": 1.0,
"support": 102
},
"follow_up": {
"precision": 0.6124,
"recall": 0.9294,
"f1": 0.7383,
"support": 85
},
"correction": {
"precision": 0.0,
"recall": 0.0,
"f1": 0.0,
"support": 29
},
"confirmation": {
"precision": 1.0,
"recall": 0.2,
"f1": 0.3333,
"support": 20
},
"cancellation": {
"precision": 0.0,
"recall": 0.0,
"f1": 0.0,
"support": 21
},
"closure": {
"precision": 0.4444,
"recall": 0.8421,
"f1": 0.5818,
"support": 19
}
},
"confusion_matrix": [
[
102,
0,
0,
0,
0,
0
],
[
0,
79,
0,
0,
0,
6
],
[
0,
24,
0,
0,
0,
5
],
[
0,
8,
3,
4,
0,
5
],
[
0,
15,
2,
0,
0,
4
],
[
0,
3,
0,
0,
0,
16
]
]
},
"actionability": {
"accuracy": 0.5688,
"macro_f1": 0.5548,
"per_label": {
"none": {
"precision": 0.5303,
"recall": 0.5738,
"f1": 0.5512,
"support": 61
},
"review": {
"precision": 0.4124,
"recall": 0.5063,
"f1": 0.4545,
"support": 79
},
"act": {
"precision": 0.7257,
"recall": 0.6029,
"f1": 0.6586,
"support": 136
}
},
"confusion_matrix": [
[
35,
18,
8
],
[
16,
40,
23
],
[
15,
39,
82
]
]
},
"retention": {
"accuracy": 0.5254,
"macro_f1": 0.3733,
"per_label": {
"ephemeral": {
"precision": 0.4845,
"recall": 0.5281,
"f1": 0.5054,
"support": 89
},
"useful": {
"precision": 0.5475,
"recall": 0.7,
"f1": 0.6144,
"support": 140
},
"remember": {
"precision": 0.0,
"recall": 0.0,
"f1": 0.0,
"support": 47
}
},
"confusion_matrix": [
[
47,
42,
0
],
[
42,
98,
0
],
[
8,
39,
0
]
]
},
"urgency": {
"accuracy": 0.5362,
"macro_f1": 0.3555,
"per_label": {
"low": {
"precision": 0.5604,
"recall": 0.8529,
"f1": 0.6764,
"support": 136
},
"medium": {
"precision": 0.4638,
"recall": 0.3368,
"f1": 0.3902,
"support": 95
},
"high": {
"precision": 0.0,
"recall": 0.0,
"f1": 0.0,
"support": 45
}
},
"confusion_matrix": [
[
116,
20,
0
],
[
63,
32,
0
],
[
28,
17,
0
]
]
}
},
"overall": {
"exact_match": 0.1123,
"macro_average_f1": 0.4314,
"automation_safe_accuracy": 0.0,
"automation_safe_coverage": 0.0,
"confidence_threshold": 0.8,
"confidence_calibration": {
"ece": 0.445582,
"bins": [
{
"range": [
0.4,
0.5
],
"count": 44,
"avg_confidence": 0.4826,
"accuracy": 0.0455
},
{
"range": [
0.5,
0.6
],
"count": 208,
"avg_confidence": 0.5677,
"accuracy": 0.125
},
{
"range": [
0.6,
0.7
],
"count": 24,
"avg_confidence": 0.6107,
"accuracy": 0.125
}
]
}
},
"training": {
"epoch": 2,
"loss": 3.7776
}
},
{
"per_head": {
"relation_to_previous": {
"accuracy": 0.808,
"macro_f1": 0.6475,
"per_label": {
"new": {
"precision": 1.0,
"recall": 1.0,
"f1": 1.0,
"support": 102
},
"follow_up": {
"precision": 0.7009,
"recall": 0.9647,
"f1": 0.8119,
"support": 85
},
"correction": {
"precision": 0.5294,
"recall": 0.3103,
"f1": 0.3913,
"support": 29
},
"confirmation": {
"precision": 0.9231,
"recall": 0.6,
"f1": 0.7273,
"support": 20
},
"cancellation": {
"precision": 0.75,
"recall": 0.1429,
"f1": 0.24,
"support": 21
},
"closure": {
"precision": 0.6522,
"recall": 0.7895,
"f1": 0.7143,
"support": 19
}
},
"confusion_matrix": [
[
102,
0,
0,
0,
0,
0
],
[
0,
82,
1,
0,
0,
2
],
[
0,
16,
9,
1,
1,
2
],
[
0,
4,
0,
12,
0,
4
],
[
0,
11,
7,
0,
3,
0
],
[
0,
4,
0,
0,
0,
15
]
]
},
"actionability": {
"accuracy": 0.6304,
"macro_f1": 0.5798,
"per_label": {
"none": {
"precision": 0.6591,
"recall": 0.4754,
"f1": 0.5524,
"support": 61
},
"review": {
"precision": 0.5882,
"recall": 0.3797,
"f1": 0.4615,
"support": 79
},
"act": {
"precision": 0.6354,
"recall": 0.8456,
"f1": 0.7256,
"support": 136
}
},
"confusion_matrix": [
[
29,
7,
25
],
[
8,
30,
41
],
[
7,
14,
115
]
]
},
"retention": {
"accuracy": 0.6703,
"macro_f1": 0.6498,
"per_label": {
"ephemeral": {
"precision": 0.661,
"recall": 0.4382,
"f1": 0.527,
"support": 89
},
"useful": {
"precision": 0.6398,
"recall": 0.85,
"f1": 0.7301,
"support": 140
},
"remember": {
"precision": 0.871,
"recall": 0.5745,
"f1": 0.6923,
"support": 47
}
},
"confusion_matrix": [
[
39,
49,
1
],
[
18,
119,
3
],
[
2,
18,
27
]
]
},
"urgency": {
"accuracy": 0.5688,
"macro_f1": 0.4235,
"per_label": {
"low": {
"precision": 0.694,
"recall": 0.6838,
"f1": 0.6889,
"support": 136
},
"medium": {
"precision": 0.4565,
"recall": 0.6632,
"f1": 0.5408,
"support": 95
},
"high": {
"precision": 0.25,
"recall": 0.0222,
"f1": 0.0408,
"support": 45
}
},
"confusion_matrix": [
[
93,
42,
1
],
[
30,
63,
2
],
[
11,
33,
1
]
]
}
},
"overall": {
"exact_match": 0.2101,
"macro_average_f1": 0.5752,
"automation_safe_accuracy": 0.0,
"automation_safe_coverage": 0.0,
"confidence_threshold": 0.8,
"confidence_calibration": {
"ece": 0.396414,
"bins": [
{
"range": [
0.4,
0.5
],
"count": 10,
"avg_confidence": 0.4772,
"accuracy": 0.2
},
{
"range": [
0.5,
0.6
],
"count": 128,
"avg_confidence": 0.5669,
"accuracy": 0.1484
},
{
"range": [
0.6,
0.7
],
"count": 129,
"avg_confidence": 0.64,
"accuracy": 0.2326
},
{
"range": [
0.7,
0.8
],
"count": 9,
"avg_confidence": 0.7196,
"accuracy": 0.7778
}
]
}
},
"training": {
"epoch": 3,
"loss": 3.3415
}
},
{
"per_head": {
"relation_to_previous": {
"accuracy": 0.8333,
"macro_f1": 0.694,
"per_label": {
"new": {
"precision": 1.0,
"recall": 1.0,
"f1": 1.0,
"support": 102
},
"follow_up": {
"precision": 0.8571,
"recall": 0.9176,
"f1": 0.8864,
"support": 85
},
"correction": {
"precision": 0.5652,
"recall": 0.4483,
"f1": 0.5,
"support": 29
},
"confirmation": {
"precision": 0.8,
"recall": 0.6,
"f1": 0.6857,
"support": 20
},
"cancellation": {
"precision": 0.5455,
"recall": 0.2857,
"f1": 0.375,
"support": 21
},
"closure": {
"precision": 0.5588,
"recall": 1.0,
"f1": 0.717,
"support": 19
}
},
"confusion_matrix": [
[
102,
0,
0,
0,
0,
0
],
[
0,
78,
3,
0,
1,
3
],
[
0,
7,
13,
1,
4,
4
],
[
0,
2,
0,
12,
0,
6
],
[
0,
4,
7,
2,
6,
2
],
[
0,
0,
0,
0,
0,
19
]
]
},
"actionability": {
"accuracy": 0.6486,
"macro_f1": 0.6252,
"per_label": {
"none": {
"precision": 0.5634,
"recall": 0.6557,
"f1": 0.6061,
"support": 61
},
"review": {
"precision": 0.5882,
"recall": 0.5063,
"f1": 0.5442,
"support": 79
},
"act": {
"precision": 0.7226,
"recall": 0.7279,
"f1": 0.7253,
"support": 136
}
},
"confusion_matrix": [
[
40,
8,
13
],
[
14,
40,
25
],
[
17,
20,
99
]
]
},
"retention": {
"accuracy": 0.6703,
"macro_f1": 0.6542,
"per_label": {
"ephemeral": {
"precision": 0.6067,
"recall": 0.6067,
"f1": 0.6067,
"support": 89
},
"useful": {
"precision": 0.673,
"recall": 0.7643,
"f1": 0.7157,
"support": 140
},
"remember": {
"precision": 0.8571,
"recall": 0.5106,
"f1": 0.64,
"support": 47
}
},
"confusion_matrix": [
[
54,
35,
0
],
[
29,
107,
4
],
[
6,
17,
24
]
]
},
"urgency": {
"accuracy": 0.5906,
"macro_f1": 0.4633,
"per_label": {
"low": {
"precision": 0.6477,
"recall": 0.8382,
"f1": 0.7308,
"support": 136
},
"medium": {
"precision": 0.4783,
"recall": 0.4632,
"f1": 0.4706,
"support": 95
},
"high": {
"precision": 0.625,
"recall": 0.1111,
"f1": 0.1887,
"support": 45
}
},
"confusion_matrix": [
[
114,
21,
1
],
[
49,
44,
2
],
[
13,
27,
5
]
]
}
},
"overall": {
"exact_match": 0.2319,
"macro_average_f1": 0.6092,
"automation_safe_accuracy": 0.0,
"automation_safe_coverage": 0.0,
"confidence_threshold": 0.8,
"confidence_calibration": {
"ece": 0.402291,
"bins": [
{
"range": [
0.4,
0.5
],
"count": 4,
"avg_confidence": 0.4762,
"accuracy": 0.0
},
{
"range": [
0.5,
0.6
],
"count": 74,
"avg_confidence": 0.563,
"accuracy": 0.1486
},
{
"range": [
0.6,
0.7
],
"count": 159,
"avg_confidence": 0.6473,
"accuracy": 0.2138
},
{
"range": [
0.7,
0.8
],
"count": 39,
"avg_confidence": 0.7317,
"accuracy": 0.4872
}
]
}
},
"training": {
"epoch": 4,
"loss": 2.9715
}
},
{
"per_head": {
"relation_to_previous": {
"accuracy": 0.8442,
"macro_f1": 0.7157,
"per_label": {
"new": {
"precision": 1.0,
"recall": 1.0,
"f1": 1.0,
"support": 102
},
"follow_up": {
"precision": 0.8571,
"recall": 0.9176,
"f1": 0.8864,
"support": 85
},
"correction": {
"precision": 0.5926,
"recall": 0.5517,
"f1": 0.5714,
"support": 29
},
"confirmation": {
"precision": 0.8571,
"recall": 0.6,
"f1": 0.7059,
"support": 20
},
"cancellation": {
"precision": 0.6667,
"recall": 0.2857,
"f1": 0.4,
"support": 21
},
"closure": {
"precision": 0.5758,
"recall": 1.0,
"f1": 0.7308,
"support": 19
}
},
"confusion_matrix": [
[
102,
0,
0,
0,
0,
0
],
[
0,
78,
3,
0,
1,
3
],
[
0,
7,
16,
1,
2,
3
],
[
0,
2,
0,
12,
0,
6
],
[
0,
4,
8,
1,
6,
2
],
[
0,
0,
0,
0,
0,
19
]
]
},
"actionability": {
"accuracy": 0.6558,
"macro_f1": 0.6342,
"per_label": {
"none": {
"precision": 0.5972,
"recall": 0.7049,
"f1": 0.6466,
"support": 61
},
"review": {
"precision": 0.5493,
"recall": 0.4937,
"f1": 0.52,
"support": 79
},
"act": {
"precision": 0.7444,
"recall": 0.7279,
"f1": 0.7361,
"support": 136
}
},
"confusion_matrix": [
[
43,
12,
6
],
[
12,
39,
28
],
[
17,
20,
99
]
]
},
"retention": {
"accuracy": 0.6703,
"macro_f1": 0.6666,
"per_label": {
"ephemeral": {
"precision": 0.5816,
"recall": 0.6404,
"f1": 0.6096,
"support": 89
},
"useful": {
"precision": 0.6846,
"recall": 0.7286,
"f1": 0.7059,
"support": 140
},
"remember": {
"precision": 0.8966,
"recall": 0.5532,
"f1": 0.6842,
"support": 47
}
},
"confusion_matrix": [
[
57,
32,
0
],
[
35,
102,
3
],
[
6,
15,
26
]
]
},
"urgency": {
"accuracy": 0.5978,
"macro_f1": 0.4845,
"per_label": {
"low": {
"precision": 0.6948,
"recall": 0.7868,
"f1": 0.7379,
"support": 136
},
"medium": {
"precision": 0.4815,
"recall": 0.5474,
"f1": 0.5123,
"support": 95
},
"high": {
"precision": 0.4286,
"recall": 0.1333,
"f1": 0.2034,
"support": 45
}
},
"confusion_matrix": [
[
107,
27,
2
],
[
37,
52,
6
],
[
10,
29,
6
]
]
}
},
"overall": {
"exact_match": 0.25,
"macro_average_f1": 0.6252,
"automation_safe_accuracy": 0.8,
"automation_safe_coverage": 0.0181,
"confidence_threshold": 0.8,
"confidence_calibration": {
"ece": 0.407114,
"bins": [
{
"range": [
0.4,
0.5
],
"count": 1,
"avg_confidence": 0.4972,
"accuracy": 0.0
},
{
"range": [
0.5,
0.6
],
"count": 55,
"avg_confidence": 0.5704,
"accuracy": 0.1818
},
{
"range": [
0.6,
0.7
],
"count": 143,
"avg_confidence": 0.6475,
"accuracy": 0.1538
},
{
"range": [
0.7,
0.8
],
"count": 72,
"avg_confidence": 0.7343,
"accuracy": 0.4583
},
{
"range": [
0.8,
0.9
],
"count": 5,
"avg_confidence": 0.8067,
"accuracy": 0.8
}
]
}
},
"training": {
"epoch": 5,
"loss": 2.7301
}
},
{
"per_head": {
"relation_to_previous": {
"accuracy": 0.8587,
"macro_f1": 0.7646,
"per_label": {
"new": {
"precision": 1.0,
"recall": 1.0,
"f1": 1.0,
"support": 102
},
"follow_up": {
"precision": 0.7921,
"recall": 0.9412,
"f1": 0.8602,
"support": 85
},
"correction": {
"precision": 0.64,
"recall": 0.5517,
"f1": 0.5926,
"support": 29
},
"confirmation": {
"precision": 0.8125,
"recall": 0.65,
"f1": 0.7222,
"support": 20
},
"cancellation": {
"precision": 0.8182,
"recall": 0.4286,
"f1": 0.5625,
"support": 21
},
"closure": {
"precision": 0.8095,
"recall": 0.8947,
"f1": 0.85,
"support": 19
}
},
"confusion_matrix": [
[
102,
0,
0,
0,
0,
0
],
[
0,
80,
4,
0,
0,
1
],
[
0,
8,
16,
2,
2,
1
],
[
0,
4,
1,
13,
0,
2
],
[
0,
7,
4,
1,
9,
0
],
[
0,
2,
0,
0,
0,
17
]
]
},
"actionability": {
"accuracy": 0.6884,
"macro_f1": 0.6666,
"per_label": {
"none": {
"precision": 0.6333,
"recall": 0.623,
"f1": 0.6281,
"support": 61
},
"review": {
"precision": 0.5976,
"recall": 0.6203,
"f1": 0.6087,
"support": 79
},
"act": {
"precision": 0.7687,
"recall": 0.7574,
"f1": 0.763,
"support": 136
}
},
"confusion_matrix": [
[
38,
14,
9
],
[
8,
49,
22
],
[
14,
19,
103
]
]
},
"retention": {
"accuracy": 0.6703,
"macro_f1": 0.6452,
"per_label": {
"ephemeral": {
"precision": 0.6,
"recall": 0.6067,
"f1": 0.6034,
"support": 89
},
"useful": {
"precision": 0.6707,
"recall": 0.7857,
"f1": 0.7237,
"support": 140
},
"remember": {
"precision": 0.9545,
"recall": 0.4468,
"f1": 0.6087,
"support": 47
}
},
"confusion_matrix": [
[
54,
35,
0
],
[
29,
110,
1
],
[
7,
19,
21
]
]
},
"urgency": {
"accuracy": 0.6196,
"macro_f1": 0.5411,
"per_label": {
"low": {
"precision": 0.7576,
"recall": 0.7353,
"f1": 0.7463,
"support": 136
},
"medium": {
"precision": 0.5,
"recall": 0.6316,
"f1": 0.5581,
"support": 95
},
"high": {
"precision": 0.4583,
"recall": 0.2444,
"f1": 0.3188,
"support": 45
}
},
"confusion_matrix": [
[
100,
32,
4
],
[
26,
60,
9
],
[
6,
28,
11
]
]
}
},
"overall": {
"exact_match": 0.308,
"macro_average_f1": 0.6544,
"automation_safe_accuracy": 1.0,
"automation_safe_coverage": 0.0072,
"confidence_threshold": 0.8,
"confidence_calibration": {
"ece": 0.360595,
"bins": [
{
"range": [
0.5,
0.6
],
"count": 43,
"avg_confidence": 0.5795,
"accuracy": 0.1628
},
{
"range": [
0.6,
0.7
],
"count": 154,
"avg_confidence": 0.6525,
"accuracy": 0.2338
},
{
"range": [
0.7,
0.8
],
"count": 77,
"avg_confidence": 0.737,
"accuracy": 0.5195
},
{
"range": [
0.8,
0.9
],
"count": 2,
"avg_confidence": 0.8115,
"accuracy": 1.0
}
]
}
},
"training": {
"epoch": 6,
"loss": 2.5877
}
},
{
"per_head": {
"relation_to_previous": {
"accuracy": 0.8659,
"macro_f1": 0.7757,
"per_label": {
"new": {
"precision": 1.0,
"recall": 1.0,
"f1": 1.0,
"support": 102
},
"follow_up": {
"precision": 0.8387,
"recall": 0.9176,
"f1": 0.8764,
"support": 85
},
"correction": {
"precision": 0.6296,
"recall": 0.5862,
"f1": 0.6071,
"support": 29
},
"confirmation": {
"precision": 0.875,
"recall": 0.7,
"f1": 0.7778,
"support": 20
},
"cancellation": {
"precision": 0.6923,
"recall": 0.4286,
"f1": 0.5294,
"support": 21
},
"closure": {
"precision": 0.76,
"recall": 1.0,
"f1": 0.8636,
"support": 19
}
},
"confusion_matrix": [
[
102,
0,
0,
0,
0,
0
],
[
0,
78,
3,
0,
2,
2
],
[
0,
8,
17,
1,
2,
1
],
[
0,
4,
0,
14,
0,
2
],
[
0,
3,
7,
1,
9,
1
],
[
0,
0,
0,
0,
0,
19
]
]
},
"actionability": {
"accuracy": 0.6812,
"macro_f1": 0.6558,
"per_label": {
"none": {
"precision": 0.6032,
"recall": 0.623,
"f1": 0.6129,
"support": 61
},
"review": {
"precision": 0.5974,
"recall": 0.5823,
"f1": 0.5897,
"support": 79
},
"act": {
"precision": 0.7647,
"recall": 0.7647,
"f1": 0.7647,
"support": 136
}
},
"confusion_matrix": [
[
38,
15,
8
],
[
9,
46,
24
],
[
16,
16,
104
]
]
},
"retention": {
"accuracy": 0.6848,
"macro_f1": 0.6739,
"per_label": {
"ephemeral": {
"precision": 0.6235,
"recall": 0.5955,
"f1": 0.6092,
"support": 89
},
"useful": {
"precision": 0.6855,
"recall": 0.7786,
"f1": 0.7291,
"support": 140
},
"remember": {
"precision": 0.8438,
"recall": 0.5745,
"f1": 0.6835,
"support": 47
}
},
"confusion_matrix": [
[
53,
35,
1
],
[
27,
109,
4
],
[
5,
15,
27
]
]
},
"urgency": {
"accuracy": 0.6449,
"macro_f1": 0.5761,
"per_label": {
"low": {
"precision": 0.75,
"recall": 0.7721,
"f1": 0.7609,
"support": 136
},
"medium": {
"precision": 0.5413,
"recall": 0.6211,
"f1": 0.5784,
"support": 95
},
"high": {
"precision": 0.5185,
"recall": 0.3111,
"f1": 0.3889,
"support": 45
}
},
"confusion_matrix": [
[
105,
27,
4
],
[
27,
59,
9
],
[
8,
23,
14
]
]
}
},
"overall": {
"exact_match": 0.3116,
"macro_average_f1": 0.6704,
"automation_safe_accuracy": 0.8,
"automation_safe_coverage": 0.0362,
"confidence_threshold": 0.8,
"confidence_calibration": {
"ece": 0.370519,
"bins": [
{
"range": [
0.5,
0.6
],
"count": 24,
"avg_confidence": 0.5696,
"accuracy": 0.25
},
{
"range": [
0.6,
0.7
],
"count": 141,
"avg_confidence": 0.6522,
"accuracy": 0.2057
},
{
"range": [
0.7,
0.8
],
"count": 101,
"avg_confidence": 0.7378,
"accuracy": 0.4257
},
{
"range": [
0.8,
0.9
],
"count": 10,
"avg_confidence": 0.8123,
"accuracy": 0.8
}
]
}
},
"training": {
"epoch": 7,
"loss": 2.4515
}
},
{
"per_head": {
"relation_to_previous": {
"accuracy": 0.8768,
"macro_f1": 0.7893,
"per_label": {
"new": {
"precision": 1.0,
"recall": 1.0,
"f1": 1.0,
"support": 102
},
"follow_up": {
"precision": 0.8764,
"recall": 0.9176,
"f1": 0.8966,
"support": 85
},
"correction": {
"precision": 0.6786,
"recall": 0.6552,
"f1": 0.6667,
"support": 29
},
"confirmation": {
"precision": 0.8235,
"recall": 0.7,
"f1": 0.7568,
"support": 20
},
"cancellation": {
"precision": 0.7143,
"recall": 0.4762,
"f1": 0.5714,
"support": 21
},
"closure": {
"precision": 0.7308,
"recall": 1.0,
"f1": 0.8444,
"support": 19
}
},
"confusion_matrix": [
[
102,
0,
0,
0,
0,
0
],
[
0,
78,
3,
0,
2,
2
],
[
0,
5,
19,
2,
2,
1
],
[
0,
3,
0,
14,
0,
3
],
[
0,
3,
6,
1,
10,
1
],
[
0,
0,
0,
0,
0,
19
]
]
},
"actionability": {
"accuracy": 0.7101,
"macro_f1": 0.6834,
"per_label": {
"none": {
"precision": 0.6786,
"recall": 0.623,
"f1": 0.6496,
"support": 61
},
"review": {
"precision": 0.6571,
"recall": 0.5823,
"f1": 0.6174,
"support": 79
},
"act": {
"precision": 0.7467,
"recall": 0.8235,
"f1": 0.7832,
"support": 136
}
},
"confusion_matrix": [
[
38,
13,
10
],
[
5,
46,
28
],
[
13,
11,
112
]
]
},
"retention": {
"accuracy": 0.7029,
"macro_f1": 0.6849,
"per_label": {
"ephemeral": {
"precision": 0.6628,
"recall": 0.6404,
"f1": 0.6514,
"support": 89
},
"useful": {
"precision": 0.7025,
"recall": 0.7929,
"f1": 0.745,
"support": 140
},
"remember": {
"precision": 0.8125,
"recall": 0.5532,
"f1": 0.6582,
"support": 47
}
},
"confusion_matrix": [
[
57,
31,
1
],
[
24,
111,
5
],
[
5,
16,
26
]
]
},
"urgency": {
"accuracy": 0.6449,
"macro_f1": 0.5777,
"per_label": {
"low": {
"precision": 0.7536,
"recall": 0.7647,
"f1": 0.7591,
"support": 136
},
"medium": {
"precision": 0.5357,
"recall": 0.6316,
"f1": 0.5797,
"support": 95
},
"high": {
"precision": 0.5385,
"recall": 0.3111,
"f1": 0.3944,
"support": 45
}
},
"confusion_matrix": [
[
104,
28,
4
],
[
27,
60,
8
],
[
7,
24,
14
]
]
}
},
"overall": {
"exact_match": 0.3406,
"macro_average_f1": 0.6838,
"automation_safe_accuracy": 0.8,
"automation_safe_coverage": 0.0543,
"confidence_threshold": 0.8,
"confidence_calibration": {
"ece": 0.347199,
"bins": [
{
"range": [
0.5,
0.6
],
"count": 23,
"avg_confidence": 0.572,
"accuracy": 0.2174
},
{
"range": [
0.6,
0.7
],
"count": 134,
"avg_confidence": 0.6518,
"accuracy": 0.2239
},
{
"range": [
0.7,
0.8
],
"count": 104,
"avg_confidence": 0.7415,
"accuracy": 0.4519
},
{
"range": [
0.8,
0.9
],
"count": 15,
"avg_confidence": 0.8136,
"accuracy": 0.8
}
]
}
},
"training": {
"epoch": 8,
"loss": 2.3349
}
},
{
"per_head": {
"relation_to_previous": {
"accuracy": 0.8841,
"macro_f1": 0.8031,
"per_label": {
"new": {
"precision": 1.0,
"recall": 1.0,
"f1": 1.0,
"support": 102
},
"follow_up": {
"precision": 0.8764,
"recall": 0.9176,
"f1": 0.8966,
"support": 85
},
"correction": {
"precision": 0.7,
"recall": 0.7241,
"f1": 0.7119,
"support": 29
},
"confirmation": {
"precision": 0.875,
"recall": 0.7,
"f1": 0.7778,
"support": 20
},
"cancellation": {
"precision": 0.7692,
"recall": 0.4762,
"f1": 0.5882,
"support": 21
},
"closure": {
"precision": 0.7308,
"recall": 1.0,
"f1": 0.8444,
"support": 19
}
},
"confusion_matrix": [
[
102,
0,
0,
0,
0,
0
],
[
0,
78,
3,
0,
2,
2
],
[
0,
5,
21,
1,
1,
1
],
[
0,
3,
0,
14,
0,
3
],
[
0,
3,
6,
1,
10,
1
],
[
0,
0,
0,
0,
0,
19
]
]
},
"actionability": {
"accuracy": 0.6993,
"macro_f1": 0.6742,
"per_label": {
"none": {
"precision": 0.629,
"recall": 0.6393,
"f1": 0.6341,
"support": 61
},
"review": {
"precision": 0.6267,
"recall": 0.5949,
"f1": 0.6104,
"support": 79
},
"act": {
"precision": 0.7698,
"recall": 0.7868,
"f1": 0.7782,
"support": 136
}
},
"confusion_matrix": [
[
39,
14,
8
],
[
8,
47,
24
],
[
15,
14,
107
]
]
},
"retention": {
"accuracy": 0.6739,
"macro_f1": 0.6684,
"per_label": {
"ephemeral": {
"precision": 0.5842,
"recall": 0.6629,
"f1": 0.6211,
"support": 89
},
"useful": {
"precision": 0.7042,
"recall": 0.7143,
"f1": 0.7092,
"support": 140
},
"remember": {
"precision": 0.8182,
"recall": 0.5745,
"f1": 0.675,
"support": 47
}
},
"confusion_matrix": [
[
59,
29,
1
],
[
35,
100,
5
],
[
7,
13,
27
]
]
},
"urgency": {
"accuracy": 0.6413,
"macro_f1": 0.5729,
"per_label": {
"low": {
"precision": 0.7413,
"recall": 0.7794,
"f1": 0.7599,
"support": 136
},
"medium": {
"precision": 0.5327,
"recall": 0.6,
"f1": 0.5644,
"support": 95
},
"high": {
"precision": 0.5385,
"recall": 0.3111,
"f1": 0.3944,
"support": 45
}
},
"confusion_matrix": [
[
106,
26,
4
],
[
30,
57,
8
],
[
7,
24,
14
]
]
}
},
"overall": {
"exact_match": 0.3188,
"macro_average_f1": 0.6797,
"automation_safe_accuracy": 0.6818,
"automation_safe_coverage": 0.0797,
"confidence_threshold": 0.8,
"confidence_calibration": {
"ece": 0.370942,
"bins": [
{
"range": [
0.5,
0.6
],
"count": 28,
"avg_confidence": 0.5705,
"accuracy": 0.1071
},
{
"range": [
0.6,
0.7
],
"count": 129,
"avg_confidence": 0.6544,
"accuracy": 0.2713
},
{
"range": [
0.7,
0.8
],
"count": 97,
"avg_confidence": 0.7429,
"accuracy": 0.3608
},
{
"range": [
0.8,
0.9
],
"count": 22,
"avg_confidence": 0.815,
"accuracy": 0.6818
}
]
}
},
"training": {
"epoch": 9,
"loss": 2.2636
}
},
{
"per_head": {
"relation_to_previous": {
"accuracy": 0.8804,
"macro_f1": 0.7966,
"per_label": {
"new": {
"precision": 1.0,
"recall": 1.0,
"f1": 1.0,
"support": 102
},
"follow_up": {
"precision": 0.8764,
"recall": 0.9176,
"f1": 0.8966,
"support": 85
},
"correction": {
"precision": 0.6897,
"recall": 0.6897,
"f1": 0.6897,
"support": 29
},
"confirmation": {
"precision": 0.875,
"recall": 0.7,
"f1": 0.7778,
"support": 20
},
"cancellation": {
"precision": 0.7143,
"recall": 0.4762,
"f1": 0.5714,
"support": 21
},
"closure": {
"precision": 0.7308,
"recall": 1.0,
"f1": 0.8444,
"support": 19
}
},
"confusion_matrix": [
[
102,
0,
0,
0,
0,
0
],
[
0,
78,
3,
0,
2,
2
],
[
0,
5,
20,
1,
2,
1
],
[
0,
3,
0,
14,
0,
3
],
[
0,
3,
6,
1,
10,
1
],
[
0,
0,
0,
0,
0,
19
]
]
},
"actionability": {
"accuracy": 0.7174,
"macro_f1": 0.697,
"per_label": {
"none": {
"precision": 0.6557,
"recall": 0.6557,
"f1": 0.6557,
"support": 61
},
"review": {
"precision": 0.642,
"recall": 0.6582,
"f1": 0.65,
"support": 79
},
"act": {
"precision": 0.791,
"recall": 0.7794,
"f1": 0.7852,
"support": 136
}
},
"confusion_matrix": [
[
40,
14,
7
],
[
6,
52,
21
],
[
15,
15,
106
]
]
},
"retention": {
"accuracy": 0.6848,
"macro_f1": 0.6687,
"per_label": {
"ephemeral": {
"precision": 0.6222,
"recall": 0.6292,
"f1": 0.6257,
"support": 89
},
"useful": {
"precision": 0.6993,
"recall": 0.7643,
"f1": 0.7304,
"support": 140
},
"remember": {
"precision": 0.7879,
"recall": 0.5532,
"f1": 0.65,
"support": 47
}
},
"confusion_matrix": [
[
56,
32,
1
],
[
27,
107,
6
],
[
7,
14,
26
]
]
},
"urgency": {
"accuracy": 0.6304,
"macro_f1": 0.5648,
"per_label": {
"low": {
"precision": 0.7324,
"recall": 0.7647,
"f1": 0.7482,
"support": 136
},
"medium": {
"precision": 0.5185,
"recall": 0.5895,
"f1": 0.5517,
"support": 95
},
"high": {
"precision": 0.5385,
"recall": 0.3111,
"f1": 0.3944,
"support": 45
}
},
"confusion_matrix": [
[
104,
28,
4
],
[
31,
56,
8
],
[
7,
24,
14
]
]
}
},
"overall": {
"exact_match": 0.337,
"macro_average_f1": 0.6818,
"automation_safe_accuracy": 0.7,
"automation_safe_coverage": 0.0725,
"confidence_threshold": 0.8,
"confidence_calibration": {
"ece": 0.357574,
"bins": [
{
"range": [
0.5,
0.6
],
"count": 22,
"avg_confidence": 0.5729,
"accuracy": 0.1364
},
{
"range": [
0.6,
0.7
],
"count": 128,
"avg_confidence": 0.6553,
"accuracy": 0.2344
},
{
"range": [
0.7,
0.8
],
"count": 106,
"avg_confidence": 0.7441,
"accuracy": 0.434
},
{
"range": [
0.8,
0.9
],
"count": 20,
"avg_confidence": 0.817,
"accuracy": 0.7
}
]
}
},
"training": {
"epoch": 10,
"loss": 2.2342
}
}
],
"best_macro_average_f1": 0.6838
}