{
"accepted_accuracy": 0.9619,
"accepted_coverage": 1.0,
"accuracy": 0.9619,
"confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/decision_phase_difficulty_benchmark_confusion_matrix.csv",
"count": 105,
"dataset_path": "/content/agentic-intent-classifier/data/decision_phase_benchmark.jsonl",
"difficulty_breakdown": {
"easy": {
"accepted_accuracy": 0.9714,
"accepted_coverage": 1.0,
"accuracy": 0.9714,
"count": 35,
"fallback_rate": 0.0,
"macro_f1": 0.9711
},
"hard": {
"accepted_accuracy": 0.9143,
"accepted_coverage": 1.0,
"accuracy": 0.9143,
"count": 35,
"fallback_rate": 0.0,
"macro_f1": 0.9194
},
"medium": {
"accepted_accuracy": 1.0,
"accepted_coverage": 1.0,
"accuracy": 1.0,
"count": 35,
"fallback_rate": 0.0,
"macro_f1": 1.0
}
},
"fallback_rate": 0.0,
"head": "decision_phase",
"macro_f1": 0.9635,
"per_class_metrics": {
"accuracy": 0.9619047619047619,
"action": {
"f1-score": 0.9655172413793104,
"precision": 1.0,
"recall": 0.9333333333333333,
"support": 15.0
},
"awareness": {
"f1-score": 0.9655172413793104,
"precision": 1.0,
"recall": 0.9333333333333333,
"support": 15.0
},
"consideration": {
"f1-score": 0.9655172413793104,
"precision": 1.0,
"recall": 0.9333333333333333,
"support": 15.0
},
"decision": {
"f1-score": 0.9655172413793104,
"precision": 1.0,
"recall": 0.9333333333333333,
"support": 15.0
},
"macro avg": {
"f1-score": 0.9634888438133874,
"precision": 0.9699248120300752,
"recall": 0.9619047619047619,
"support": 105.0
},
"post_purchase": {
"f1-score": 1.0,
"precision": 1.0,
"recall": 1.0,
"support": 15.0
},
"research": {
"f1-score": 0.8823529411764706,
"precision": 0.7894736842105263,
"recall": 1.0,
"support": 15.0
},
"support": {
"f1-score": 1.0,
"precision": 1.0,
"recall": 1.0,
"support": 15.0
},
"weighted avg": {
"f1-score": 0.9634888438133875,
"precision": 0.9699248120300752,
"recall": 0.9619047619047619,
"support": 105.0
}
},
"suite": "difficulty_benchmark"
}