agentic-intent-classifier / artifacts /evaluation /latest /decision_phase_difficulty_benchmark_report.json
| { | |
| "accepted_accuracy": 0.9619, | |
| "accepted_coverage": 1.0, | |
| "accuracy": 0.9619, | |
| "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/decision_phase_difficulty_benchmark_confusion_matrix.csv", | |
| "count": 105, | |
| "dataset_path": "/content/agentic-intent-classifier/data/decision_phase_benchmark.jsonl", | |
| "difficulty_breakdown": { | |
| "easy": { | |
| "accepted_accuracy": 0.9714, | |
| "accepted_coverage": 1.0, | |
| "accuracy": 0.9714, | |
| "count": 35, | |
| "fallback_rate": 0.0, | |
| "macro_f1": 0.9711 | |
| }, | |
| "hard": { | |
| "accepted_accuracy": 0.9143, | |
| "accepted_coverage": 1.0, | |
| "accuracy": 0.9143, | |
| "count": 35, | |
| "fallback_rate": 0.0, | |
| "macro_f1": 0.9194 | |
| }, | |
| "medium": { | |
| "accepted_accuracy": 1.0, | |
| "accepted_coverage": 1.0, | |
| "accuracy": 1.0, | |
| "count": 35, | |
| "fallback_rate": 0.0, | |
| "macro_f1": 1.0 | |
| } | |
| }, | |
| "fallback_rate": 0.0, | |
| "head": "decision_phase", | |
| "macro_f1": 0.9635, | |
| "per_class_metrics": { | |
| "accuracy": 0.9619047619047619, | |
| "action": { | |
| "f1-score": 0.9655172413793104, | |
| "precision": 1.0, | |
| "recall": 0.9333333333333333, | |
| "support": 15.0 | |
| }, | |
| "awareness": { | |
| "f1-score": 0.9655172413793104, | |
| "precision": 1.0, | |
| "recall": 0.9333333333333333, | |
| "support": 15.0 | |
| }, | |
| "consideration": { | |
| "f1-score": 0.9655172413793104, | |
| "precision": 1.0, | |
| "recall": 0.9333333333333333, | |
| "support": 15.0 | |
| }, | |
| "decision": { | |
| "f1-score": 0.9655172413793104, | |
| "precision": 1.0, | |
| "recall": 0.9333333333333333, | |
| "support": 15.0 | |
| }, | |
| "macro avg": { | |
| "f1-score": 0.9634888438133874, | |
| "precision": 0.9699248120300752, | |
| "recall": 0.9619047619047619, | |
| "support": 105.0 | |
| }, | |
| "post_purchase": { | |
| "f1-score": 1.0, | |
| "precision": 1.0, | |
| "recall": 1.0, | |
| "support": 15.0 | |
| }, | |
| "research": { | |
| "f1-score": 0.8823529411764706, | |
| "precision": 0.7894736842105263, | |
| "recall": 1.0, | |
| "support": 15.0 | |
| }, | |
| "support": { | |
| "f1-score": 1.0, | |
| "precision": 1.0, | |
| "recall": 1.0, | |
| "support": 15.0 | |
| }, | |
| "weighted avg": { | |
| "f1-score": 0.9634888438133875, | |
| "precision": 0.9699248120300752, | |
| "recall": 0.9619047619047619, | |
| "support": 105.0 | |
| } | |
| }, | |
| "suite": "difficulty_benchmark" | |
| } | |