wanderlust-chatbot / docs /eval_intent.json
Kiriten892's picture
Wave 2: compare_destinations data + NER fixes + evaluation update
6feeee8
Raw
History Blame Contribute Delete
3.63 kB
{
"model": "intent_classifier",
"test_file": "app/data/datasets/intent_test.json",
"is_heldout": true,
"samples": 1094,
"accuracy": 0.9387568555758684,
"macro_f1": 0.941181830217566,
"per_intent": {
"activity_suggest": {
"precision": 0.8770491803278688,
"recall": 0.9224137931034483,
"f1": 0.8991596638655462,
"support": 116
},
"budget_advice": {
"precision": 0.9435483870967742,
"recall": 0.975,
"f1": 0.9590163934426229,
"support": 120
},
"compare_destinations": {
"precision": 0.8947368421052632,
"recall": 0.9444444444444444,
"f1": 0.918918918918919,
"support": 18
},
"fallback": {
"precision": 0.905982905982906,
"recall": 0.8833333333333333,
"f1": 0.8945147679324894,
"support": 120
},
"find_flight": {
"precision": 0.952,
"recall": 0.9916666666666667,
"f1": 0.9714285714285714,
"support": 120
},
"find_hotel": {
"precision": 0.9666666666666667,
"recall": 0.9666666666666667,
"f1": 0.9666666666666667,
"support": 30
},
"food_recommend": {
"precision": 0.8879310344827587,
"recall": 0.8583333333333333,
"f1": 0.8728813559322034,
"support": 120
},
"greeting": {
"precision": 0.84375,
"recall": 0.9,
"f1": 0.8709677419354839,
"support": 30
},
"plan_trip": {
"precision": 0.991304347826087,
"recall": 0.95,
"f1": 0.9702127659574468,
"support": 120
},
"thank": {
"precision": 1.0,
"recall": 1.0,
"f1": 1.0,
"support": 30
},
"transport_info": {
"precision": 0.991304347826087,
"recall": 0.95,
"f1": 0.9702127659574468,
"support": 120
},
"visa_info": {
"precision": 1.0,
"recall": 0.9666666666666667,
"f1": 0.9830508474576272,
"support": 30
},
"weather_info": {
"precision": 0.9583333333333334,
"recall": 0.9583333333333334,
"f1": 0.9583333333333334,
"support": 120
}
},
"flagged_low_f1": [],
"top_confusions": [
[
"food_recommend",
"activity_suggest",
8
],
[
"fallback",
"food_recommend",
6
],
[
"food_recommend",
"fallback",
6
],
[
"activity_suggest",
"food_recommend",
5
],
[
"budget_advice",
"find_flight",
3
],
[
"transport_info",
"find_flight",
3
],
[
"activity_suggest",
"budget_advice",
2
],
[
"fallback",
"weather_info",
2
],
[
"plan_trip",
"activity_suggest",
2
],
[
"weather_info",
"fallback",
2
],
[
"activity_suggest",
"fallback",
1
],
[
"activity_suggest",
"greeting",
1
],
[
"compare_destinations",
"food_recommend",
1
],
[
"fallback",
"activity_suggest",
1
],
[
"fallback",
"budget_advice",
1
],
[
"fallback",
"compare_destinations",
1
],
[
"fallback",
"greeting",
1
],
[
"fallback",
"plan_trip",
1
],
[
"fallback",
"transport_info",
1
],
[
"find_flight",
"budget_advice",
1
]
],
"confidence_distribution": {
"high": 862,
"mid": 215,
"low": 17
},
"method_counts": {
"ml_model": 1060,
"ood_rejector": 2,
"keyword_fallback": 32
}
}