manikumargouni committed on
Commit
b751bb5
·
verified ·
1 Parent(s): b2e10ba

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. HF_MODEL_CARD.md +208 -0
  2. README.md +449 -54
  3. artifacts/calibration/decision_phase.json +9 -9
  4. artifacts/calibration/iab_content.json +14 -14
  5. artifacts/calibration/intent_subtype.json +13 -13
  6. artifacts/calibration/intent_type.json +13 -13
  7. artifacts/evaluation/latest/combined_demo_benchmark.json +168 -245
  8. artifacts/evaluation/latest/decision_phase_difficulty_benchmark_confusion_matrix.csv +3 -3
  9. artifacts/evaluation/latest/decision_phase_difficulty_benchmark_report.json +26 -26
  10. artifacts/evaluation/latest/decision_phase_final_wave_cases_report.json +2 -2
  11. artifacts/evaluation/latest/decision_phase_hard_cases_confusion_matrix.csv +1 -1
  12. artifacts/evaluation/latest/decision_phase_hard_cases_report.json +16 -16
  13. artifacts/evaluation/latest/decision_phase_test_confusion_matrix.csv +2 -2
  14. artifacts/evaluation/latest/decision_phase_test_report.json +20 -20
  15. artifacts/evaluation/latest/decision_phase_train_confusion_matrix.csv +3 -3
  16. artifacts/evaluation/latest/decision_phase_train_report.json +22 -22
  17. artifacts/evaluation/latest/decision_phase_val_confusion_matrix.csv +1 -1
  18. artifacts/evaluation/latest/decision_phase_val_report.json +16 -16
  19. artifacts/evaluation/latest/iab_behavior_lock_regression.json +197 -55
  20. artifacts/evaluation/latest/iab_content_cross_vertical_benchmark_report.json +68 -63
  21. artifacts/evaluation/latest/iab_content_difficulty_benchmark_report.json +65 -61
  22. artifacts/evaluation/latest/iab_content_extended_cases_report.json +41 -36
  23. artifacts/evaluation/latest/iab_content_hard_cases_report.json +42 -42
  24. artifacts/evaluation/latest/iab_content_test_report.json +39 -34
  25. artifacts/evaluation/latest/iab_content_train_report.json +46 -50
  26. artifacts/evaluation/latest/iab_content_val_report.json +46 -50
  27. artifacts/evaluation/latest/iab_cross_vertical_behavior_lock_regression.json +0 -0
  28. artifacts/evaluation/latest/iab_cross_vertical_quality_target_eval.json +457 -706
  29. artifacts/evaluation/latest/iab_quality_target_eval.json +74 -77
  30. artifacts/evaluation/latest/intent_subtype_difficulty_benchmark_confusion_matrix.csv +11 -11
  31. artifacts/evaluation/latest/intent_subtype_difficulty_benchmark_report.json +62 -62
  32. artifacts/evaluation/latest/intent_subtype_extended_cases_confusion_matrix.csv +3 -3
  33. artifacts/evaluation/latest/intent_subtype_extended_cases_report.json +23 -23
  34. artifacts/evaluation/latest/intent_subtype_hard_cases_confusion_matrix.csv +4 -4
  35. artifacts/evaluation/latest/intent_subtype_hard_cases_report.json +32 -32
  36. artifacts/evaluation/latest/intent_subtype_test_confusion_matrix.csv +5 -5
  37. artifacts/evaluation/latest/intent_subtype_test_report.json +29 -29
  38. artifacts/evaluation/latest/intent_subtype_train_confusion_matrix.csv +10 -10
  39. artifacts/evaluation/latest/intent_subtype_train_report.json +48 -48
  40. artifacts/evaluation/latest/intent_subtype_val_confusion_matrix.csv +5 -5
  41. artifacts/evaluation/latest/intent_subtype_val_report.json +33 -33
  42. artifacts/evaluation/latest/intent_type_difficulty_benchmark_confusion_matrix.csv +1 -1
  43. artifacts/evaluation/latest/intent_type_difficulty_benchmark_report.json +19 -19
  44. artifacts/evaluation/latest/intent_type_hard_cases_report.json +2 -2
  45. artifacts/evaluation/latest/intent_type_test_confusion_matrix.csv +3 -3
  46. artifacts/evaluation/latest/intent_type_test_report.json +23 -23
  47. artifacts/evaluation/latest/intent_type_third_wave_cases_confusion_matrix.csv +2 -2
  48. artifacts/evaluation/latest/intent_type_third_wave_cases_report.json +13 -13
  49. artifacts/evaluation/latest/intent_type_train_confusion_matrix.csv +1 -1
  50. artifacts/evaluation/latest/intent_type_train_report.json +16 -16
HF_MODEL_CARD.md ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - en
4
+ library_name: transformers
5
+ pipeline_tag: text-classification
6
+ base_model: distilbert-base-uncased
7
+ metrics:
8
+ - accuracy
9
+ - f1
10
+ tags:
11
+ - intent-classification
12
+ - multitask
13
+ - iab
14
+ - conversational-ai
15
+ - adtech
16
+ - calibrated-confidence
17
+ license: apache-2.0
18
+ ---
19
+
20
+ # admesh/agentic-intent-classifier
21
+
22
+ Production-ready intent + IAB classifier bundle for conversational traffic.
23
+
24
+ Combines multitask intent modeling, supervised IAB content classification, and per-head confidence calibration to support safe monetization decisions in real time.
25
+
26
+ ## Links
27
+
28
+ - Hugging Face: https://huggingface.co/admesh/agentic-intent-classifier
29
+ - GitHub: https://github.com/GouniManikumar12/agentic-intent-classifier
30
+
31
+ ## What It Predicts
32
+
33
+ | Field | Description |
34
+ |---|---|
35
+ | `intent.type` | `commercial`, `informational`, `exploratory`, `transactional`, … |
36
+ | `intent.subtype` | `product_discovery`, `comparison`, `evaluation`, … |
37
+ | `intent.decision_phase` | `awareness`, `consideration`, `decision`, … |
38
+ | `iab_content` | IAB Content Taxonomy 3.0 tier1 / tier2 / tier3 labels |
39
+ | `component_confidence` | Per-head calibrated confidence with threshold flags |
40
+ | `system_decision` | Monetization eligibility, opportunity type, policy |
41
+
42
+ ---
43
+
44
+ ## Deployment Options
45
+
46
+ ### 1. `transformers.pipeline()` — one line anywhere
47
+
48
+ ```python
49
+ from transformers import pipeline
50
+
51
+ clf = pipeline(
52
+ "admesh-intent",
53
+ model="admesh/agentic-intent-classifier",
54
+ trust_remote_code=True,
55
+ )
56
+
57
+ result = clf("Which laptop should I buy for college?")
58
+ ```
59
+
60
+ Batch and custom thresholds:
61
+
62
+ ```python
63
+ # batch
64
+ results = clf([
65
+ "Best running shoes under $100",
66
+ "How does TCP work?",
67
+ "Buy noise-cancelling headphones",
68
+ ])
69
+
70
+ # custom confidence thresholds
71
+ result = clf(
72
+ "Buy headphones",
73
+ threshold_overrides={"intent_type": 0.6, "intent_subtype": 0.35},
74
+ )
75
+ ```
76
+
77
+ ---
78
+
79
+ ### 2. HF Inference Endpoints (managed, deploy to AWS / Azure / GCP)
80
+
81
+ 1. Go to https://ui.endpoints.huggingface.co
82
+ 2. **New Endpoint** → select `admesh/agentic-intent-classifier`
83
+ 3. Framework: **PyTorch** — Task: **Text Classification**
84
+ 4. Enable **"Load with trust_remote_code"**
85
+ 5. Deploy
86
+
87
+ The endpoint serves the same `pipeline()` interface above via REST:
88
+
89
+ ```bash
90
+ curl https://<your-endpoint>.endpoints.huggingface.cloud \
91
+ -H "Authorization: Bearer $HF_TOKEN" \
92
+ -H "Content-Type: application/json" \
93
+ -d '{"inputs": "Which laptop should I buy for college?"}'
94
+ ```
95
+
96
+ ---
97
+
98
+ ### 3. HF Spaces (Gradio / Streamlit demo)
99
+
100
+ ```python
101
+ # app.py for a Gradio Space
102
+ import gradio as gr
103
+ from transformers import pipeline
104
+
105
+ clf = pipeline(
106
+ "admesh-intent",
107
+ model="admesh/agentic-intent-classifier",
108
+ trust_remote_code=True,
109
+ )
110
+
111
+ def classify(text):
112
+ return clf(text)
113
+
114
+ gr.Interface(fn=classify, inputs="text", outputs="json").launch()
115
+ ```
116
+
117
+ ---
118
+
119
+ ### 4. Local / notebook via `snapshot_download`
120
+
121
+ ```python
122
+ import sys
123
+ from huggingface_hub import snapshot_download
124
+
125
+ local_dir = snapshot_download(
126
+ repo_id="admesh/agentic-intent-classifier",
127
+ repo_type="model",
128
+ )
129
+ sys.path.insert(0, local_dir)
130
+
131
+ from pipeline import AdmeshIntentPipeline
132
+ clf = AdmeshIntentPipeline()
133
+ result = clf("I need a CRM for a 5-person startup")
134
+ ```
135
+
136
+ Or the one-liner factory:
137
+
138
+ ```python
139
+ from pipeline import AdmeshIntentPipeline  # requires sys.path.insert(0, local_dir) from the snippet above
140
+ clf = AdmeshIntentPipeline.from_pretrained("admesh/agentic-intent-classifier")
141
+ ```
142
+
143
+ ---
144
+
145
+ ## Example Output
146
+
147
+ ```json
148
+ {
149
+ "model_output": {
150
+ "classification": {
151
+ "iab_content": {
152
+ "taxonomy": "IAB Content Taxonomy",
153
+ "taxonomy_version": "3.0",
154
+ "tier1": {"id": "552", "label": "Style & Fashion"},
155
+ "tier2": {"id": "579", "label": "Men's Fashion"},
156
+ "mapping_mode": "exact",
157
+ "mapping_confidence": 0.73
158
+ },
159
+ "intent": {
160
+ "type": "commercial",
161
+ "subtype": "product_discovery",
162
+ "decision_phase": "consideration",
163
+ "confidence": 0.9549,
164
+ "commercial_score": 0.656
165
+ }
166
+ }
167
+ },
168
+ "system_decision": {
169
+ "policy": {
170
+ "monetization_eligibility": "allowed_with_caution",
171
+ "eligibility_reason": "commercial_discovery_signal_present"
172
+ },
173
+ "opportunity": {"type": "soft_recommendation", "strength": "medium"}
174
+ },
175
+ "meta": {
176
+ "system_version": "0.6.0-phase4",
177
+ "calibration_enabled": true,
178
+ "iab_mapping_is_placeholder": false
179
+ }
180
+ }
181
+ ```
182
+
183
+ ## Reproducible Revision
184
+
185
+ ```python
186
+ from huggingface_hub import snapshot_download
187
+ local_dir = snapshot_download(
188
+ repo_id="admesh/agentic-intent-classifier",
189
+ repo_type="model",
190
+ revision="0584798f8efee6beccd778b0afa06782ab5add60",
191
+ )
192
+ ```
193
+
194
+ ## Included Artifacts
195
+
196
+ | Path | Contents |
197
+ |---|---|
198
+ | `multitask_intent_model_output/` | DistilBERT multitask weights + tokenizer |
199
+ | `iab_classifier_model_output/` | IAB content classifier weights + tokenizer |
200
+ | `artifacts/calibration/` | Per-head temperature + threshold JSONs |
201
+ | `pipeline.py` | `AdmeshIntentPipeline` (transformers.Pipeline subclass) |
202
+ | `combined_inference.py` | Core inference logic |
203
+
204
+ ## Notes
205
+
206
+ - `trust_remote_code=True` is required because this model uses a custom multi-head architecture that does not map to a single standard `AutoModel` checkpoint.
207
+ - `meta.iab_mapping_is_placeholder: true` means IAB artifacts were missing or skipped; train and calibrate IAB for full production accuracy.
208
+ - For long-running servers, instantiate once and reuse — models are cached in memory after the first call.
README.md CHANGED
@@ -1,81 +1,188 @@
1
- ---
2
- language:
3
- - en
4
- library_name: transformers
5
- pipeline_tag: text-classification
6
- base_model: distilbert-base-uncased
7
- metrics:
8
- - accuracy
9
- - f1
10
- tags:
11
- - intent-classification
12
- - multitask
13
- - iab
14
- - conversational-ai
15
- - adtech
16
- - calibrated-confidence
17
- license: apache-2.0
18
- ---
19
-
20
- # admesh/agentic-intent-classifier
21
 
22
- Production-ready intent + IAB classifier bundle for conversational traffic.
23
 
24
- This package combines multitask intent modeling, supervised IAB classification, and confidence calibration to support safe monetization decisions in real time.
25
-
26
- ## What It Predicts
27
 
28
  - `intent.type`
29
  - `intent.subtype`
30
  - `intent.decision_phase`
31
  - `iab_content`
32
- - per-head calibrated confidence
33
- - fallback/policy/opportunity decision envelope
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
- ## Why It Is Useful
36
 
37
- - Single package for intent, phase, subtype, and IAB routing
38
- - Calibrated thresholds for safer downstream decisions
39
- - Works out of the box with `combined_inference.py` and `demo_api.py`
40
- - Easy local run, Colab run, or server integration
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
- ## Links
43
 
44
- - Hugging Face model: https://huggingface.co/admesh/agentic-intent-classifier
45
- - GitHub source: https://github.com/GouniManikumar12/agentic-intent-classifier
46
 
47
- ## Quick Start
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
  ```python
 
50
  from huggingface_hub import snapshot_download
51
 
 
52
  local_dir = snapshot_download(
53
  repo_id="admesh/agentic-intent-classifier",
54
  repo_type="model",
55
  )
56
- print(local_dir)
 
 
 
 
 
 
 
 
 
57
  ```
58
 
59
- ```bash
60
- cd "<LOCAL_DIR_FROM_PRINT>"
61
- python3 training/pipeline_verify.py
62
- python3 combined_inference.py "Which laptop should I buy for college?"
 
 
 
63
  ```
64
 
65
- ## API Mode
66
 
67
- ```bash
68
- cd "<LOCAL_DIR_FROM_PRINT>"
69
- python3 demo_api.py
 
 
 
 
 
 
 
 
 
 
70
  ```
71
 
 
 
72
  ```bash
73
- curl -sS -X POST http://127.0.0.1:8008/classify \
74
- -H 'Content-Type: application/json' \
75
- -d '{"text":"I need CRM for a 5 person startup"}'
76
  ```
77
 
78
- ## Reproducible Revision
79
 
80
  ```python
81
  local_dir = snapshot_download(
@@ -85,13 +192,301 @@ local_dir = snapshot_download(
85
  )
86
  ```
87
 
88
- ## Included Folders
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
 
90
- - `multitask_intent_model_output/`
91
- - `iab_classifier_model_output/`
92
  - `artifacts/calibration/`
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
 
94
- ## Notes
95
 
96
- - Use the three folders above together for expected behavior.
97
- - If integrating in production, prefer long-lived API processes with preloaded models.
 
 
 
1
+ # Agentic Intent Classifier
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
+ `agentic-intent-classifier` is a multi-head query classification stack for conversational traffic.
4
 
5
+ It currently produces:
 
 
6
 
7
  - `intent.type`
8
  - `intent.subtype`
9
  - `intent.decision_phase`
10
  - `iab_content`
11
+ - calibrated confidence per head
12
+ - combined fallback / policy / opportunity decisions
13
+
14
+ The repo is beyond the original v0.1 baseline. It now includes:
15
+
16
+ - shared config and label ownership
17
+ - reusable model runtime
18
+ - calibrated confidence and threshold gating
19
+ - combined inference with fallback/policy logic
20
+ - request/response validation in the demo API
21
+ - repeatable evaluation and regression suites
22
+ - full-TSV IAB taxonomy retrieval support through tier4
23
+ - a local embedding index for taxonomy-node retrieval over IAB content paths
24
+ - a separate synthetic full-intent-taxonomy augmentation dataset for non-IAB heads
25
+ - a dedicated intent-type difficulty dataset and held-out benchmark with `easy`, `medium`, and `hard` cases
26
+ - a dedicated decision-phase difficulty dataset and held-out benchmark with `easy`, `medium`, and `hard` cases
27
+
28
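+ Generated model weights are intentionally not committed to the source repository; download the trained bundle from Hugging Face (see the Quickstart below) or train locally.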
29
+
30
+ ## Current Taxonomy
31
+
32
+ ### `intent.type`
33
+
34
+ - `informational`
35
+ - `exploratory`
36
+ - `commercial`
37
+ - `transactional`
38
+ - `support`
39
+ - `personal_reflection`
40
+ - `creative_generation`
41
+ - `chit_chat`
42
+ - `ambiguous`
43
+ - `prohibited`
44
+
45
+ ### `intent.decision_phase`
46
+
47
+ - `awareness`
48
+ - `research`
49
+ - `consideration`
50
+ - `decision`
51
+ - `action`
52
+ - `post_purchase`
53
+ - `support`
54
 
55
+ ### `intent.subtype`
56
 
57
+ - `education`
58
+ - `product_discovery`
59
+ - `comparison`
60
+ - `evaluation`
61
+ - `deal_seeking`
62
+ - `provider_selection`
63
+ - `signup`
64
+ - `purchase`
65
+ - `booking`
66
+ - `download`
67
+ - `contact_sales`
68
+ - `task_execution`
69
+ - `onboarding_setup`
70
+ - `troubleshooting`
71
+ - `account_help`
72
+ - `billing_help`
73
+ - `follow_up`
74
+ - `emotional_reflection`
75
 
76
+ ### `iab_content`
77
 
78
+ - candidates are derived from every row in [data/iab-content/Content Taxonomy 3.0.tsv](data/iab-content/Content%20Taxonomy%203.0.tsv)
79
+ - retrieval output supports `tier1`, `tier2`, `tier3`, and optional `tier4`
80
 
81
+ ## What The System Does
82
+
83
+ - runs three classifier heads:
84
+ - `intent_type`
85
+ - `intent_subtype`
86
+ - `decision_phase`
87
+ - resolves `iab_content` through the supervised IAB classifier with taxonomy-aware parent fallback; the local embedding-index retrieval path over taxonomy nodes remains available as an optional shadow baseline
88
+ - applies calibration artifacts when present
89
+ - computes `commercial_score`
90
+ - applies fallback when confidence is too weak or policy-safe blocking is required
91
+ - emits a schema-validated combined envelope
92
+
93
+ ## What The System Does Not Do
94
+
95
+ - it is not a multi-turn memory system
96
+ - it is not a production-optimized low-latency serving path
97
+ - it is not yet trained on large real-traffic human-labeled intent data
98
+ - combined decision logic is still heuristic, even though it is materially stronger than the original baseline
99
+
100
+ ## Project Layout
101
+
102
+ - [config.py](config.py): labels, thresholds, artifact paths, model paths
103
+ - [model_runtime.py](model_runtime.py): shared calibrated inference runtime
104
+ - [combined_inference.py](combined_inference.py): composed system response
105
+ - [inference_intent_type.py](inference_intent_type.py): direct `intent_type` inference entrypoint
106
+ - [inference_iab_classifier.py](inference_iab_classifier.py): direct supervised `iab_content` inference entrypoint
107
+ - [schemas.py](schemas.py): request/response validation
108
+ - [demo_api.py](demo_api.py): local validated API
109
+ - [iab_taxonomy.py](iab_taxonomy.py): full taxonomy parser/index
110
+ - [iab_classifier.py](iab_classifier.py): supervised IAB runtime with taxonomy-aware parent fallback
111
+ - [iab_retrieval.py](iab_retrieval.py): optional shadow retrieval baseline
112
+ - [training/build_full_intent_taxonomy_dataset.py](training/build_full_intent_taxonomy_dataset.py): separate synthetic intent augmentation dataset
113
+ - [training/build_intent_type_difficulty_dataset.py](training/build_intent_type_difficulty_dataset.py): extra `intent_type` augmentation plus held-out difficulty benchmark
114
+ - [training/build_decision_phase_difficulty_dataset.py](training/build_decision_phase_difficulty_dataset.py): extra `decision_phase` augmentation plus held-out difficulty benchmark
115
+ - [training/build_subtype_difficulty_dataset.py](training/build_subtype_difficulty_dataset.py): extra `intent_subtype` augmentation plus held-out difficulty benchmark
116
+ - [training/build_subtype_dataset.py](training/build_subtype_dataset.py): subtype dataset generation from existing corpora
117
+ - [training/train_iab.py](training/train_iab.py): train the supervised IAB classifier head
118
+ - [training/build_iab_taxonomy_embeddings.py](training/build_iab_taxonomy_embeddings.py): build local IAB node embedding artifacts
119
+ - [training/run_full_training_pipeline.py](training/run_full_training_pipeline.py): full multi-head training/calibration/eval pipeline
120
+ - [evaluation/run_evaluation.py](evaluation/run_evaluation.py): repeatable benchmark runner
121
+ - [evaluation/run_regression_suite.py](evaluation/run_regression_suite.py): known-failure regression runner
122
+ - [evaluation/run_iab_mapping_suite.py](evaluation/run_iab_mapping_suite.py): IAB behavior-lock regression runner
123
+ - [evaluation/run_iab_quality_suite.py](evaluation/run_iab_quality_suite.py): curated IAB quality-target runner
124
+ - [known_limitations.md](known_limitations.md): current gaps and caveats
125
+
126
+ ## Quickstart: Run From Hugging Face
127
+
128
+ Download the trained bundle and run inference in a few lines — no local training required.
129
 
130
  ```python
131
+ import sys
132
  from huggingface_hub import snapshot_download
133
 
134
+ # Download the full bundle (models + calibration + code)
135
  local_dir = snapshot_download(
136
  repo_id="admesh/agentic-intent-classifier",
137
  repo_type="model",
138
  )
139
+ sys.path.insert(0, local_dir)
140
+
141
+ # Import and instantiate
142
+ from pipeline import AdmeshIntentPipeline
143
+ clf = AdmeshIntentPipeline()
144
+
145
+ # Classify
146
+ import json
147
+ result = clf("Which laptop should I buy for college?")
148
+ print(json.dumps(result, indent=2))
149
  ```
150
 
151
+ Or use the one-liner factory method:
152
+
153
+ ```python
154
+ from pipeline import AdmeshIntentPipeline # after sys.path.insert above
155
+
156
+ clf = AdmeshIntentPipeline.from_pretrained("admesh/agentic-intent-classifier")
157
+ result = clf("I need a CRM for a 5-person startup")
158
  ```
159
 
160
+ Batch mode and custom thresholds are also supported:
161
 
162
+ ```python
163
+ # Batch
164
+ results = clf([
165
+ "Best running shoes under $100",
166
+ "How does gradient descent work?",
167
+ "Buy noise-cancelling headphones",
168
+ ])
169
+
170
+ # Custom confidence thresholds
171
+ result = clf(
172
+ "Buy noise-cancelling headphones",
173
+ threshold_overrides={"intent_type": 0.6, "intent_subtype": 0.35},
174
+ )
175
  ```
176
 
177
+ Verify artifacts and run a smoke test from the CLI:
178
+
179
  ```bash
180
+ cd "<local_dir>"
181
+ python3 training/pipeline_verify.py
182
+ python3 combined_inference.py "Which CRM should I buy for a 3-person startup?"
183
  ```
184
 
185
+ Pin a specific revision for reproducibility:
186
 
187
  ```python
188
  local_dir = snapshot_download(
 
192
  )
193
  ```
194
 
195
+ ---
196
+
197
+ ## Setup (for local training)
198
+
199
+ ```bash
200
+ python3 -m venv .venv
201
+ source .venv/bin/activate
202
+ pip install -r agentic-intent-classifier/requirements.txt
203
+ ```
204
+
205
+ ## Inference (local training path)
206
+
207
+ Run one query locally:
208
+
209
+ ```bash
210
+ cd agentic-intent-classifier
211
+ python3 training/train_iab.py
212
+ python3 training/calibrate_confidence.py --head iab_content
213
+ python3 combined_inference.py "Which CRM should I buy for a 3-person startup?"
214
+ ```
215
+
216
+ Run only the `intent_type` head:
217
+
218
+ ```bash
219
+ cd agentic-intent-classifier
220
+ python3 inference_intent_type.py "best shoes under 100"
221
+ ```
222
+
223
+ Run the demo API:
224
+
225
+ ```bash
226
+ cd agentic-intent-classifier
227
+ python3 demo_api.py
228
+ ```
229
+
230
+ Example request:
231
+
232
+ ```bash
233
+ curl -sS -X POST http://127.0.0.1:8008/classify \
234
+ -H 'Content-Type: application/json' \
235
+ -d '{"text":"I cannot log into my account"}'
236
+ ```
237
+
238
+ Infra endpoints:
239
+
240
+ ```bash
241
+ curl -sS http://127.0.0.1:8008/health
242
+ curl -sS http://127.0.0.1:8008/version
243
+ ```
244
+
245
+ Train only the IAB classifier head:
246
+
247
+ ```bash
248
+ cd agentic-intent-classifier
249
+ python3 training/train_iab.py
250
+ python3 training/calibrate_confidence.py --head iab_content
251
+ ```
252
+
253
+ The online `iab_content` path now uses the compact supervised classifier. Retrieval is still available as an optional shadow baseline.
254
+
255
+ Build the optional retrieval shadow index:
256
+
257
+ ```bash
258
+ cd agentic-intent-classifier
259
+ python3 training/build_iab_taxonomy_embeddings.py
260
+ ```
261
+
262
+ By default the shadow retrieval path uses `Alibaba-NLP/gte-Qwen2-1.5B-instruct`. The retrieval runtime applies the model's query-side instruction format and last-token pooling, matching the Hugging Face usage guidance. If you want to point retrieval at a different embedding model, set `IAB_RETRIEVAL_MODEL_NAME_OVERRIDE` before building the index.
263
+
264
+ Open-source users can swap in their own embedding model, but the contract is:
265
+
266
+ - query embeddings and taxonomy-node embeddings must be produced by the same model and model revision
267
+ - after changing models, you must rebuild `artifacts/iab/taxonomy_embeddings.pt`
268
+ - the repository only tests and supports the default model path out of the box
269
+ - not every Hugging Face embedding model is drop-in compatible with this runtime; some require custom pooling, query instructions, or `trust_remote_code`
270
+
271
+ Example override:
272
+
273
+ ```bash
274
+ cd agentic-intent-classifier
275
+ export IAB_RETRIEVAL_MODEL_NAME_OVERRIDE=mixedbread-ai/mxbai-embed-large-v1
276
+ python3 training/build_iab_taxonomy_embeddings.py
277
+ ```
278
+
279
+ This writes:
280
+
281
+ - `artifacts/iab/taxonomy_nodes.json`
282
+ - `artifacts/iab/taxonomy_embeddings.pt`
283
+
284
+ ## Training
285
+
286
+ ### Full local pipeline
287
+
288
+ ```bash
289
+ cd agentic-intent-classifier
290
+ python3 training/run_full_training_pipeline.py
291
+ ```
292
+
293
+ This pipeline now does:
294
+
295
+ 1. build separate full-intent-taxonomy augmentation data
296
+ 2. build separate `intent_type` difficulty augmentation + benchmark
297
+ 3. train `intent_type`
298
+ 4. build subtype corpus
299
+ 5. build separate `intent_subtype` difficulty augmentation + benchmark
300
+ 6. train `intent_subtype`
301
+ 7. build separate `decision_phase` difficulty augmentation + benchmark
302
+ 8. train `decision_phase`
303
+ 9. train `iab_content`
304
+ 10. calibrate all classifier heads, including `iab_content`
305
+ 11. run regression/evaluation unless `--skip-full-eval` is used
306
+
307
+ ### Build datasets individually
308
+
309
+ Separate full-intent augmentation:
310
+
311
+ ```bash
312
+ cd agentic-intent-classifier
313
+ python3 training/build_full_intent_taxonomy_dataset.py
314
+ ```
315
+
316
+ Intent-type difficulty augmentation and benchmark:
317
+
318
+ ```bash
319
+ cd agentic-intent-classifier
320
+ python3 training/build_intent_type_difficulty_dataset.py
321
+ ```
322
+
323
+ Decision-phase difficulty augmentation and benchmark:
324
+
325
+ ```bash
326
+ cd agentic-intent-classifier
327
+ python3 training/build_decision_phase_difficulty_dataset.py
328
+ ```
329
+
330
+ Subtype difficulty augmentation and benchmark:
331
+
332
+ ```bash
333
+ cd agentic-intent-classifier
334
+ python3 training/build_subtype_difficulty_dataset.py
335
+ ```
336
+
337
+ Subtype dataset:
338
+
339
+ ```bash
340
+ cd agentic-intent-classifier
341
+ python3 training/build_subtype_dataset.py
342
+ ```
343
+
344
+ IAB embedding index:
345
+
346
+ ```bash
347
+ cd agentic-intent-classifier
348
+ python3 training/build_iab_taxonomy_embeddings.py
349
+ ```
350
+
351
+ ### Train heads individually
352
+
353
+ ```bash
354
+ cd agentic-intent-classifier
355
+ python3 training/train.py
356
+ python3 training/train_subtype.py
357
+ python3 training/train_decision_phase.py
358
+ ```
359
+
360
+ ### Calibration
361
+
362
+ ```bash
363
+ cd agentic-intent-classifier
364
+ python3 training/calibrate_confidence.py --head intent_type
365
+ python3 training/calibrate_confidence.py --head intent_subtype
366
+ python3 training/calibrate_confidence.py --head decision_phase
367
+ ```
368
+
369
+ ## Evaluation
370
+
371
+ Full evaluation:
372
+
373
+ ```bash
374
+ cd agentic-intent-classifier
375
+ python3 evaluation/run_evaluation.py
376
+ ```
377
+
378
+ Known-failure regression:
379
+
380
+ ```bash
381
+ cd agentic-intent-classifier
382
+ python3 evaluation/run_regression_suite.py
383
+ ```
384
+
385
+ IAB behavior-lock regression:
386
+
387
+ ```bash
388
+ cd agentic-intent-classifier
389
+ python3 evaluation/run_iab_mapping_suite.py
390
+ ```
391
+
392
+ IAB quality-target evaluation:
393
+
394
+ ```bash
395
+ cd agentic-intent-classifier
396
+ python3 evaluation/run_iab_quality_suite.py
397
+ ```
398
+
399
+ Threshold sweeps:
400
+
401
+ ```bash
402
+ cd agentic-intent-classifier
403
+ python3 evaluation/sweep_intent_threshold.py
404
+ ```
405
+
406
+ Artifacts are written to:
407
 
 
 
408
  - `artifacts/calibration/`
409
+ - `artifacts/evaluation/latest/`
410
+
411
+ ## Google Colab
412
+
413
+ Use Colab for the full retraining pass if local memory is limited.
414
+
415
+ Clone once:
416
+
417
+ ```bash
418
+ %cd /content
419
+ !git clone https://github.com/GouniManikumar12/agentic-intent-classifier.git
420
+ %cd /content/agentic-intent-classifier
421
+ ```
422
+
423
+ If the repo is already cloned and you want the latest code, pull manually:
424
+
425
+ ```bash
426
+ !git pull origin main
427
+ ```
428
+
429
+ Full pipeline:
430
+
431
+ ```bash
432
+ !python training/run_full_training_pipeline.py
433
+ ```
434
+
435
+ If full evaluation is too heavy for the current Colab runtime:
436
+
437
+ ```bash
438
+ !python training/run_full_training_pipeline.py \
439
+ --iab-embedding-batch-size 32 \
440
+ --skip-full-eval
441
+ ```
442
+
443
+ Then run eval separately after training:
444
+
445
+ ```bash
446
+ !python evaluation/run_regression_suite.py
447
+ !python evaluation/run_iab_mapping_suite.py
448
+ !python evaluation/run_iab_quality_suite.py
449
+ !python evaluation/run_evaluation.py
450
+ ```
451
+
452
+ ## Current Saved Metrics
453
+
454
+ Generate fresh metrics with:
455
+
456
+ ```bash
457
+ cd agentic-intent-classifier
458
+ python3 evaluation/run_evaluation.py
459
+ ```
460
+
461
+ Do not treat any checked-in summary as canonical unless it was regenerated after the current code and artifacts were built. The online IAB path now uses the supervised classifier (retrieval remains only as an optional shadow baseline), so older saved reports from the deleted hierarchy stack are not meaningful.
462
+
463
+ ## Latency Note
464
+
465
+ `combined_inference.py` is a debugging/offline path, not a production latency path.
466
+
467
+ Current production truth:
468
+
469
+ - per-request CLI execution is not a sub-50ms architecture
470
+ - production serving should use a long-lived API process with preloaded models
471
+ - if sub-50ms becomes a hard requirement, the serving path will need:
472
+ - persistent loaded models
473
+ - runtime optimization
474
+ - likely fewer model passes or a shared multi-head model
475
+
476
+ ## Current Status
477
+
478
+ Current repo status:
479
+
480
+ - full 10-class `intent.type` taxonomy is wired
481
+ - subtype and phase heads are present
482
+ - difficulty benchmarks are wired for `intent_type`, `intent_subtype`, and `decision_phase`
483
+ - full-TSV IAB taxonomy retrieval is wired through tier4
484
+ - separate full-intent augmentation dataset is in place
485
+ - evaluation/runtime memory handling is improved for large IAB splits
486
 
487
+ The main remaining gap is not basic infrastructure anymore. It is improving real-world robustness, especially for:
488
 
489
+ - `decision_phase`
490
+ - `intent_subtype`
491
+ - confidence quality on borderline commercial queries
492
+ - real-traffic supervision beyond synthetic data
artifacts/calibration/decision_phase.json CHANGED
@@ -1,20 +1,20 @@
1
  {
2
  "calibrated": true,
3
  "confidence_threshold": 0.22,
4
- "generated_at": "2026-03-25T05:10:14.092098+00:00",
5
  "head": "decision_phase",
6
  "metrics": {
7
  "calibrated_accuracy": 0.8621,
8
- "calibrated_expected_calibration_error": 0.0877,
9
- "calibrated_negative_log_likelihood": 0.5315,
10
- "mean_calibrated_confidence": 0.866,
11
- "mean_raw_confidence": 0.8684,
12
  "raw_accuracy": 0.8621,
13
- "raw_expected_calibration_error": 0.087,
14
- "raw_negative_log_likelihood": 0.5317
15
  },
16
  "minimum_threshold_floor": 0.22,
17
- "optimized_temperature_candidate": 1.008347,
18
  "selected_threshold_before_floor": {
19
  "accepted_accuracy": 0.8621,
20
  "coverage": 1.0,
@@ -22,7 +22,7 @@
22
  },
23
  "selection_split": "val",
24
  "selection_target_precision": 0.75,
25
- "temperature": 1.008347,
26
  "temperature_scaling_applied": true,
27
  "threshold_summary": {
28
  "accepted_accuracy": 0.8621,
 
1
  {
2
  "calibrated": true,
3
  "confidence_threshold": 0.22,
4
+ "generated_at": "2026-03-25T07:02:39.749873+00:00",
5
  "head": "decision_phase",
6
  "metrics": {
7
  "calibrated_accuracy": 0.8621,
8
+ "calibrated_expected_calibration_error": 0.0915,
9
+ "calibrated_negative_log_likelihood": 0.5003,
10
+ "mean_calibrated_confidence": 0.8724,
11
+ "mean_raw_confidence": 0.8716,
12
  "raw_accuracy": 0.8621,
13
+ "raw_expected_calibration_error": 0.0911,
14
+ "raw_negative_log_likelihood": 0.5003
15
  },
16
  "minimum_threshold_floor": 0.22,
17
+ "optimized_temperature_candidate": 0.997346,
18
  "selected_threshold_before_floor": {
19
  "accepted_accuracy": 0.8621,
20
  "coverage": 1.0,
 
22
  },
23
  "selection_split": "val",
24
  "selection_target_precision": 0.75,
25
+ "temperature": 0.997346,
26
  "temperature_scaling_applied": true,
27
  "threshold_summary": {
28
  "accepted_accuracy": 0.8621,
artifacts/calibration/iab_content.json CHANGED
@@ -1,32 +1,32 @@
1
  {
2
  "calibrated": true,
3
  "confidence_threshold": 0.12,
4
- "generated_at": "2026-03-25T05:12:02.550364+00:00",
5
  "head": "iab_content",
6
  "metrics": {
7
- "calibrated_accuracy": 0.9442,
8
- "calibrated_expected_calibration_error": 0.2773,
9
- "calibrated_negative_log_likelihood": 0.5519,
10
- "mean_calibrated_confidence": 0.6669,
11
- "mean_raw_confidence": 0.2286,
12
- "raw_accuracy": 0.9442,
13
- "raw_expected_calibration_error": 0.7157,
14
- "raw_negative_log_likelihood": 1.6567
15
  },
16
  "minimum_threshold_floor": 0.12,
17
- "optimized_temperature_candidate": 0.607335,
18
  "selected_threshold_before_floor": {
19
- "accepted_accuracy": 0.9442,
20
  "coverage": 1.0,
21
  "threshold": 0.0
22
  },
23
  "selection_split": "val",
24
  "selection_target_precision": 0.7,
25
- "temperature": 0.607335,
26
  "temperature_scaling_applied": true,
27
  "threshold_summary": {
28
- "accepted_accuracy": 0.9478,
29
- "coverage": 0.9915,
30
  "threshold": 0.12
31
  }
32
  }
 
1
  {
2
  "calibrated": true,
3
  "confidence_threshold": 0.12,
4
+ "generated_at": "2026-03-25T07:04:35.676097+00:00",
5
  "head": "iab_content",
6
  "metrics": {
7
+ "calibrated_accuracy": 0.9159,
8
+ "calibrated_expected_calibration_error": 0.2475,
9
+ "calibrated_negative_log_likelihood": 0.5736,
10
+ "mean_calibrated_confidence": 0.6684,
11
+ "mean_raw_confidence": 0.1932,
12
+ "raw_accuracy": 0.9159,
13
+ "raw_expected_calibration_error": 0.7227,
14
+ "raw_negative_log_likelihood": 1.8448
15
  },
16
  "minimum_threshold_floor": 0.12,
17
+ "optimized_temperature_candidate": 0.562804,
18
  "selected_threshold_before_floor": {
19
+ "accepted_accuracy": 0.9159,
20
  "coverage": 1.0,
21
  "threshold": 0.0
22
  },
23
  "selection_split": "val",
24
  "selection_target_precision": 0.7,
25
+ "temperature": 0.562804,
26
  "temperature_scaling_applied": true,
27
  "threshold_summary": {
28
+ "accepted_accuracy": 0.921,
29
+ "coverage": 0.9878,
30
  "threshold": 0.12
31
  }
32
  }
artifacts/calibration/intent_subtype.json CHANGED
@@ -1,31 +1,31 @@
1
  {
2
  "calibrated": true,
3
  "confidence_threshold": 0.25,
4
- "generated_at": "2026-03-25T05:09:58.809351+00:00",
5
  "head": "intent_subtype",
6
  "metrics": {
7
- "calibrated_accuracy": 0.875,
8
- "calibrated_expected_calibration_error": 0.0778,
9
- "calibrated_negative_log_likelihood": 0.4165,
10
- "mean_calibrated_confidence": 0.8225,
11
- "mean_raw_confidence": 0.7521,
12
- "raw_accuracy": 0.875,
13
- "raw_expected_calibration_error": 0.1475,
14
- "raw_negative_log_likelihood": 0.4843
15
  },
16
  "minimum_threshold_floor": 0.25,
17
- "optimized_temperature_candidate": 0.834211,
18
  "selected_threshold_before_floor": {
19
- "accepted_accuracy": 0.875,
20
  "coverage": 1.0,
21
  "threshold": 0.0
22
  },
23
  "selection_split": "val",
24
  "selection_target_precision": 0.75,
25
- "temperature": 0.834211,
26
  "temperature_scaling_applied": true,
27
  "threshold_summary": {
28
- "accepted_accuracy": 0.875,
29
  "coverage": 1.0,
30
  "threshold": 0.25
31
  }
 
1
  {
2
  "calibrated": true,
3
  "confidence_threshold": 0.25,
4
+ "generated_at": "2026-03-25T07:02:24.141670+00:00",
5
  "head": "intent_subtype",
6
  "metrics": {
7
+ "calibrated_accuracy": 0.9,
8
+ "calibrated_expected_calibration_error": 0.1181,
9
+ "calibrated_negative_log_likelihood": 0.4376,
10
+ "mean_calibrated_confidence": 0.8188,
11
+ "mean_raw_confidence": 0.7458,
12
+ "raw_accuracy": 0.9,
13
+ "raw_expected_calibration_error": 0.1548,
14
+ "raw_negative_log_likelihood": 0.5071
15
  },
16
  "minimum_threshold_floor": 0.25,
17
+ "optimized_temperature_candidate": 0.831169,
18
  "selected_threshold_before_floor": {
19
+ "accepted_accuracy": 0.9,
20
  "coverage": 1.0,
21
  "threshold": 0.0
22
  },
23
  "selection_split": "val",
24
  "selection_target_precision": 0.75,
25
+ "temperature": 0.831169,
26
  "temperature_scaling_applied": true,
27
  "threshold_summary": {
28
+ "accepted_accuracy": 0.9,
29
  "coverage": 1.0,
30
  "threshold": 0.25
31
  }
artifacts/calibration/intent_type.json CHANGED
@@ -1,31 +1,31 @@
1
  {
2
  "calibrated": true,
3
  "confidence_threshold": 0.4,
4
- "generated_at": "2026-03-25T05:09:42.900721+00:00",
5
  "head": "intent_type",
6
  "metrics": {
7
- "calibrated_accuracy": 0.9362,
8
- "calibrated_expected_calibration_error": 0.0424,
9
- "calibrated_negative_log_likelihood": 0.3117,
10
- "mean_calibrated_confidence": 0.8993,
11
- "mean_raw_confidence": 0.8741,
12
- "raw_accuracy": 0.9362,
13
- "raw_expected_calibration_error": 0.0788,
14
- "raw_negative_log_likelihood": 0.3262
15
  },
16
  "minimum_threshold_floor": 0.4,
17
- "optimized_temperature_candidate": 0.916196,
18
  "selected_threshold_before_floor": {
19
- "accepted_accuracy": 0.9362,
20
  "coverage": 1.0,
21
  "threshold": 0.0
22
  },
23
  "selection_split": "val",
24
  "selection_target_precision": 0.8,
25
- "temperature": 0.916196,
26
  "temperature_scaling_applied": true,
27
  "threshold_summary": {
28
- "accepted_accuracy": 0.9362,
29
  "coverage": 1.0,
30
  "threshold": 0.4
31
  }
 
1
  {
2
  "calibrated": true,
3
  "confidence_threshold": 0.4,
4
+ "generated_at": "2026-03-25T07:02:07.798259+00:00",
5
  "head": "intent_type",
6
  "metrics": {
7
+ "calibrated_accuracy": 0.9149,
8
+ "calibrated_expected_calibration_error": 0.061,
9
+ "calibrated_negative_log_likelihood": 0.3056,
10
+ "mean_calibrated_confidence": 0.9173,
11
+ "mean_raw_confidence": 0.8989,
12
+ "raw_accuracy": 0.9149,
13
+ "raw_expected_calibration_error": 0.0532,
14
+ "raw_negative_log_likelihood": 0.314
15
  },
16
  "minimum_threshold_floor": 0.4,
17
+ "optimized_temperature_candidate": 0.93857,
18
  "selected_threshold_before_floor": {
19
+ "accepted_accuracy": 0.9149,
20
  "coverage": 1.0,
21
  "threshold": 0.0
22
  },
23
  "selection_split": "val",
24
  "selection_target_precision": 0.8,
25
+ "temperature": 0.93857,
26
  "temperature_scaling_applied": true,
27
  "threshold_summary": {
28
+ "accepted_accuracy": 0.9149,
29
  "coverage": 1.0,
30
  "threshold": 0.4
31
  }
artifacts/evaluation/latest/combined_demo_benchmark.json CHANGED
@@ -5,26 +5,19 @@
5
  "response": {
6
  "meta": {
7
  "calibration_enabled": true,
 
8
  "system_version": "0.6.0-phase4"
9
  },
10
  "model_output": {
11
  "classification": {
12
  "iab_content": {
13
- "mapping_confidence": 0.9035,
14
- "mapping_mode": "nearest_equivalent",
15
  "taxonomy": "IAB Content Taxonomy",
16
  "taxonomy_version": "3.0",
17
  "tier1": {
18
  "id": "596",
19
  "label": "Technology & Computing"
20
- },
21
- "tier2": {
22
- "id": "599",
23
- "label": "Computing"
24
- },
25
- "tier3": {
26
- "id": "602",
27
- "label": "Software and Applications"
28
  }
29
  },
30
  "intent": {
@@ -32,31 +25,31 @@
32
  "component_confidence": {
33
  "decision_phase": {
34
  "calibrated": true,
35
- "confidence": 0.9947,
36
  "confidence_threshold": 0.22,
37
  "label": "awareness",
38
  "meets_threshold": true,
39
- "raw_confidence": 0.9788
40
  },
41
  "intent_subtype": {
42
  "calibrated": true,
43
- "confidence": 0.9547,
44
  "confidence_threshold": 0.25,
45
  "label": "education",
46
  "meets_threshold": true,
47
- "raw_confidence": 0.9547
48
  },
49
  "intent_type": {
50
  "calibrated": true,
51
- "confidence": 0.9972,
52
  "confidence_threshold": 0.4,
53
  "label": "informational",
54
  "meets_threshold": true,
55
- "raw_confidence": 0.9662
56
  },
57
  "overall_strategy": "min_required_component_confidence"
58
  },
59
- "confidence": 0.9947,
60
  "decision_phase": "awareness",
61
  "subtype": "education",
62
  "summary": "Classified as informational intent with subtype education in the awareness phase.",
@@ -95,18 +88,19 @@
95
  "response": {
96
  "meta": {
97
  "calibration_enabled": true,
 
98
  "system_version": "0.6.0-phase4"
99
  },
100
  "model_output": {
101
  "classification": {
102
  "iab_content": {
103
- "mapping_confidence": 0.8427,
104
- "mapping_mode": "nearest_equivalent",
105
  "taxonomy": "IAB Content Taxonomy",
106
  "taxonomy_version": "3.0",
107
  "tier1": {
108
- "id": "1",
109
- "label": "Automotive"
110
  }
111
  },
112
  "intent": {
@@ -114,31 +108,31 @@
114
  "component_confidence": {
115
  "decision_phase": {
116
  "calibrated": true,
117
- "confidence": 0.9944,
118
  "confidence_threshold": 0.22,
119
  "label": "awareness",
120
  "meets_threshold": true,
121
- "raw_confidence": 0.9779
122
  },
123
  "intent_subtype": {
124
  "calibrated": true,
125
- "confidence": 0.955,
126
  "confidence_threshold": 0.25,
127
  "label": "education",
128
  "meets_threshold": true,
129
- "raw_confidence": 0.955
130
  },
131
  "intent_type": {
132
  "calibrated": true,
133
- "confidence": 0.9969,
134
  "confidence_threshold": 0.4,
135
  "label": "informational",
136
  "meets_threshold": true,
137
- "raw_confidence": 0.9637
138
  },
139
  "overall_strategy": "min_required_component_confidence"
140
  },
141
- "confidence": 0.9944,
142
  "decision_phase": "awareness",
143
  "subtype": "education",
144
  "summary": "Classified as informational intent with subtype education in the awareness phase.",
@@ -177,53 +171,54 @@
177
  "response": {
178
  "meta": {
179
  "calibration_enabled": true,
 
180
  "system_version": "0.6.0-phase4"
181
  },
182
  "model_output": {
183
  "classification": {
184
  "iab_content": {
185
- "mapping_confidence": 0.7798,
186
  "mapping_mode": "nearest_equivalent",
187
  "taxonomy": "IAB Content Taxonomy",
188
  "taxonomy_version": "3.0",
189
  "tier1": {
190
- "id": "483",
191
- "label": "Sports"
192
  }
193
  },
194
  "intent": {
195
- "commercial_score": 0.656,
196
  "component_confidence": {
197
  "decision_phase": {
198
  "calibrated": true,
199
- "confidence": 0.9965,
200
  "confidence_threshold": 0.22,
201
  "label": "consideration",
202
  "meets_threshold": true,
203
- "raw_confidence": 0.9846
204
  },
205
  "intent_subtype": {
206
  "calibrated": true,
207
- "confidence": 0.4682,
208
  "confidence_threshold": 0.25,
209
- "label": "product_discovery",
210
  "meets_threshold": true,
211
- "raw_confidence": 0.4682
212
  },
213
  "intent_type": {
214
  "calibrated": true,
215
- "confidence": 0.9995,
216
  "confidence_threshold": 0.4,
217
  "label": "commercial",
218
  "meets_threshold": true,
219
- "raw_confidence": 0.9895
220
  },
221
  "overall_strategy": "min_required_component_confidence"
222
  },
223
- "confidence": 0.4682,
224
  "decision_phase": "consideration",
225
- "subtype": "product_discovery",
226
- "summary": "Classified as commercial intent with subtype product_discovery in the consideration phase.",
227
  "type": "commercial"
228
  }
229
  },
@@ -234,8 +229,8 @@
234
  "consideration"
235
  ],
236
  "opportunity": {
237
- "strength": "medium",
238
- "type": "soft_recommendation"
239
  },
240
  "policy": {
241
  "applied_thresholds": {
@@ -245,7 +240,7 @@
245
  "intent_type_confidence_min": 0.4
246
  },
247
  "decision_basis": "score_threshold",
248
- "eligibility_reason": "commercial_discovery_signal_present",
249
  "monetization_eligibility": "allowed_with_caution",
250
  "regulated_vertical": false,
251
  "sensitivity": "low"
@@ -259,26 +254,19 @@
259
  "response": {
260
  "meta": {
261
  "calibration_enabled": true,
 
262
  "system_version": "0.6.0-phase4"
263
  },
264
  "model_output": {
265
  "classification": {
266
  "iab_content": {
267
- "mapping_confidence": 0.8606,
268
  "mapping_mode": "nearest_equivalent",
269
  "taxonomy": "IAB Content Taxonomy",
270
  "taxonomy_version": "3.0",
271
  "tier1": {
272
- "id": "596",
273
- "label": "Technology & Computing"
274
- },
275
- "tier2": {
276
- "id": "599",
277
- "label": "Computing"
278
- },
279
- "tier3": {
280
- "id": "619",
281
- "label": "Internet"
282
  }
283
  },
284
  "intent": {
@@ -286,31 +274,31 @@
286
  "component_confidence": {
287
  "decision_phase": {
288
  "calibrated": true,
289
- "confidence": 0.9964,
290
  "confidence_threshold": 0.22,
291
  "label": "consideration",
292
  "meets_threshold": true,
293
- "raw_confidence": 0.9842
294
  },
295
  "intent_subtype": {
296
  "calibrated": true,
297
- "confidence": 0.9449,
298
  "confidence_threshold": 0.25,
299
  "label": "comparison",
300
  "meets_threshold": true,
301
- "raw_confidence": 0.9449
302
  },
303
  "intent_type": {
304
  "calibrated": true,
305
- "confidence": 0.9995,
306
  "confidence_threshold": 0.4,
307
  "label": "commercial",
308
  "meets_threshold": true,
309
- "raw_confidence": 0.9892
310
  },
311
  "overall_strategy": "min_required_component_confidence"
312
  },
313
- "confidence": 0.9449,
314
  "decision_phase": "consideration",
315
  "subtype": "comparison",
316
  "summary": "Classified as commercial intent with subtype comparison in the consideration phase.",
@@ -349,22 +337,19 @@
349
  "response": {
350
  "meta": {
351
  "calibration_enabled": true,
 
352
  "system_version": "0.6.0-phase4"
353
  },
354
  "model_output": {
355
  "classification": {
356
  "iab_content": {
357
- "mapping_confidence": 0.8737,
358
  "mapping_mode": "nearest_equivalent",
359
  "taxonomy": "IAB Content Taxonomy",
360
  "taxonomy_version": "3.0",
361
  "tier1": {
362
- "id": "52",
363
- "label": "Business and Finance"
364
- },
365
- "tier2": {
366
- "id": "53",
367
- "label": "Business"
368
  }
369
  },
370
  "intent": {
@@ -372,31 +357,31 @@
372
  "component_confidence": {
373
  "decision_phase": {
374
  "calibrated": true,
375
- "confidence": 0.963,
376
  "confidence_threshold": 0.22,
377
  "label": "decision",
378
  "meets_threshold": true,
379
- "raw_confidence": 0.9122
380
  },
381
  "intent_subtype": {
382
  "calibrated": true,
383
- "confidence": 0.9119,
384
  "confidence_threshold": 0.25,
385
  "label": "provider_selection",
386
  "meets_threshold": true,
387
- "raw_confidence": 0.9119
388
  },
389
  "intent_type": {
390
  "calibrated": true,
391
- "confidence": 0.9994,
392
  "confidence_threshold": 0.4,
393
  "label": "commercial",
394
  "meets_threshold": true,
395
- "raw_confidence": 0.9874
396
  },
397
  "overall_strategy": "min_required_component_confidence"
398
  },
399
- "confidence": 0.9119,
400
  "decision_phase": "decision",
401
  "subtype": "provider_selection",
402
  "summary": "Classified as commercial intent with subtype provider_selection in the decision phase.",
@@ -435,26 +420,19 @@
435
  "response": {
436
  "meta": {
437
  "calibration_enabled": true,
 
438
  "system_version": "0.6.0-phase4"
439
  },
440
  "model_output": {
441
  "classification": {
442
  "iab_content": {
443
- "mapping_confidence": 0.7133,
444
  "mapping_mode": "nearest_equivalent",
445
  "taxonomy": "IAB Content Taxonomy",
446
  "taxonomy_version": "3.0",
447
  "tier1": {
448
- "id": "239",
449
- "label": "Hobbies & Interests"
450
- },
451
- "tier2": {
452
- "id": "264",
453
- "label": "Content Production"
454
- },
455
- "tier3": {
456
- "id": "266",
457
- "label": "Freelance Writing"
458
  }
459
  },
460
  "intent": {
@@ -462,31 +440,31 @@
462
  "component_confidence": {
463
  "decision_phase": {
464
  "calibrated": true,
465
- "confidence": 0.9991,
466
  "confidence_threshold": 0.22,
467
  "label": "action",
468
  "meets_threshold": true,
469
- "raw_confidence": 0.9947
470
  },
471
  "intent_subtype": {
472
  "calibrated": true,
473
- "confidence": 0.9382,
474
  "confidence_threshold": 0.25,
475
  "label": "signup",
476
  "meets_threshold": true,
477
- "raw_confidence": 0.9382
478
  },
479
  "intent_type": {
480
  "calibrated": true,
481
- "confidence": 0.9996,
482
  "confidence_threshold": 0.4,
483
  "label": "transactional",
484
  "meets_threshold": true,
485
- "raw_confidence": 0.9902
486
  },
487
  "overall_strategy": "min_required_component_confidence"
488
  },
489
- "confidence": 0.9382,
490
  "decision_phase": "action",
491
  "subtype": "signup",
492
  "summary": "Classified as transactional intent with subtype signup in the action phase.",
@@ -525,13 +503,14 @@
525
  "response": {
526
  "meta": {
527
  "calibration_enabled": true,
 
528
  "system_version": "0.6.0-phase4"
529
  },
530
  "model_output": {
531
  "classification": {
532
  "iab_content": {
533
- "mapping_confidence": 0.7997,
534
- "mapping_mode": "nearest_equivalent",
535
  "taxonomy": "IAB Content Taxonomy",
536
  "taxonomy_version": "3.0",
537
  "tier1": {
@@ -548,31 +527,31 @@
548
  "component_confidence": {
549
  "decision_phase": {
550
  "calibrated": true,
551
- "confidence": 0.999,
552
  "confidence_threshold": 0.22,
553
  "label": "action",
554
  "meets_threshold": true,
555
- "raw_confidence": 0.9945
556
  },
557
  "intent_subtype": {
558
  "calibrated": true,
559
- "confidence": 0.8724,
560
  "confidence_threshold": 0.25,
561
  "label": "booking",
562
  "meets_threshold": true,
563
- "raw_confidence": 0.8724
564
  },
565
  "intent_type": {
566
  "calibrated": true,
567
- "confidence": 0.9996,
568
  "confidence_threshold": 0.4,
569
  "label": "transactional",
570
  "meets_threshold": true,
571
- "raw_confidence": 0.9901
572
  },
573
  "overall_strategy": "min_required_component_confidence"
574
  },
575
- "confidence": 0.8724,
576
  "decision_phase": "action",
577
  "subtype": "booking",
578
  "summary": "Classified as transactional intent with subtype booking in the action phase.",
@@ -611,30 +590,19 @@
611
  "response": {
612
  "meta": {
613
  "calibration_enabled": true,
 
614
  "system_version": "0.6.0-phase4"
615
  },
616
  "model_output": {
617
  "classification": {
618
  "iab_content": {
619
- "mapping_confidence": 0.8423,
620
  "mapping_mode": "nearest_equivalent",
621
  "taxonomy": "IAB Content Taxonomy",
622
  "taxonomy_version": "3.0",
623
  "tier1": {
624
  "id": "596",
625
  "label": "Technology & Computing"
626
- },
627
- "tier2": {
628
- "id": "599",
629
- "label": "Computing"
630
- },
631
- "tier3": {
632
- "id": "619",
633
- "label": "Internet"
634
- },
635
- "tier4": {
636
- "id": "620",
637
- "label": "Cloud Computing"
638
  }
639
  },
640
  "intent": {
@@ -642,31 +610,31 @@
642
  "component_confidence": {
643
  "decision_phase": {
644
  "calibrated": true,
645
- "confidence": 0.9736,
646
  "confidence_threshold": 0.22,
647
  "label": "post_purchase",
648
  "meets_threshold": true,
649
- "raw_confidence": 0.9264
650
  },
651
  "intent_subtype": {
652
  "calibrated": true,
653
- "confidence": 0.921,
654
  "confidence_threshold": 0.25,
655
  "label": "onboarding_setup",
656
  "meets_threshold": true,
657
- "raw_confidence": 0.921
658
  },
659
  "intent_type": {
660
  "calibrated": true,
661
- "confidence": 0.9935,
662
  "confidence_threshold": 0.4,
663
  "label": "transactional",
664
  "meets_threshold": true,
665
- "raw_confidence": 0.9448
666
  },
667
  "overall_strategy": "min_required_component_confidence"
668
  },
669
- "confidence": 0.9736,
670
  "decision_phase": "post_purchase",
671
  "subtype": "onboarding_setup",
672
  "summary": "Classified as transactional intent with subtype onboarding_setup in the post_purchase phase.",
@@ -705,26 +673,19 @@
705
  "response": {
706
  "meta": {
707
  "calibration_enabled": true,
 
708
  "system_version": "0.6.0-phase4"
709
  },
710
  "model_output": {
711
  "classification": {
712
  "iab_content": {
713
- "mapping_confidence": 0.8039,
714
  "mapping_mode": "nearest_equivalent",
715
  "taxonomy": "IAB Content Taxonomy",
716
  "taxonomy_version": "3.0",
717
  "tier1": {
718
- "id": "596",
719
- "label": "Technology & Computing"
720
- },
721
- "tier2": {
722
- "id": "599",
723
- "label": "Computing"
724
- },
725
- "tier3": {
726
- "id": "619",
727
- "label": "Internet"
728
  }
729
  },
730
  "intent": {
@@ -732,31 +693,31 @@
732
  "component_confidence": {
733
  "decision_phase": {
734
  "calibrated": true,
735
- "confidence": 0.9969,
736
  "confidence_threshold": 0.22,
737
  "label": "support",
738
  "meets_threshold": true,
739
- "raw_confidence": 0.9863
740
  },
741
  "intent_subtype": {
742
  "calibrated": true,
743
- "confidence": 0.923,
744
  "confidence_threshold": 0.25,
745
  "label": "account_help",
746
  "meets_threshold": true,
747
- "raw_confidence": 0.923
748
  },
749
  "intent_type": {
750
  "calibrated": true,
751
- "confidence": 0.9988,
752
  "confidence_threshold": 0.4,
753
  "label": "support",
754
  "meets_threshold": true,
755
- "raw_confidence": 0.9811
756
  },
757
  "overall_strategy": "min_required_component_confidence"
758
  },
759
- "confidence": 0.923,
760
  "decision_phase": "support",
761
  "subtype": "account_help",
762
  "summary": "Classified as support intent with subtype account_help in the support phase.",
@@ -801,22 +762,19 @@
801
  "response": {
802
  "meta": {
803
  "calibration_enabled": true,
 
804
  "system_version": "0.6.0-phase4"
805
  },
806
  "model_output": {
807
  "classification": {
808
  "iab_content": {
809
- "mapping_confidence": 0.7854,
810
- "mapping_mode": "nearest_equivalent",
811
  "taxonomy": "IAB Content Taxonomy",
812
  "taxonomy_version": "3.0",
813
  "tier1": {
814
- "id": "286",
815
- "label": "Medical Health"
816
- },
817
- "tier2": {
818
- "id": "287",
819
- "label": "Diseases and Conditions"
820
  }
821
  },
822
  "intent": {
@@ -824,31 +782,31 @@
824
  "component_confidence": {
825
  "decision_phase": {
826
  "calibrated": true,
827
- "confidence": 0.9699,
828
  "confidence_threshold": 0.22,
829
  "label": "awareness",
830
  "meets_threshold": true,
831
- "raw_confidence": 0.9258
832
  },
833
  "intent_subtype": {
834
  "calibrated": true,
835
- "confidence": 0.9435,
836
  "confidence_threshold": 0.25,
837
  "label": "emotional_reflection",
838
  "meets_threshold": true,
839
- "raw_confidence": 0.9435
840
  },
841
  "intent_type": {
842
  "calibrated": true,
843
- "confidence": 0.9916,
844
  "confidence_threshold": 0.4,
845
  "label": "personal_reflection",
846
  "meets_threshold": true,
847
- "raw_confidence": 0.9406
848
  },
849
  "overall_strategy": "min_required_component_confidence"
850
  },
851
- "confidence": 0.9435,
852
  "decision_phase": "awareness",
853
  "subtype": "emotional_reflection",
854
  "summary": "Classified as personal_reflection intent with subtype emotional_reflection in the awareness phase.",
@@ -893,18 +851,19 @@
893
  "response": {
894
  "meta": {
895
  "calibration_enabled": true,
 
896
  "system_version": "0.6.0-phase4"
897
  },
898
  "model_output": {
899
  "classification": {
900
  "iab_content": {
901
- "mapping_confidence": 0.7304,
902
  "mapping_mode": "nearest_equivalent",
903
  "taxonomy": "IAB Content Taxonomy",
904
  "taxonomy_version": "3.0",
905
  "tier1": {
906
- "id": "SPSHQ5",
907
- "label": "Genres"
908
  }
909
  },
910
  "intent": {
@@ -912,31 +871,31 @@
912
  "component_confidence": {
913
  "decision_phase": {
914
  "calibrated": true,
915
- "confidence": 0.9934,
916
  "confidence_threshold": 0.22,
917
  "label": "research",
918
  "meets_threshold": true,
919
- "raw_confidence": 0.9746
920
  },
921
  "intent_subtype": {
922
  "calibrated": true,
923
- "confidence": 0.9631,
924
  "confidence_threshold": 0.25,
925
  "label": "follow_up",
926
  "meets_threshold": true,
927
- "raw_confidence": 0.9631
928
  },
929
  "intent_type": {
930
  "calibrated": true,
931
- "confidence": 0.9934,
932
  "confidence_threshold": 0.4,
933
  "label": "ambiguous",
934
  "meets_threshold": true,
935
- "raw_confidence": 0.9405
936
  },
937
  "overall_strategy": "min_required_component_confidence"
938
  },
939
- "confidence": 0.9934,
940
  "decision_phase": "research",
941
  "subtype": "follow_up",
942
  "summary": "Classified as ambiguous intent with subtype follow_up in the research phase.",
@@ -981,22 +940,19 @@
981
  "response": {
982
  "meta": {
983
  "calibration_enabled": true,
 
984
  "system_version": "0.6.0-phase4"
985
  },
986
  "model_output": {
987
  "classification": {
988
  "iab_content": {
989
- "mapping_confidence": 0.7779,
990
  "mapping_mode": "nearest_equivalent",
991
  "taxonomy": "IAB Content Taxonomy",
992
  "taxonomy_version": "3.0",
993
  "tier1": {
994
- "id": "52",
995
- "label": "Business and Finance"
996
- },
997
- "tier2": {
998
- "id": "53",
999
- "label": "Business"
1000
  }
1001
  },
1002
  "intent": {
@@ -1004,31 +960,31 @@
1004
  "component_confidence": {
1005
  "decision_phase": {
1006
  "calibrated": true,
1007
- "confidence": 0.9888,
1008
  "confidence_threshold": 0.22,
1009
  "label": "research",
1010
  "meets_threshold": true,
1011
- "raw_confidence": 0.9639
1012
  },
1013
  "intent_subtype": {
1014
  "calibrated": true,
1015
- "confidence": 0.9487,
1016
  "confidence_threshold": 0.25,
1017
  "label": "follow_up",
1018
  "meets_threshold": true,
1019
- "raw_confidence": 0.9487
1020
  },
1021
  "intent_type": {
1022
  "calibrated": true,
1023
- "confidence": 0.9916,
1024
  "confidence_threshold": 0.4,
1025
  "label": "ambiguous",
1026
  "meets_threshold": true,
1027
- "raw_confidence": 0.9321
1028
  },
1029
  "overall_strategy": "min_required_component_confidence"
1030
  },
1031
- "confidence": 0.9888,
1032
  "decision_phase": "research",
1033
  "subtype": "follow_up",
1034
  "summary": "Classified as ambiguous intent with subtype follow_up in the research phase.",
@@ -1073,30 +1029,19 @@
1073
  "response": {
1074
  "meta": {
1075
  "calibration_enabled": true,
 
1076
  "system_version": "0.6.0-phase4"
1077
  },
1078
  "model_output": {
1079
  "classification": {
1080
  "iab_content": {
1081
- "mapping_confidence": 0.7753,
1082
  "mapping_mode": "nearest_equivalent",
1083
  "taxonomy": "IAB Content Taxonomy",
1084
  "taxonomy_version": "3.0",
1085
  "tier1": {
1086
- "id": "596",
1087
- "label": "Technology & Computing"
1088
- },
1089
- "tier2": {
1090
- "id": "599",
1091
- "label": "Computing"
1092
- },
1093
- "tier3": {
1094
- "id": "619",
1095
- "label": "Internet"
1096
- },
1097
- "tier4": {
1098
- "id": "623",
1099
- "label": "Email"
1100
  }
1101
  },
1102
  "intent": {
@@ -1104,31 +1049,31 @@
1104
  "component_confidence": {
1105
  "decision_phase": {
1106
  "calibrated": true,
1107
- "confidence": 0.9991,
1108
  "confidence_threshold": 0.22,
1109
  "label": "action",
1110
  "meets_threshold": true,
1111
- "raw_confidence": 0.9948
1112
  },
1113
  "intent_subtype": {
1114
  "calibrated": true,
1115
- "confidence": 0.8874,
1116
  "confidence_threshold": 0.25,
1117
  "label": "signup",
1118
  "meets_threshold": true,
1119
- "raw_confidence": 0.8874
1120
  },
1121
  "intent_type": {
1122
  "calibrated": true,
1123
- "confidence": 0.9996,
1124
  "confidence_threshold": 0.4,
1125
  "label": "transactional",
1126
  "meets_threshold": true,
1127
- "raw_confidence": 0.9908
1128
  },
1129
  "overall_strategy": "min_required_component_confidence"
1130
  },
1131
- "confidence": 0.8874,
1132
  "decision_phase": "action",
1133
  "subtype": "signup",
1134
  "summary": "Classified as transactional intent with subtype signup in the action phase.",
@@ -1167,30 +1112,19 @@
1167
  "response": {
1168
  "meta": {
1169
  "calibration_enabled": true,
 
1170
  "system_version": "0.6.0-phase4"
1171
  },
1172
  "model_output": {
1173
  "classification": {
1174
  "iab_content": {
1175
- "mapping_confidence": 0.8626,
1176
- "mapping_mode": "nearest_equivalent",
1177
  "taxonomy": "IAB Content Taxonomy",
1178
  "taxonomy_version": "3.0",
1179
  "tier1": {
1180
  "id": "596",
1181
  "label": "Technology & Computing"
1182
- },
1183
- "tier2": {
1184
- "id": "599",
1185
- "label": "Computing"
1186
- },
1187
- "tier3": {
1188
- "id": "619",
1189
- "label": "Internet"
1190
- },
1191
- "tier4": {
1192
- "id": "627",
1193
- "label": "Search"
1194
  }
1195
  },
1196
  "intent": {
@@ -1198,31 +1132,31 @@
1198
  "component_confidence": {
1199
  "decision_phase": {
1200
  "calibrated": true,
1201
- "confidence": 0.9966,
1202
  "confidence_threshold": 0.22,
1203
  "label": "consideration",
1204
  "meets_threshold": true,
1205
- "raw_confidence": 0.9852
1206
  },
1207
  "intent_subtype": {
1208
  "calibrated": true,
1209
- "confidence": 0.9415,
1210
  "confidence_threshold": 0.25,
1211
  "label": "comparison",
1212
  "meets_threshold": true,
1213
- "raw_confidence": 0.9415
1214
  },
1215
  "intent_type": {
1216
  "calibrated": true,
1217
- "confidence": 0.9994,
1218
  "confidence_threshold": 0.4,
1219
  "label": "commercial",
1220
  "meets_threshold": true,
1221
- "raw_confidence": 0.9884
1222
  },
1223
  "overall_strategy": "min_required_component_confidence"
1224
  },
1225
- "confidence": 0.9415,
1226
  "decision_phase": "consideration",
1227
  "subtype": "comparison",
1228
  "summary": "Classified as commercial intent with subtype comparison in the consideration phase.",
@@ -1261,30 +1195,19 @@
1261
  "response": {
1262
  "meta": {
1263
  "calibration_enabled": true,
 
1264
  "system_version": "0.6.0-phase4"
1265
  },
1266
  "model_output": {
1267
  "classification": {
1268
  "iab_content": {
1269
- "mapping_confidence": 0.8741,
1270
- "mapping_mode": "nearest_equivalent",
1271
  "taxonomy": "IAB Content Taxonomy",
1272
  "taxonomy_version": "3.0",
1273
  "tier1": {
1274
  "id": "596",
1275
  "label": "Technology & Computing"
1276
- },
1277
- "tier2": {
1278
- "id": "599",
1279
- "label": "Computing"
1280
- },
1281
- "tier3": {
1282
- "id": "619",
1283
- "label": "Internet"
1284
- },
1285
- "tier4": {
1286
- "id": "620",
1287
- "label": "Cloud Computing"
1288
  }
1289
  },
1290
  "intent": {
@@ -1292,31 +1215,31 @@
1292
  "component_confidence": {
1293
  "decision_phase": {
1294
  "calibrated": true,
1295
- "confidence": 0.9939,
1296
  "confidence_threshold": 0.22,
1297
  "label": "awareness",
1298
  "meets_threshold": true,
1299
- "raw_confidence": 0.9764
1300
  },
1301
  "intent_subtype": {
1302
  "calibrated": true,
1303
- "confidence": 0.9545,
1304
  "confidence_threshold": 0.25,
1305
  "label": "education",
1306
  "meets_threshold": true,
1307
- "raw_confidence": 0.9545
1308
  },
1309
  "intent_type": {
1310
  "calibrated": true,
1311
- "confidence": 0.9964,
1312
  "confidence_threshold": 0.4,
1313
  "label": "informational",
1314
  "meets_threshold": true,
1315
- "raw_confidence": 0.961
1316
  },
1317
  "overall_strategy": "min_required_component_confidence"
1318
  },
1319
- "confidence": 0.9939,
1320
  "decision_phase": "awareness",
1321
  "subtype": "education",
1322
  "summary": "Classified as informational intent with subtype education in the awareness phase.",
 
5
  "response": {
6
  "meta": {
7
  "calibration_enabled": true,
8
+ "iab_mapping_is_placeholder": false,
9
  "system_version": "0.6.0-phase4"
10
  },
11
  "model_output": {
12
  "classification": {
13
  "iab_content": {
14
+ "mapping_confidence": 0.2243,
15
+ "mapping_mode": "exact",
16
  "taxonomy": "IAB Content Taxonomy",
17
  "taxonomy_version": "3.0",
18
  "tier1": {
19
  "id": "596",
20
  "label": "Technology & Computing"
 
 
 
 
 
 
 
 
21
  }
22
  },
23
  "intent": {
 
25
  "component_confidence": {
26
  "decision_phase": {
27
  "calibrated": true,
28
+ "confidence": 0.9632,
29
  "confidence_threshold": 0.22,
30
  "label": "awareness",
31
  "meets_threshold": true,
32
+ "raw_confidence": 0.9627
33
  },
34
  "intent_subtype": {
35
  "calibrated": true,
36
+ "confidence": 0.9866,
37
  "confidence_threshold": 0.25,
38
  "label": "education",
39
  "meets_threshold": true,
40
+ "raw_confidence": 0.9572
41
  },
42
  "intent_type": {
43
  "calibrated": true,
44
+ "confidence": 0.9737,
45
  "confidence_threshold": 0.4,
46
  "label": "informational",
47
  "meets_threshold": true,
48
+ "raw_confidence": 0.9629
49
  },
50
  "overall_strategy": "min_required_component_confidence"
51
  },
52
+ "confidence": 0.9632,
53
  "decision_phase": "awareness",
54
  "subtype": "education",
55
  "summary": "Classified as informational intent with subtype education in the awareness phase.",
 
88
  "response": {
89
  "meta": {
90
  "calibration_enabled": true,
91
+ "iab_mapping_is_placeholder": false,
92
  "system_version": "0.6.0-phase4"
93
  },
94
  "model_output": {
95
  "classification": {
96
  "iab_content": {
97
+ "mapping_confidence": 0.1254,
98
+ "mapping_mode": "exact",
99
  "taxonomy": "IAB Content Taxonomy",
100
  "taxonomy_version": "3.0",
101
  "tier1": {
102
+ "id": "596",
103
+ "label": "Technology & Computing"
104
  }
105
  },
106
  "intent": {
 
108
  "component_confidence": {
109
  "decision_phase": {
110
  "calibrated": true,
111
+ "confidence": 0.9477,
112
  "confidence_threshold": 0.22,
113
  "label": "awareness",
114
  "meets_threshold": true,
115
+ "raw_confidence": 0.9471
116
  },
117
  "intent_subtype": {
118
  "calibrated": true,
119
+ "confidence": 0.9851,
120
  "confidence_threshold": 0.25,
121
  "label": "education",
122
  "meets_threshold": true,
123
+ "raw_confidence": 0.9541
124
  },
125
  "intent_type": {
126
  "calibrated": true,
127
+ "confidence": 0.973,
128
  "confidence_threshold": 0.4,
129
  "label": "informational",
130
  "meets_threshold": true,
131
+ "raw_confidence": 0.9621
132
  },
133
  "overall_strategy": "min_required_component_confidence"
134
  },
135
+ "confidence": 0.9477,
136
  "decision_phase": "awareness",
137
  "subtype": "education",
138
  "summary": "Classified as informational intent with subtype education in the awareness phase.",
 
171
  "response": {
172
  "meta": {
173
  "calibration_enabled": true,
174
+ "iab_mapping_is_placeholder": false,
175
  "system_version": "0.6.0-phase4"
176
  },
177
  "model_output": {
178
  "classification": {
179
  "iab_content": {
180
+ "mapping_confidence": 0.1886,
181
  "mapping_mode": "nearest_equivalent",
182
  "taxonomy": "IAB Content Taxonomy",
183
  "taxonomy_version": "3.0",
184
  "tier1": {
185
+ "id": "1",
186
+ "label": "Automotive"
187
  }
188
  },
189
  "intent": {
190
+ "commercial_score": 0.728,
191
  "component_confidence": {
192
  "decision_phase": {
193
  "calibrated": true,
194
+ "confidence": 0.9402,
195
  "confidence_threshold": 0.22,
196
  "label": "consideration",
197
  "meets_threshold": true,
198
+ "raw_confidence": 0.9395
199
  },
200
  "intent_subtype": {
201
  "calibrated": true,
202
+ "confidence": 0.518,
203
  "confidence_threshold": 0.25,
204
+ "label": "comparison",
205
  "meets_threshold": true,
206
+ "raw_confidence": 0.4557
207
  },
208
  "intent_type": {
209
  "calibrated": true,
210
+ "confidence": 0.9808,
211
  "confidence_threshold": 0.4,
212
  "label": "commercial",
213
  "meets_threshold": true,
214
+ "raw_confidence": 0.9724
215
  },
216
  "overall_strategy": "min_required_component_confidence"
217
  },
218
+ "confidence": 0.518,
219
  "decision_phase": "consideration",
220
+ "subtype": "comparison",
221
+ "summary": "Classified as commercial intent with subtype comparison in the consideration phase.",
222
  "type": "commercial"
223
  }
224
  },
 
229
  "consideration"
230
  ],
231
  "opportunity": {
232
+ "strength": "high",
233
+ "type": "comparison_slot"
234
  },
235
  "policy": {
236
  "applied_thresholds": {
 
240
  "intent_type_confidence_min": 0.4
241
  },
242
  "decision_basis": "score_threshold",
243
+ "eligibility_reason": "commercial_comparison_signal_present",
244
  "monetization_eligibility": "allowed_with_caution",
245
  "regulated_vertical": false,
246
  "sensitivity": "low"
 
254
  "response": {
255
  "meta": {
256
  "calibration_enabled": true,
257
+ "iab_mapping_is_placeholder": false,
258
  "system_version": "0.6.0-phase4"
259
  },
260
  "model_output": {
261
  "classification": {
262
  "iab_content": {
263
+ "mapping_confidence": 0.0941,
264
  "mapping_mode": "nearest_equivalent",
265
  "taxonomy": "IAB Content Taxonomy",
266
  "taxonomy_version": "3.0",
267
  "tier1": {
268
+ "id": "123",
269
+ "label": "Careers"
 
 
 
 
 
 
 
 
270
  }
271
  },
272
  "intent": {
 
274
  "component_confidence": {
275
  "decision_phase": {
276
  "calibrated": true,
277
+ "confidence": 0.9117,
278
  "confidence_threshold": 0.22,
279
  "label": "consideration",
280
  "meets_threshold": true,
281
+ "raw_confidence": 0.9108
282
  },
283
  "intent_subtype": {
284
  "calibrated": true,
285
+ "confidence": 0.9762,
286
  "confidence_threshold": 0.25,
287
  "label": "comparison",
288
  "meets_threshold": true,
289
+ "raw_confidence": 0.9343
290
  },
291
  "intent_type": {
292
  "calibrated": true,
293
+ "confidence": 0.9639,
294
  "confidence_threshold": 0.4,
295
  "label": "commercial",
296
  "meets_threshold": true,
297
+ "raw_confidence": 0.9503
298
  },
299
  "overall_strategy": "min_required_component_confidence"
300
  },
301
+ "confidence": 0.9117,
302
  "decision_phase": "consideration",
303
  "subtype": "comparison",
304
  "summary": "Classified as commercial intent with subtype comparison in the consideration phase.",
 
337
  "response": {
338
  "meta": {
339
  "calibration_enabled": true,
340
+ "iab_mapping_is_placeholder": false,
341
  "system_version": "0.6.0-phase4"
342
  },
343
  "model_output": {
344
  "classification": {
345
  "iab_content": {
346
+ "mapping_confidence": 0.411,
347
  "mapping_mode": "nearest_equivalent",
348
  "taxonomy": "IAB Content Taxonomy",
349
  "taxonomy_version": "3.0",
350
  "tier1": {
351
+ "id": "596",
352
+ "label": "Technology & Computing"
 
 
 
 
353
  }
354
  },
355
  "intent": {
 
357
  "component_confidence": {
358
  "decision_phase": {
359
  "calibrated": true,
360
+ "confidence": 0.6147,
361
  "confidence_threshold": 0.22,
362
  "label": "decision",
363
  "meets_threshold": true,
364
+ "raw_confidence": 0.614
365
  },
366
  "intent_subtype": {
367
  "calibrated": true,
368
+ "confidence": 0.758,
369
  "confidence_threshold": 0.25,
370
  "label": "provider_selection",
371
  "meets_threshold": true,
372
+ "raw_confidence": 0.6571
373
  },
374
  "intent_type": {
375
  "calibrated": true,
376
+ "confidence": 0.9801,
377
  "confidence_threshold": 0.4,
378
  "label": "commercial",
379
  "meets_threshold": true,
380
+ "raw_confidence": 0.9714
381
  },
382
  "overall_strategy": "min_required_component_confidence"
383
  },
384
+ "confidence": 0.6147,
385
  "decision_phase": "decision",
386
  "subtype": "provider_selection",
387
  "summary": "Classified as commercial intent with subtype provider_selection in the decision phase.",
 
420
  "response": {
421
  "meta": {
422
  "calibration_enabled": true,
423
+ "iab_mapping_is_placeholder": false,
424
  "system_version": "0.6.0-phase4"
425
  },
426
  "model_output": {
427
  "classification": {
428
  "iab_content": {
429
+ "mapping_confidence": 0.1002,
430
  "mapping_mode": "nearest_equivalent",
431
  "taxonomy": "IAB Content Taxonomy",
432
  "taxonomy_version": "3.0",
433
  "tier1": {
434
+ "id": "v9i3On",
435
+ "label": "Sensitive Topics"
 
 
 
 
 
 
 
 
436
  }
437
  },
438
  "intent": {
 
440
  "component_confidence": {
441
  "decision_phase": {
442
  "calibrated": true,
443
+ "confidence": 0.9175,
444
  "confidence_threshold": 0.22,
445
  "label": "action",
446
  "meets_threshold": true,
447
+ "raw_confidence": 0.9167
448
  },
449
  "intent_subtype": {
450
  "calibrated": true,
451
+ "confidence": 0.9477,
452
  "confidence_threshold": 0.25,
453
  "label": "signup",
454
  "meets_threshold": true,
455
+ "raw_confidence": 0.8793
456
  },
457
  "intent_type": {
458
  "calibrated": true,
459
+ "confidence": 0.9518,
460
  "confidence_threshold": 0.4,
461
  "label": "transactional",
462
  "meets_threshold": true,
463
+ "raw_confidence": 0.9354
464
  },
465
  "overall_strategy": "min_required_component_confidence"
466
  },
467
+ "confidence": 0.9175,
468
  "decision_phase": "action",
469
  "subtype": "signup",
470
  "summary": "Classified as transactional intent with subtype signup in the action phase.",
 
503
  "response": {
504
  "meta": {
505
  "calibration_enabled": true,
506
+ "iab_mapping_is_placeholder": false,
507
  "system_version": "0.6.0-phase4"
508
  },
509
  "model_output": {
510
  "classification": {
511
  "iab_content": {
512
+ "mapping_confidence": 0.3828,
513
+ "mapping_mode": "exact",
514
  "taxonomy": "IAB Content Taxonomy",
515
  "taxonomy_version": "3.0",
516
  "tier1": {
 
527
  "component_confidence": {
528
  "decision_phase": {
529
  "calibrated": true,
530
+ "confidence": 0.9432,
531
  "confidence_threshold": 0.22,
532
  "label": "action",
533
  "meets_threshold": true,
534
+ "raw_confidence": 0.9425
535
  },
536
  "intent_subtype": {
537
  "calibrated": true,
538
+ "confidence": 0.7947,
539
  "confidence_threshold": 0.25,
540
  "label": "booking",
541
  "meets_threshold": true,
542
+ "raw_confidence": 0.6973
543
  },
544
  "intent_type": {
545
  "calibrated": true,
546
+ "confidence": 0.9554,
547
  "confidence_threshold": 0.4,
548
  "label": "transactional",
549
  "meets_threshold": true,
550
+ "raw_confidence": 0.9398
551
  },
552
  "overall_strategy": "min_required_component_confidence"
553
  },
554
+ "confidence": 0.7947,
555
  "decision_phase": "action",
556
  "subtype": "booking",
557
  "summary": "Classified as transactional intent with subtype booking in the action phase.",
 
590
  "response": {
591
  "meta": {
592
  "calibration_enabled": true,
593
+ "iab_mapping_is_placeholder": false,
594
  "system_version": "0.6.0-phase4"
595
  },
596
  "model_output": {
597
  "classification": {
598
  "iab_content": {
599
+ "mapping_confidence": 0.5835,
600
  "mapping_mode": "nearest_equivalent",
601
  "taxonomy": "IAB Content Taxonomy",
602
  "taxonomy_version": "3.0",
603
  "tier1": {
604
  "id": "596",
605
  "label": "Technology & Computing"
 
 
 
 
 
 
 
 
 
 
 
 
606
  }
607
  },
608
  "intent": {
 
610
  "component_confidence": {
611
  "decision_phase": {
612
  "calibrated": true,
613
+ "confidence": 0.9641,
614
  "confidence_threshold": 0.22,
615
  "label": "post_purchase",
616
  "meets_threshold": true,
617
+ "raw_confidence": 0.9637
618
  },
619
  "intent_subtype": {
620
  "calibrated": true,
621
+ "confidence": 0.9717,
622
  "confidence_threshold": 0.25,
623
  "label": "onboarding_setup",
624
  "meets_threshold": true,
625
+ "raw_confidence": 0.9232
626
  },
627
  "intent_type": {
628
  "calibrated": true,
629
+ "confidence": 0.4496,
630
  "confidence_threshold": 0.4,
631
  "label": "transactional",
632
  "meets_threshold": true,
633
+ "raw_confidence": 0.4228
634
  },
635
  "overall_strategy": "min_required_component_confidence"
636
  },
637
+ "confidence": 0.4496,
638
  "decision_phase": "post_purchase",
639
  "subtype": "onboarding_setup",
640
  "summary": "Classified as transactional intent with subtype onboarding_setup in the post_purchase phase.",
 
673
  "response": {
674
  "meta": {
675
  "calibration_enabled": true,
676
+ "iab_mapping_is_placeholder": false,
677
  "system_version": "0.6.0-phase4"
678
  },
679
  "model_output": {
680
  "classification": {
681
  "iab_content": {
682
+ "mapping_confidence": 0.3535,
683
  "mapping_mode": "nearest_equivalent",
684
  "taxonomy": "IAB Content Taxonomy",
685
  "taxonomy_version": "3.0",
686
  "tier1": {
687
+ "id": "391",
688
+ "label": "Personal Finance"
 
 
 
 
 
 
 
 
689
  }
690
  },
691
  "intent": {
 
693
  "component_confidence": {
694
  "decision_phase": {
695
  "calibrated": true,
696
+ "confidence": 0.953,
697
  "confidence_threshold": 0.22,
698
  "label": "support",
699
  "meets_threshold": true,
700
+ "raw_confidence": 0.9525
701
  },
702
  "intent_subtype": {
703
  "calibrated": true,
704
+ "confidence": 0.9154,
705
  "confidence_threshold": 0.25,
706
  "label": "account_help",
707
  "meets_threshold": true,
708
+ "raw_confidence": 0.8312
709
  },
710
  "intent_type": {
711
  "calibrated": true,
712
+ "confidence": 0.9602,
713
  "confidence_threshold": 0.4,
714
  "label": "support",
715
  "meets_threshold": true,
716
+ "raw_confidence": 0.946
717
  },
718
  "overall_strategy": "min_required_component_confidence"
719
  },
720
+ "confidence": 0.9154,
721
  "decision_phase": "support",
722
  "subtype": "account_help",
723
  "summary": "Classified as support intent with subtype account_help in the support phase.",
 
762
  "response": {
763
  "meta": {
764
  "calibration_enabled": true,
765
+ "iab_mapping_is_placeholder": false,
766
  "system_version": "0.6.0-phase4"
767
  },
768
  "model_output": {
769
  "classification": {
770
  "iab_content": {
771
+ "mapping_confidence": 0.1373,
772
+ "mapping_mode": "exact",
773
  "taxonomy": "IAB Content Taxonomy",
774
  "taxonomy_version": "3.0",
775
  "tier1": {
776
+ "id": "186",
777
+ "label": "Family and Relationships"
 
 
 
 
778
  }
779
  },
780
  "intent": {
 
782
  "component_confidence": {
783
  "decision_phase": {
784
  "calibrated": true,
785
+ "confidence": 0.9173,
786
  "confidence_threshold": 0.22,
787
  "label": "awareness",
788
  "meets_threshold": true,
789
+ "raw_confidence": 0.9165
790
  },
791
  "intent_subtype": {
792
  "calibrated": true,
793
+ "confidence": 0.9644,
794
  "confidence_threshold": 0.25,
795
  "label": "emotional_reflection",
796
  "meets_threshold": true,
797
+ "raw_confidence": 0.9072
798
  },
799
  "intent_type": {
800
  "calibrated": true,
801
+ "confidence": 0.96,
802
  "confidence_threshold": 0.4,
803
  "label": "personal_reflection",
804
  "meets_threshold": true,
805
+ "raw_confidence": 0.9459
806
  },
807
  "overall_strategy": "min_required_component_confidence"
808
  },
809
+ "confidence": 0.9173,
810
  "decision_phase": "awareness",
811
  "subtype": "emotional_reflection",
812
  "summary": "Classified as personal_reflection intent with subtype emotional_reflection in the awareness phase.",
 
851
  "response": {
852
  "meta": {
853
  "calibration_enabled": true,
854
+ "iab_mapping_is_placeholder": false,
855
  "system_version": "0.6.0-phase4"
856
  },
857
  "model_output": {
858
  "classification": {
859
  "iab_content": {
860
+ "mapping_confidence": 0.0961,
861
  "mapping_mode": "nearest_equivalent",
862
  "taxonomy": "IAB Content Taxonomy",
863
  "taxonomy_version": "3.0",
864
  "tier1": {
865
+ "id": "v9i3On",
866
+ "label": "Sensitive Topics"
867
  }
868
  },
869
  "intent": {
 
871
  "component_confidence": {
872
  "decision_phase": {
873
  "calibrated": true,
874
+ "confidence": 0.8376,
875
  "confidence_threshold": 0.22,
876
  "label": "research",
877
  "meets_threshold": true,
878
+ "raw_confidence": 0.8363
879
  },
880
  "intent_subtype": {
881
  "calibrated": true,
882
+ "confidence": 0.9649,
883
  "confidence_threshold": 0.25,
884
  "label": "follow_up",
885
  "meets_threshold": true,
886
+ "raw_confidence": 0.9077
887
  },
888
  "intent_type": {
889
  "calibrated": true,
890
+ "confidence": 0.9456,
891
  "confidence_threshold": 0.4,
892
  "label": "ambiguous",
893
  "meets_threshold": true,
894
+ "raw_confidence": 0.9278
895
  },
896
  "overall_strategy": "min_required_component_confidence"
897
  },
898
+ "confidence": 0.8376,
899
  "decision_phase": "research",
900
  "subtype": "follow_up",
901
  "summary": "Classified as ambiguous intent with subtype follow_up in the research phase.",
 
940
  "response": {
941
  "meta": {
942
  "calibration_enabled": true,
943
+ "iab_mapping_is_placeholder": false,
944
  "system_version": "0.6.0-phase4"
945
  },
946
  "model_output": {
947
  "classification": {
948
  "iab_content": {
949
+ "mapping_confidence": 0.1013,
950
  "mapping_mode": "nearest_equivalent",
951
  "taxonomy": "IAB Content Taxonomy",
952
  "taxonomy_version": "3.0",
953
  "tier1": {
954
+ "id": "473",
955
+ "label": "Shopping"
 
 
 
 
956
  }
957
  },
958
  "intent": {
 
960
  "component_confidence": {
961
  "decision_phase": {
962
  "calibrated": true,
963
+ "confidence": 0.9155,
964
  "confidence_threshold": 0.22,
965
  "label": "research",
966
  "meets_threshold": true,
967
+ "raw_confidence": 0.9146
968
  },
969
  "intent_subtype": {
970
  "calibrated": true,
971
+ "confidence": 0.9201,
972
  "confidence_threshold": 0.25,
973
  "label": "follow_up",
974
  "meets_threshold": true,
975
+ "raw_confidence": 0.8294
976
  },
977
  "intent_type": {
978
  "calibrated": true,
979
+ "confidence": 0.8933,
980
  "confidence_threshold": 0.4,
981
  "label": "ambiguous",
982
  "meets_threshold": true,
983
+ "raw_confidence": 0.8671
984
  },
985
  "overall_strategy": "min_required_component_confidence"
986
  },
987
+ "confidence": 0.8933,
988
  "decision_phase": "research",
989
  "subtype": "follow_up",
990
  "summary": "Classified as ambiguous intent with subtype follow_up in the research phase.",
 
1029
  "response": {
1030
  "meta": {
1031
  "calibration_enabled": true,
1032
+ "iab_mapping_is_placeholder": false,
1033
  "system_version": "0.6.0-phase4"
1034
  },
1035
  "model_output": {
1036
  "classification": {
1037
  "iab_content": {
1038
+ "mapping_confidence": 0.0593,
1039
  "mapping_mode": "nearest_equivalent",
1040
  "taxonomy": "IAB Content Taxonomy",
1041
  "taxonomy_version": "3.0",
1042
  "tier1": {
1043
+ "id": "v9i3On",
1044
+ "label": "Sensitive Topics"
 
 
 
 
 
 
 
 
 
 
 
 
1045
  }
1046
  },
1047
  "intent": {
 
1049
  "component_confidence": {
1050
  "decision_phase": {
1051
  "calibrated": true,
1052
+ "confidence": 0.9532,
1053
  "confidence_threshold": 0.22,
1054
  "label": "action",
1055
  "meets_threshold": true,
1056
+ "raw_confidence": 0.9527
1057
  },
1058
  "intent_subtype": {
1059
  "calibrated": true,
1060
+ "confidence": 0.8947,
1061
  "confidence_threshold": 0.25,
1062
  "label": "signup",
1063
  "meets_threshold": true,
1064
+ "raw_confidence": 0.8015
1065
  },
1066
  "intent_type": {
1067
  "calibrated": true,
1068
+ "confidence": 0.9685,
1069
  "confidence_threshold": 0.4,
1070
  "label": "transactional",
1071
  "meets_threshold": true,
1072
+ "raw_confidence": 0.9565
1073
  },
1074
  "overall_strategy": "min_required_component_confidence"
1075
  },
1076
+ "confidence": 0.8947,
1077
  "decision_phase": "action",
1078
  "subtype": "signup",
1079
  "summary": "Classified as transactional intent with subtype signup in the action phase.",
 
1112
  "response": {
1113
  "meta": {
1114
  "calibration_enabled": true,
1115
+ "iab_mapping_is_placeholder": false,
1116
  "system_version": "0.6.0-phase4"
1117
  },
1118
  "model_output": {
1119
  "classification": {
1120
  "iab_content": {
1121
+ "mapping_confidence": 0.1316,
1122
+ "mapping_mode": "exact",
1123
  "taxonomy": "IAB Content Taxonomy",
1124
  "taxonomy_version": "3.0",
1125
  "tier1": {
1126
  "id": "596",
1127
  "label": "Technology & Computing"
 
 
 
 
 
 
 
 
 
 
 
 
1128
  }
1129
  },
1130
  "intent": {
 
1132
  "component_confidence": {
1133
  "decision_phase": {
1134
  "calibrated": true,
1135
+ "confidence": 0.9582,
1136
  "confidence_threshold": 0.22,
1137
  "label": "consideration",
1138
  "meets_threshold": true,
1139
+ "raw_confidence": 0.9576
1140
  },
1141
  "intent_subtype": {
1142
  "calibrated": true,
1143
+ "confidence": 0.9612,
1144
  "confidence_threshold": 0.25,
1145
  "label": "comparison",
1146
  "meets_threshold": true,
1147
+ "raw_confidence": 0.9052
1148
  },
1149
  "intent_type": {
1150
  "calibrated": true,
1151
+ "confidence": 0.9594,
1152
  "confidence_threshold": 0.4,
1153
  "label": "commercial",
1154
  "meets_threshold": true,
1155
+ "raw_confidence": 0.9447
1156
  },
1157
  "overall_strategy": "min_required_component_confidence"
1158
  },
1159
+ "confidence": 0.9582,
1160
  "decision_phase": "consideration",
1161
  "subtype": "comparison",
1162
  "summary": "Classified as commercial intent with subtype comparison in the consideration phase.",
 
1195
  "response": {
1196
  "meta": {
1197
  "calibration_enabled": true,
1198
+ "iab_mapping_is_placeholder": false,
1199
  "system_version": "0.6.0-phase4"
1200
  },
1201
  "model_output": {
1202
  "classification": {
1203
  "iab_content": {
1204
+ "mapping_confidence": 0.1245,
1205
+ "mapping_mode": "exact",
1206
  "taxonomy": "IAB Content Taxonomy",
1207
  "taxonomy_version": "3.0",
1208
  "tier1": {
1209
  "id": "596",
1210
  "label": "Technology & Computing"
 
 
 
 
 
 
 
 
 
 
 
 
1211
  }
1212
  },
1213
  "intent": {
 
1215
  "component_confidence": {
1216
  "decision_phase": {
1217
  "calibrated": true,
1218
+ "confidence": 0.9531,
1219
  "confidence_threshold": 0.22,
1220
  "label": "awareness",
1221
  "meets_threshold": true,
1222
+ "raw_confidence": 0.9526
1223
  },
1224
  "intent_subtype": {
1225
  "calibrated": true,
1226
+ "confidence": 0.9844,
1227
  "confidence_threshold": 0.25,
1228
  "label": "education",
1229
  "meets_threshold": true,
1230
+ "raw_confidence": 0.9518
1231
  },
1232
  "intent_type": {
1233
  "calibrated": true,
1234
+ "confidence": 0.9738,
1235
  "confidence_threshold": 0.4,
1236
  "label": "informational",
1237
  "meets_threshold": true,
1238
+ "raw_confidence": 0.9632
1239
  },
1240
  "overall_strategy": "min_required_component_confidence"
1241
  },
1242
+ "confidence": 0.9531,
1243
  "decision_phase": "awareness",
1244
  "subtype": "education",
1245
  "summary": "Classified as informational intent with subtype education in the awareness phase.",
artifacts/evaluation/latest/decision_phase_difficulty_benchmark_confusion_matrix.csv CHANGED
@@ -1,8 +1,8 @@
1
  ,awareness,research,consideration,decision,action,post_purchase,support
2
- awareness,14,1,0,0,0,0,0
3
- research,0,14,0,0,0,1,0
4
  consideration,0,1,14,0,0,0,0
5
  decision,0,0,0,15,0,0,0
6
- action,0,0,0,1,13,1,0
7
  post_purchase,0,0,0,0,0,15,0
8
  support,0,0,0,0,0,0,15
 
1
  ,awareness,research,consideration,decision,action,post_purchase,support
2
+ awareness,15,0,0,0,0,0,0
3
+ research,0,15,0,0,0,0,0
4
  consideration,0,1,14,0,0,0,0
5
  decision,0,0,0,15,0,0,0
6
+ action,0,1,0,0,14,0,0
7
  post_purchase,0,0,0,0,0,15,0
8
  support,0,0,0,0,0,0,15
artifacts/evaluation/latest/decision_phase_difficulty_benchmark_report.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "accepted_accuracy": 0.9524,
3
  "accepted_coverage": 1.0,
4
- "accuracy": 0.9524,
5
- "confusion_matrix_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/artifacts/evaluation/latest/decision_phase_difficulty_benchmark_confusion_matrix.csv",
6
  "count": 105,
7
- "dataset_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/data/decision_phase_benchmark.jsonl",
8
  "difficulty_breakdown": {
9
  "easy": {
10
  "accepted_accuracy": 0.9714,
@@ -15,12 +15,12 @@
15
  "macro_f1": 0.9711
16
  },
17
  "hard": {
18
- "accepted_accuracy": 0.8857,
19
  "accepted_coverage": 1.0,
20
- "accuracy": 0.8857,
21
  "count": 35,
22
  "fallback_rate": 0.0,
23
- "macro_f1": 0.883
24
  },
25
  "medium": {
26
  "accepted_accuracy": 1.0,
@@ -33,19 +33,19 @@
33
  },
34
  "fallback_rate": 0.0,
35
  "head": "decision_phase",
36
- "macro_f1": 0.9526,
37
  "per_class_metrics": {
38
- "accuracy": 0.9523809523809523,
39
  "action": {
40
- "f1-score": 0.9285714285714286,
41
  "precision": 1.0,
42
- "recall": 0.8666666666666667,
43
  "support": 15.0
44
  },
45
  "awareness": {
46
- "f1-score": 0.9655172413793104,
47
  "precision": 1.0,
48
- "recall": 0.9333333333333333,
49
  "support": 15.0
50
  },
51
  "consideration": {
@@ -55,27 +55,27 @@
55
  "support": 15.0
56
  },
57
  "decision": {
58
- "f1-score": 0.967741935483871,
59
- "precision": 0.9375,
60
  "recall": 1.0,
61
  "support": 15.0
62
  },
63
  "macro avg": {
64
- "f1-score": 0.9525819504665047,
65
- "precision": 0.9564075630252101,
66
- "recall": 0.9523809523809523,
67
  "support": 105.0
68
  },
69
  "post_purchase": {
70
- "f1-score": 0.9375,
71
- "precision": 0.8823529411764706,
72
  "recall": 1.0,
73
  "support": 15.0
74
  },
75
  "research": {
76
- "f1-score": 0.9032258064516129,
77
- "precision": 0.875,
78
- "recall": 0.9333333333333333,
79
  "support": 15.0
80
  },
81
  "support": {
@@ -85,9 +85,9 @@
85
  "support": 15.0
86
  },
87
  "weighted avg": {
88
- "f1-score": 0.9525819504665048,
89
- "precision": 0.9564075630252101,
90
- "recall": 0.9523809523809523,
91
  "support": 105.0
92
  }
93
  },
 
1
  {
2
+ "accepted_accuracy": 0.981,
3
  "accepted_coverage": 1.0,
4
+ "accuracy": 0.981,
5
+ "confusion_matrix_path": "/root/.cache/huggingface/hub/models--admesh--agentic-intent-classifier/snapshots/0584798f8efee6beccd778b0afa06782ab5add60/agentic-intent-classifier/agentic-intent-classifier/artifacts/evaluation/latest/decision_phase_difficulty_benchmark_confusion_matrix.csv",
6
  "count": 105,
7
+ "dataset_path": "/root/.cache/huggingface/hub/models--admesh--agentic-intent-classifier/snapshots/0584798f8efee6beccd778b0afa06782ab5add60/agentic-intent-classifier/agentic-intent-classifier/data/decision_phase_benchmark.jsonl",
8
  "difficulty_breakdown": {
9
  "easy": {
10
  "accepted_accuracy": 0.9714,
 
15
  "macro_f1": 0.9711
16
  },
17
  "hard": {
18
+ "accepted_accuracy": 0.9714,
19
  "accepted_coverage": 1.0,
20
+ "accuracy": 0.9714,
21
  "count": 35,
22
  "fallback_rate": 0.0,
23
+ "macro_f1": 0.9711
24
  },
25
  "medium": {
26
  "accepted_accuracy": 1.0,
 
33
  },
34
  "fallback_rate": 0.0,
35
  "head": "decision_phase",
36
+ "macro_f1": 0.9812,
37
  "per_class_metrics": {
38
+ "accuracy": 0.9809523809523809,
39
  "action": {
40
+ "f1-score": 0.9655172413793104,
41
  "precision": 1.0,
42
+ "recall": 0.9333333333333333,
43
  "support": 15.0
44
  },
45
  "awareness": {
46
+ "f1-score": 1.0,
47
  "precision": 1.0,
48
+ "recall": 1.0,
49
  "support": 15.0
50
  },
51
  "consideration": {
 
55
  "support": 15.0
56
  },
57
  "decision": {
58
+ "f1-score": 1.0,
59
+ "precision": 1.0,
60
  "recall": 1.0,
61
  "support": 15.0
62
  },
63
  "macro avg": {
64
+ "f1-score": 0.9812192118226601,
65
+ "precision": 0.9831932773109244,
66
+ "recall": 0.980952380952381,
67
  "support": 105.0
68
  },
69
  "post_purchase": {
70
+ "f1-score": 1.0,
71
+ "precision": 1.0,
72
  "recall": 1.0,
73
  "support": 15.0
74
  },
75
  "research": {
76
+ "f1-score": 0.9375,
77
+ "precision": 0.8823529411764706,
78
+ "recall": 1.0,
79
  "support": 15.0
80
  },
81
  "support": {
 
85
  "support": 15.0
86
  },
87
  "weighted avg": {
88
+ "f1-score": 0.9812192118226601,
89
+ "precision": 0.9831932773109243,
90
+ "recall": 0.9809523809523809,
91
  "support": 105.0
92
  }
93
  },
artifacts/evaluation/latest/decision_phase_final_wave_cases_report.json CHANGED
@@ -2,9 +2,9 @@
2
  "accepted_accuracy": 0.963,
3
  "accepted_coverage": 1.0,
4
  "accuracy": 0.963,
5
- "confusion_matrix_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/artifacts/evaluation/latest/decision_phase_final_wave_cases_confusion_matrix.csv",
6
  "count": 27,
7
- "dataset_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/data/decision_phase/final_wave_cases.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "decision_phase",
10
  "macro_f1": 0.961,
 
2
  "accepted_accuracy": 0.963,
3
  "accepted_coverage": 1.0,
4
  "accuracy": 0.963,
5
+ "confusion_matrix_path": "/root/.cache/huggingface/hub/models--admesh--agentic-intent-classifier/snapshots/0584798f8efee6beccd778b0afa06782ab5add60/agentic-intent-classifier/agentic-intent-classifier/artifacts/evaluation/latest/decision_phase_final_wave_cases_confusion_matrix.csv",
6
  "count": 27,
7
+ "dataset_path": "/root/.cache/huggingface/hub/models--admesh--agentic-intent-classifier/snapshots/0584798f8efee6beccd778b0afa06782ab5add60/agentic-intent-classifier/agentic-intent-classifier/data/decision_phase/final_wave_cases.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "decision_phase",
10
  "macro_f1": 0.961,
artifacts/evaluation/latest/decision_phase_hard_cases_confusion_matrix.csv CHANGED
@@ -1,7 +1,7 @@
1
  ,awareness,research,consideration,decision,action,post_purchase,support
2
  awareness,6,0,0,0,0,0,0
3
  research,2,5,0,0,0,0,0
4
- consideration,0,1,6,0,0,0,0
5
  decision,0,0,0,7,0,0,0
6
  action,0,0,0,0,0,0,0
7
  post_purchase,0,0,0,0,0,6,0
 
1
  ,awareness,research,consideration,decision,action,post_purchase,support
2
  awareness,6,0,0,0,0,0,0
3
  research,2,5,0,0,0,0,0
4
+ consideration,0,2,5,0,0,0,0
5
  decision,0,0,0,7,0,0,0
6
  action,0,0,0,0,0,0,0
7
  post_purchase,0,0,0,0,0,6,0
artifacts/evaluation/latest/decision_phase_hard_cases_report.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "accepted_accuracy": 0.9231,
3
  "accepted_coverage": 1.0,
4
- "accuracy": 0.9231,
5
- "confusion_matrix_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/artifacts/evaluation/latest/decision_phase_hard_cases_confusion_matrix.csv",
6
  "count": 39,
7
- "dataset_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/data/decision_phase/hard_cases.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "decision_phase",
10
- "macro_f1": 0.9249,
11
  "per_class_metrics": {
12
- "accuracy": 0.9230769230769231,
13
  "action": {
14
  "f1-score": 0.0,
15
  "precision": 0.0,
@@ -23,9 +23,9 @@
23
  "support": 6.0
24
  },
25
  "consideration": {
26
- "f1-score": 0.9230769230769231,
27
  "precision": 1.0,
28
- "recall": 0.8571428571428571,
29
  "support": 7.0
30
  },
31
  "decision": {
@@ -35,9 +35,9 @@
35
  "support": 7.0
36
  },
37
  "macro avg": {
38
- "f1-score": 0.792778649921507,
39
- "precision": 0.7976190476190477,
40
- "recall": 0.7959183673469388,
41
  "support": 39.0
42
  },
43
  "post_purchase": {
@@ -47,8 +47,8 @@
47
  "support": 6.0
48
  },
49
  "research": {
50
- "f1-score": 0.7692307692307693,
51
- "precision": 0.8333333333333334,
52
  "recall": 0.7142857142857143,
53
  "support": 7.0
54
  },
@@ -59,9 +59,9 @@
59
  "support": 6.0
60
  },
61
  "weighted avg": {
62
- "f1-score": 0.9227951535643845,
63
- "precision": 0.9316239316239316,
64
- "recall": 0.9230769230769231,
65
  "support": 39.0
66
  }
67
  },
 
1
  {
2
+ "accepted_accuracy": 0.8974,
3
  "accepted_coverage": 1.0,
4
+ "accuracy": 0.8974,
5
+ "confusion_matrix_path": "/root/.cache/huggingface/hub/models--admesh--agentic-intent-classifier/snapshots/0584798f8efee6beccd778b0afa06782ab5add60/agentic-intent-classifier/agentic-intent-classifier/artifacts/evaluation/latest/decision_phase_hard_cases_confusion_matrix.csv",
6
  "count": 39,
7
+ "dataset_path": "/root/.cache/huggingface/hub/models--admesh--agentic-intent-classifier/snapshots/0584798f8efee6beccd778b0afa06782ab5add60/agentic-intent-classifier/agentic-intent-classifier/data/decision_phase/hard_cases.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "decision_phase",
10
+ "macro_f1": 0.9008,
11
  "per_class_metrics": {
12
+ "accuracy": 0.8974358974358975,
13
  "action": {
14
  "f1-score": 0.0,
15
  "precision": 0.0,
 
23
  "support": 6.0
24
  },
25
  "consideration": {
26
+ "f1-score": 0.8333333333333334,
27
  "precision": 1.0,
28
+ "recall": 0.7142857142857143,
29
  "support": 7.0
30
  },
31
  "decision": {
 
35
  "support": 7.0
36
  },
37
  "macro avg": {
38
+ "f1-score": 0.772108843537415,
39
+ "precision": 0.7806122448979592,
40
+ "recall": 0.7755102040816327,
41
  "support": 39.0
42
  },
43
  "post_purchase": {
 
47
  "support": 6.0
48
  },
49
  "research": {
50
+ "f1-score": 0.7142857142857143,
51
+ "precision": 0.7142857142857143,
52
  "recall": 0.7142857142857143,
53
  "support": 7.0
54
  },
 
59
  "support": 6.0
60
  },
61
  "weighted avg": {
62
+ "f1-score": 0.8968253968253967,
63
+ "precision": 0.9102564102564102,
64
+ "recall": 0.8974358974358975,
65
  "support": 39.0
66
  }
67
  },
artifacts/evaluation/latest/decision_phase_test_confusion_matrix.csv CHANGED
@@ -1,8 +1,8 @@
1
  ,awareness,research,consideration,decision,action,post_purchase,support
2
  awareness,3,0,0,0,0,0,0
3
  research,3,2,0,0,0,0,0
4
- consideration,0,1,4,0,0,0,0
5
  decision,0,0,0,5,0,0,0
6
  action,0,0,0,0,3,0,0
7
  post_purchase,0,0,0,0,0,4,0
8
- support,0,0,0,0,0,0,4
 
1
  ,awareness,research,consideration,decision,action,post_purchase,support
2
  awareness,3,0,0,0,0,0,0
3
  research,3,2,0,0,0,0,0
4
+ consideration,0,2,3,0,0,0,0
5
  decision,0,0,0,5,0,0,0
6
  action,0,0,0,0,3,0,0
7
  post_purchase,0,0,0,0,0,4,0
8
+ support,0,0,0,0,0,1,3
artifacts/evaluation/latest/decision_phase_test_report.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "accepted_accuracy": 0.8621,
3
  "accepted_coverage": 1.0,
4
- "accuracy": 0.8621,
5
- "confusion_matrix_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/artifacts/evaluation/latest/decision_phase_test_confusion_matrix.csv",
6
  "count": 29,
7
- "dataset_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/data/decision_phase/test.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "decision_phase",
10
- "macro_f1": 0.8651,
11
  "per_class_metrics": {
12
- "accuracy": 0.8620689655172413,
13
  "action": {
14
  "f1-score": 1.0,
15
  "precision": 1.0,
@@ -23,9 +23,9 @@
23
  "support": 3.0
24
  },
25
  "consideration": {
26
- "f1-score": 0.8888888888888888,
27
  "precision": 1.0,
28
- "recall": 0.8,
29
  "support": 5.0
30
  },
31
  "decision": {
@@ -35,33 +35,33 @@
35
  "support": 5.0
36
  },
37
  "macro avg": {
38
- "f1-score": 0.865079365079365,
39
- "precision": 0.8809523809523808,
40
- "recall": 0.8857142857142858,
41
  "support": 29.0
42
  },
43
  "post_purchase": {
44
- "f1-score": 1.0,
45
- "precision": 1.0,
46
  "recall": 1.0,
47
  "support": 4.0
48
  },
49
  "research": {
50
- "f1-score": 0.5,
51
- "precision": 0.6666666666666666,
52
  "recall": 0.4,
53
  "support": 5.0
54
  },
55
  "support": {
56
- "f1-score": 1.0,
57
  "precision": 1.0,
58
- "recall": 1.0,
59
  "support": 4.0
60
  },
61
  "weighted avg": {
62
- "f1-score": 0.8601532567049808,
63
- "precision": 0.8908045977011494,
64
- "recall": 0.8620689655172413,
65
  "support": 29.0
66
  }
67
  },
 
1
  {
2
+ "accepted_accuracy": 0.7931,
3
  "accepted_coverage": 1.0,
4
+ "accuracy": 0.7931,
5
+ "confusion_matrix_path": "/root/.cache/huggingface/hub/models--admesh--agentic-intent-classifier/snapshots/0584798f8efee6beccd778b0afa06782ab5add60/agentic-intent-classifier/agentic-intent-classifier/artifacts/evaluation/latest/decision_phase_test_confusion_matrix.csv",
6
  "count": 29,
7
+ "dataset_path": "/root/.cache/huggingface/hub/models--admesh--agentic-intent-classifier/snapshots/0584798f8efee6beccd778b0afa06782ab5add60/agentic-intent-classifier/agentic-intent-classifier/data/decision_phase/test.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "decision_phase",
10
+ "macro_f1": 0.801,
11
  "per_class_metrics": {
12
+ "accuracy": 0.7931034482758621,
13
  "action": {
14
  "f1-score": 1.0,
15
  "precision": 1.0,
 
23
  "support": 3.0
24
  },
25
  "consideration": {
26
+ "f1-score": 0.75,
27
  "precision": 1.0,
28
+ "recall": 0.6,
29
  "support": 5.0
30
  },
31
  "decision": {
 
35
  "support": 5.0
36
  },
37
  "macro avg": {
38
+ "f1-score": 0.8010204081632653,
39
+ "precision": 0.8285714285714285,
40
+ "recall": 0.8214285714285714,
41
  "support": 29.0
42
  },
43
  "post_purchase": {
44
+ "f1-score": 0.8888888888888888,
45
+ "precision": 0.8,
46
  "recall": 1.0,
47
  "support": 4.0
48
  },
49
  "research": {
50
+ "f1-score": 0.4444444444444444,
51
+ "precision": 0.5,
52
  "recall": 0.4,
53
  "support": 5.0
54
  },
55
  "support": {
56
+ "f1-score": 0.8571428571428571,
57
  "precision": 1.0,
58
+ "recall": 0.75,
59
  "support": 4.0
60
  },
61
  "weighted avg": {
62
+ "f1-score": 0.7915982484948002,
63
+ "precision": 0.8344827586206897,
64
+ "recall": 0.7931034482758621,
65
  "support": 29.0
66
  }
67
  },
artifacts/evaluation/latest/decision_phase_train_confusion_matrix.csv CHANGED
@@ -1,8 +1,8 @@
1
  ,awareness,research,consideration,decision,action,post_purchase,support
2
  awareness,16,0,0,0,0,0,0
3
- research,1,14,0,0,0,0,0
4
- consideration,0,0,17,0,0,0,0
5
- decision,0,0,0,16,0,0,0
6
  action,0,0,0,0,10,0,0
7
  post_purchase,0,0,0,0,0,14,0
8
  support,0,0,0,0,0,0,14
 
1
  ,awareness,research,consideration,decision,action,post_purchase,support
2
  awareness,16,0,0,0,0,0,0
3
+ research,2,13,0,0,0,0,0
4
+ consideration,0,1,16,0,0,0,0
5
+ decision,0,0,1,15,0,0,0
6
  action,0,0,0,0,10,0,0
7
  post_purchase,0,0,0,0,0,14,0
8
  support,0,0,0,0,0,0,14
artifacts/evaluation/latest/decision_phase_train_report.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "accepted_accuracy": 0.9902,
3
  "accepted_coverage": 1.0,
4
- "accuracy": 0.9902,
5
- "confusion_matrix_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/artifacts/evaluation/latest/decision_phase_train_confusion_matrix.csv",
6
  "count": 102,
7
- "dataset_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/data/decision_phase/train.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "decision_phase",
10
- "macro_f1": 0.9907,
11
  "per_class_metrics": {
12
- "accuracy": 0.9901960784313726,
13
  "action": {
14
  "f1-score": 1.0,
15
  "precision": 1.0,
@@ -17,27 +17,27 @@
17
  "support": 10.0
18
  },
19
  "awareness": {
20
- "f1-score": 0.9696969696969697,
21
- "precision": 0.9411764705882353,
22
  "recall": 1.0,
23
  "support": 16.0
24
  },
25
  "consideration": {
26
- "f1-score": 1.0,
27
- "precision": 1.0,
28
- "recall": 1.0,
29
  "support": 17.0
30
  },
31
  "decision": {
32
- "f1-score": 1.0,
33
  "precision": 1.0,
34
- "recall": 1.0,
35
  "support": 16.0
36
  },
37
  "macro avg": {
38
- "f1-score": 0.9907448872966115,
39
- "precision": 0.9915966386554622,
40
- "recall": 0.9904761904761905,
41
  "support": 102.0
42
  },
43
  "post_purchase": {
@@ -47,9 +47,9 @@
47
  "support": 14.0
48
  },
49
  "research": {
50
- "f1-score": 0.9655172413793104,
51
- "precision": 1.0,
52
- "recall": 0.9333333333333333,
53
  "support": 15.0
54
  },
55
  "support": {
@@ -59,9 +59,9 @@
59
  "support": 14.0
60
  },
61
  "weighted avg": {
62
- "f1-score": 0.9901755895670704,
63
- "precision": 0.9907727797001153,
64
- "recall": 0.9901960784313726,
65
  "support": 102.0
66
  }
67
  },
 
1
  {
2
+ "accepted_accuracy": 0.9608,
3
  "accepted_coverage": 1.0,
4
+ "accuracy": 0.9608,
5
+ "confusion_matrix_path": "/root/.cache/huggingface/hub/models--admesh--agentic-intent-classifier/snapshots/0584798f8efee6beccd778b0afa06782ab5add60/agentic-intent-classifier/agentic-intent-classifier/artifacts/evaluation/latest/decision_phase_train_confusion_matrix.csv",
6
  "count": 102,
7
+ "dataset_path": "/root/.cache/huggingface/hub/models--admesh--agentic-intent-classifier/snapshots/0584798f8efee6beccd778b0afa06782ab5add60/agentic-intent-classifier/agentic-intent-classifier/data/decision_phase/train.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "decision_phase",
10
+ "macro_f1": 0.9638,
11
  "per_class_metrics": {
12
+ "accuracy": 0.9607843137254902,
13
  "action": {
14
  "f1-score": 1.0,
15
  "precision": 1.0,
 
17
  "support": 10.0
18
  },
19
  "awareness": {
20
+ "f1-score": 0.9411764705882353,
21
+ "precision": 0.8888888888888888,
22
  "recall": 1.0,
23
  "support": 16.0
24
  },
25
  "consideration": {
26
+ "f1-score": 0.9411764705882353,
27
+ "precision": 0.9411764705882353,
28
+ "recall": 0.9411764705882353,
29
  "support": 17.0
30
  },
31
  "decision": {
32
+ "f1-score": 0.967741935483871,
33
  "precision": 1.0,
34
+ "recall": 0.9375,
35
  "support": 16.0
36
  },
37
  "macro avg": {
38
+ "f1-score": 0.9638066572568961,
39
+ "precision": 0.9655195411497932,
40
+ "recall": 0.9636204481792717,
41
  "support": 102.0
42
  },
43
  "post_purchase": {
 
47
  "support": 14.0
48
  },
49
  "research": {
50
+ "f1-score": 0.896551724137931,
51
+ "precision": 0.9285714285714286,
52
+ "recall": 0.8666666666666667,
53
  "support": 15.0
54
  },
55
  "support": {
 
59
  "support": 14.0
60
  },
61
  "weighted avg": {
62
+ "f1-score": 0.9606957878355163,
63
+ "precision": 0.9622626828509181,
64
+ "recall": 0.9607843137254902,
65
  "support": 102.0
66
  }
67
  },
artifacts/evaluation/latest/decision_phase_val_confusion_matrix.csv CHANGED
@@ -5,4 +5,4 @@ consideration,0,0,5,0,0,0,0
5
  decision,0,0,1,3,0,0,0
6
  action,0,0,0,0,3,0,0
7
  post_purchase,0,1,0,0,0,3,0
8
- support,0,0,0,0,0,0,4
 
5
  decision,0,0,1,3,0,0,0
6
  action,0,0,0,0,3,0,0
7
  post_purchase,0,1,0,0,0,3,0
8
+ support,0,0,0,0,0,1,3
artifacts/evaluation/latest/decision_phase_val_report.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "accepted_accuracy": 0.8966,
3
  "accepted_coverage": 1.0,
4
- "accuracy": 0.8966,
5
- "confusion_matrix_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/artifacts/evaluation/latest/decision_phase_val_confusion_matrix.csv",
6
  "count": 29,
7
- "dataset_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/data/decision_phase/val.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "decision_phase",
10
- "macro_f1": 0.8975,
11
  "per_class_metrics": {
12
- "accuracy": 0.896551724137931,
13
  "action": {
14
  "f1-score": 1.0,
15
  "precision": 1.0,
@@ -35,14 +35,14 @@
35
  "support": 4.0
36
  },
37
  "macro avg": {
38
- "f1-score": 0.8974953617810761,
39
- "precision": 0.9166666666666667,
40
- "recall": 0.8928571428571429,
41
  "support": 29.0
42
  },
43
  "post_purchase": {
44
- "f1-score": 0.8571428571428571,
45
- "precision": 1.0,
46
  "recall": 0.75,
47
  "support": 4.0
48
  },
@@ -53,15 +53,15 @@
53
  "support": 4.0
54
  },
55
  "support": {
56
- "f1-score": 1.0,
57
  "precision": 1.0,
58
- "recall": 1.0,
59
  "support": 4.0
60
  },
61
  "weighted avg": {
62
- "f1-score": 0.8947604120017911,
63
- "precision": 0.9080459770114944,
64
- "recall": 0.896551724137931,
65
  "support": 29.0
66
  }
67
  },
 
1
  {
2
+ "accepted_accuracy": 0.8621,
3
  "accepted_coverage": 1.0,
4
+ "accuracy": 0.8621,
5
+ "confusion_matrix_path": "/root/.cache/huggingface/hub/models--admesh--agentic-intent-classifier/snapshots/0584798f8efee6beccd778b0afa06782ab5add60/agentic-intent-classifier/agentic-intent-classifier/artifacts/evaluation/latest/decision_phase_val_confusion_matrix.csv",
6
  "count": 29,
7
+ "dataset_path": "/root/.cache/huggingface/hub/models--admesh--agentic-intent-classifier/snapshots/0584798f8efee6beccd778b0afa06782ab5add60/agentic-intent-classifier/agentic-intent-classifier/data/decision_phase/val.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "decision_phase",
10
+ "macro_f1": 0.8618,
11
  "per_class_metrics": {
12
+ "accuracy": 0.8620689655172413,
13
  "action": {
14
  "f1-score": 1.0,
15
  "precision": 1.0,
 
35
  "support": 4.0
36
  },
37
  "macro avg": {
38
+ "f1-score": 0.8617810760667904,
39
+ "precision": 0.880952380952381,
40
+ "recall": 0.8571428571428571,
41
  "support": 29.0
42
  },
43
  "post_purchase": {
44
+ "f1-score": 0.75,
45
+ "precision": 0.75,
46
  "recall": 0.75,
47
  "support": 4.0
48
  },
 
53
  "support": 4.0
54
  },
55
  "support": {
56
+ "f1-score": 0.8571428571428571,
57
  "precision": 1.0,
58
+ "recall": 0.75,
59
  "support": 4.0
60
  },
61
  "weighted avg": {
62
+ "f1-score": 0.8602776533811015,
63
+ "precision": 0.8735632183908046,
64
+ "recall": 0.8620689655172413,
65
  "support": 29.0
66
  }
67
  },
artifacts/evaluation/latest/iab_behavior_lock_regression.json CHANGED
@@ -1,21 +1,21 @@
1
  {
2
  "by_status": {
3
  "must_fix": {
4
- "failed": 0,
5
- "passed": 12,
6
  "total": 12
7
  }
8
  },
9
- "cases_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/examples/iab_behavior_lock_cases.json",
10
  "count": 12,
11
- "failed": 0,
12
- "passed": 12,
13
  "results": [
14
  {
15
  "actual": {
16
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
17
  "model_output.classification.iab_content.tier1.label": "Automotive",
18
- "model_output.classification.iab_content.tier2.label": "Auto Type"
19
  },
20
  "expected": {
21
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -23,15 +23,21 @@
23
  "model_output.classification.iab_content.tier2.label": "Auto Type"
24
  },
25
  "id": "car-buying-maps-to-automotive-buying",
26
- "mismatches": [],
 
 
 
 
 
 
27
  "notes": "Vehicle shopping queries should map into the automotive buying branch, not business sales.",
28
- "pass": true,
29
  "status": "must_fix",
30
  "text": "Which car to buy in 2026"
31
  },
32
  {
33
  "actual": {
34
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
35
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
36
  "model_output.classification.iab_content.tier2.label": "Computing",
37
  "model_output.classification.iab_content.tier3.label": "Laptops"
@@ -43,9 +49,15 @@
43
  "model_output.classification.iab_content.tier3.label": "Laptops"
44
  },
45
  "id": "laptop-buying-maps-to-laptops",
46
- "mismatches": [],
 
 
 
 
 
 
47
  "notes": "Laptop shopping should resolve into the laptops branch, not business sales.",
48
- "pass": true,
49
  "status": "must_fix",
50
  "text": "Which laptop to buy in 2026"
51
  },
@@ -53,8 +65,8 @@
53
  "actual": {
54
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
55
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
56
- "model_output.classification.iab_content.tier2.label": "Computing",
57
- "model_output.classification.iab_content.tier3.label": "Laptops"
58
  },
59
  "expected": {
60
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -63,18 +75,29 @@
63
  "model_output.classification.iab_content.tier3.label": "Laptops"
64
  },
65
  "id": "labtop-buying-maps-to-laptops",
66
- "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
67
  "notes": "Common typo handling should still land in the laptops branch.",
68
- "pass": true,
69
  "status": "must_fix",
70
  "text": "Which labtop to buy in 2026"
71
  },
72
  {
73
  "actual": {
74
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
75
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
76
- "model_output.classification.iab_content.tier2.label": "Computing",
77
- "model_output.classification.iab_content.tier3.label": "Software and Applications"
78
  },
79
  "expected": {
80
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -83,18 +106,34 @@
83
  "model_output.classification.iab_content.tier3.label": "Software and Applications"
84
  },
85
  "id": "crm-awareness-maps-to-sales",
86
- "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  "notes": "CRM education should resolve to the closest business/sales path, not generic software.",
88
- "pass": true,
89
  "status": "must_fix",
90
  "text": "What is CRM software?"
91
  },
92
  {
93
  "actual": {
94
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
95
- "model_output.classification.iab_content.tier1.label": "Technology & Computing",
96
- "model_output.classification.iab_content.tier2.label": "Computing",
97
- "model_output.classification.iab_content.tier3.label": "Internet"
98
  },
99
  "expected": {
100
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -103,9 +142,25 @@
103
  "model_output.classification.iab_content.tier3.label": "Internet"
104
  },
105
  "id": "crm-comparison-maps-to-sales",
106
- "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  "notes": "Direct CRM vendor comparison should map cleanly into the sales domain.",
108
- "pass": true,
109
  "status": "must_fix",
110
  "text": "HubSpot vs Zoho for a small team"
111
  },
@@ -113,8 +168,8 @@
113
  "actual": {
114
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
115
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
116
- "model_output.classification.iab_content.tier2.label": "Computing",
117
- "model_output.classification.iab_content.tier3.label": "Internet"
118
  },
119
  "expected": {
120
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -123,34 +178,51 @@
123
  "model_output.classification.iab_content.tier3.label": "Internet"
124
  },
125
  "id": "marketing-tools-map-to-marketing",
126
- "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
127
  "notes": "Marketing tool discovery should map to the marketing and advertising branch.",
128
- "pass": true,
129
  "status": "must_fix",
130
  "text": "Best AI SEO tools for content teams"
131
  },
132
  {
133
  "actual": {
134
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
135
- "model_output.classification.iab_content.tier1.label": "Technology & Computing"
136
  },
137
  "expected": {
138
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
139
  "model_output.classification.iab_content.tier1.label": "Technology & Computing"
140
  },
141
  "id": "ml-explanation-maps-to-ai",
142
- "mismatches": [],
 
 
 
 
 
 
143
  "notes": "ML and NLP educational prompts should land in the AI branch.",
144
- "pass": true,
145
  "status": "must_fix",
146
  "text": "What is intent classification in NLP?"
147
  },
148
  {
149
  "actual": {
150
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
151
- "model_output.classification.iab_content.tier1.label": "Technology & Computing",
152
- "model_output.classification.iab_content.tier2.label": "Computing",
153
- "model_output.classification.iab_content.tier3.label": "Internet"
154
  },
155
  "expected": {
156
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -159,15 +231,31 @@
159
  "model_output.classification.iab_content.tier3.label": "Internet"
160
  },
161
  "id": "support-credential-help-maps-to-business-it",
162
- "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
  "notes": "Credential and account help should map to business IT rather than generic business.",
164
- "pass": true,
165
  "status": "must_fix",
166
  "text": "How do I reset my password?"
167
  },
168
  {
169
  "actual": {
170
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
171
  "model_output.classification.iab_content.tier1.label": "Food & Drink",
172
  "model_output.classification.iab_content.tier2.label": "Dining Out"
173
  },
@@ -177,18 +265,24 @@
177
  "model_output.classification.iab_content.tier2.label": "Dining Out"
178
  },
179
  "id": "restaurant-booking-maps-to-dining-out",
180
- "mismatches": [],
 
 
 
 
 
 
181
  "notes": "Generic dining requests should not inherit the repo's business default.",
182
- "pass": true,
183
  "status": "must_fix",
184
  "text": "Book a table for 2 tonight"
185
  },
186
  {
187
  "actual": {
188
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
189
- "model_output.classification.iab_content.tier1.label": "Hobbies & Interests",
190
- "model_output.classification.iab_content.tier2.label": "Content Production",
191
- "model_output.classification.iab_content.tier3.label": "Freelance Writing"
192
  },
193
  "expected": {
194
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -197,19 +291,35 @@
197
  "model_output.classification.iab_content.tier3.label": "Freelance Writing"
198
  },
199
  "id": "trial-signup-maps-to-software",
200
- "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
201
  "notes": "Software action queries should map to the software/application branch.",
202
- "pass": true,
203
  "status": "must_fix",
204
  "text": "Start my free trial"
205
  },
206
  {
207
  "actual": {
208
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
209
- "model_output.classification.iab_content.tier1.label": "Technology & Computing",
210
- "model_output.classification.iab_content.tier2.label": "Computing",
211
- "model_output.classification.iab_content.tier3.label": "Software and Applications",
212
- "model_output.classification.iab_content.tier4.label": "Communication"
213
  },
214
  "expected": {
215
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -219,15 +329,41 @@
219
  "model_output.classification.iab_content.tier4.label": "Communication"
220
  },
221
  "id": "communication-software-maps-to-tier4",
222
- "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
223
  "notes": "Full taxonomy support should preserve the tier4 communication branch.",
224
- "pass": true,
225
  "status": "must_fix",
226
  "text": "best communication software for remote teams"
227
  },
228
  {
229
  "actual": {
230
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
231
  "model_output.classification.iab_content.tier1.label": "Food & Drink"
232
  },
233
  "expected": {
@@ -235,9 +371,15 @@
235
  "model_output.classification.iab_content.tier1.label": "Food & Drink"
236
  },
237
  "id": "vodka-query-maps-to-alcoholic-beverages",
238
- "mismatches": [],
 
 
 
 
 
 
239
  "notes": "Food and beverage prompts should not fall through to the business default.",
240
- "pass": true,
241
  "status": "must_fix",
242
  "text": "what is best vodka drink should i try"
243
  }
 
1
  {
2
  "by_status": {
3
  "must_fix": {
4
+ "failed": 12,
5
+ "passed": 0,
6
  "total": 12
7
  }
8
  },
9
+ "cases_path": "/root/.cache/huggingface/hub/models--admesh--agentic-intent-classifier/snapshots/0584798f8efee6beccd778b0afa06782ab5add60/agentic-intent-classifier/agentic-intent-classifier/examples/iab_behavior_lock_cases.json",
10
  "count": 12,
11
+ "failed": 12,
12
+ "passed": 0,
13
  "results": [
14
  {
15
  "actual": {
16
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
17
  "model_output.classification.iab_content.tier1.label": "Automotive",
18
+ "model_output.classification.iab_content.tier2.label": null
19
  },
20
  "expected": {
21
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
23
  "model_output.classification.iab_content.tier2.label": "Auto Type"
24
  },
25
  "id": "car-buying-maps-to-automotive-buying",
26
+ "mismatches": [
27
+ {
28
+ "actual": null,
29
+ "expected": "Auto Type",
30
+ "path": "model_output.classification.iab_content.tier2.label"
31
+ }
32
+ ],
33
  "notes": "Vehicle shopping queries should map into the automotive buying branch, not business sales.",
34
+ "pass": false,
35
  "status": "must_fix",
36
  "text": "Which car to buy in 2026"
37
  },
38
  {
39
  "actual": {
40
+ "model_output.classification.iab_content.mapping_mode": "exact",
41
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
42
  "model_output.classification.iab_content.tier2.label": "Computing",
43
  "model_output.classification.iab_content.tier3.label": "Laptops"
 
49
  "model_output.classification.iab_content.tier3.label": "Laptops"
50
  },
51
  "id": "laptop-buying-maps-to-laptops",
52
+ "mismatches": [
53
+ {
54
+ "actual": "exact",
55
+ "expected": "nearest_equivalent",
56
+ "path": "model_output.classification.iab_content.mapping_mode"
57
+ }
58
+ ],
59
  "notes": "Laptop shopping should resolve into the laptops branch, not business sales.",
60
+ "pass": false,
61
  "status": "must_fix",
62
  "text": "Which laptop to buy in 2026"
63
  },
 
65
  "actual": {
66
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
67
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
68
+ "model_output.classification.iab_content.tier2.label": null,
69
+ "model_output.classification.iab_content.tier3.label": null
70
  },
71
  "expected": {
72
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
75
  "model_output.classification.iab_content.tier3.label": "Laptops"
76
  },
77
  "id": "labtop-buying-maps-to-laptops",
78
+ "mismatches": [
79
+ {
80
+ "actual": null,
81
+ "expected": "Computing",
82
+ "path": "model_output.classification.iab_content.tier2.label"
83
+ },
84
+ {
85
+ "actual": null,
86
+ "expected": "Laptops",
87
+ "path": "model_output.classification.iab_content.tier3.label"
88
+ }
89
+ ],
90
  "notes": "Common typo handling should still land in the laptops branch.",
91
+ "pass": false,
92
  "status": "must_fix",
93
  "text": "Which labtop to buy in 2026"
94
  },
95
  {
96
  "actual": {
97
+ "model_output.classification.iab_content.mapping_mode": "exact",
98
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
99
+ "model_output.classification.iab_content.tier2.label": null,
100
+ "model_output.classification.iab_content.tier3.label": null
101
  },
102
  "expected": {
103
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
106
  "model_output.classification.iab_content.tier3.label": "Software and Applications"
107
  },
108
  "id": "crm-awareness-maps-to-sales",
109
+ "mismatches": [
110
+ {
111
+ "actual": "exact",
112
+ "expected": "nearest_equivalent",
113
+ "path": "model_output.classification.iab_content.mapping_mode"
114
+ },
115
+ {
116
+ "actual": null,
117
+ "expected": "Computing",
118
+ "path": "model_output.classification.iab_content.tier2.label"
119
+ },
120
+ {
121
+ "actual": null,
122
+ "expected": "Software and Applications",
123
+ "path": "model_output.classification.iab_content.tier3.label"
124
+ }
125
+ ],
126
  "notes": "CRM education should resolve to the closest business/sales path, not generic software.",
127
+ "pass": false,
128
  "status": "must_fix",
129
  "text": "What is CRM software?"
130
  },
131
  {
132
  "actual": {
133
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
134
+ "model_output.classification.iab_content.tier1.label": "Careers",
135
+ "model_output.classification.iab_content.tier2.label": null,
136
+ "model_output.classification.iab_content.tier3.label": null
137
  },
138
  "expected": {
139
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
142
  "model_output.classification.iab_content.tier3.label": "Internet"
143
  },
144
  "id": "crm-comparison-maps-to-sales",
145
+ "mismatches": [
146
+ {
147
+ "actual": "Careers",
148
+ "expected": "Technology & Computing",
149
+ "path": "model_output.classification.iab_content.tier1.label"
150
+ },
151
+ {
152
+ "actual": null,
153
+ "expected": "Computing",
154
+ "path": "model_output.classification.iab_content.tier2.label"
155
+ },
156
+ {
157
+ "actual": null,
158
+ "expected": "Internet",
159
+ "path": "model_output.classification.iab_content.tier3.label"
160
+ }
161
+ ],
162
  "notes": "Direct CRM vendor comparison should map cleanly into the sales domain.",
163
+ "pass": false,
164
  "status": "must_fix",
165
  "text": "HubSpot vs Zoho for a small team"
166
  },
 
168
  "actual": {
169
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
170
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
171
+ "model_output.classification.iab_content.tier2.label": null,
172
+ "model_output.classification.iab_content.tier3.label": null
173
  },
174
  "expected": {
175
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
178
  "model_output.classification.iab_content.tier3.label": "Internet"
179
  },
180
  "id": "marketing-tools-map-to-marketing",
181
+ "mismatches": [
182
+ {
183
+ "actual": null,
184
+ "expected": "Computing",
185
+ "path": "model_output.classification.iab_content.tier2.label"
186
+ },
187
+ {
188
+ "actual": null,
189
+ "expected": "Internet",
190
+ "path": "model_output.classification.iab_content.tier3.label"
191
+ }
192
+ ],
193
  "notes": "Marketing tool discovery should map to the marketing and advertising branch.",
194
+ "pass": false,
195
  "status": "must_fix",
196
  "text": "Best AI SEO tools for content teams"
197
  },
198
  {
199
  "actual": {
200
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
201
+ "model_output.classification.iab_content.tier1.label": "Careers"
202
  },
203
  "expected": {
204
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
205
  "model_output.classification.iab_content.tier1.label": "Technology & Computing"
206
  },
207
  "id": "ml-explanation-maps-to-ai",
208
+ "mismatches": [
209
+ {
210
+ "actual": "Careers",
211
+ "expected": "Technology & Computing",
212
+ "path": "model_output.classification.iab_content.tier1.label"
213
+ }
214
+ ],
215
  "notes": "ML and NLP educational prompts should land in the AI branch.",
216
+ "pass": false,
217
  "status": "must_fix",
218
  "text": "What is intent classification in NLP?"
219
  },
220
  {
221
  "actual": {
222
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
223
+ "model_output.classification.iab_content.tier1.label": "Personal Finance",
224
+ "model_output.classification.iab_content.tier2.label": null,
225
+ "model_output.classification.iab_content.tier3.label": null
226
  },
227
  "expected": {
228
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
231
  "model_output.classification.iab_content.tier3.label": "Internet"
232
  },
233
  "id": "support-credential-help-maps-to-business-it",
234
+ "mismatches": [
235
+ {
236
+ "actual": "Personal Finance",
237
+ "expected": "Technology & Computing",
238
+ "path": "model_output.classification.iab_content.tier1.label"
239
+ },
240
+ {
241
+ "actual": null,
242
+ "expected": "Computing",
243
+ "path": "model_output.classification.iab_content.tier2.label"
244
+ },
245
+ {
246
+ "actual": null,
247
+ "expected": "Internet",
248
+ "path": "model_output.classification.iab_content.tier3.label"
249
+ }
250
+ ],
251
  "notes": "Credential and account help should map to business IT rather than generic business.",
252
+ "pass": false,
253
  "status": "must_fix",
254
  "text": "How do I reset my password?"
255
  },
256
  {
257
  "actual": {
258
+ "model_output.classification.iab_content.mapping_mode": "exact",
259
  "model_output.classification.iab_content.tier1.label": "Food & Drink",
260
  "model_output.classification.iab_content.tier2.label": "Dining Out"
261
  },
 
265
  "model_output.classification.iab_content.tier2.label": "Dining Out"
266
  },
267
  "id": "restaurant-booking-maps-to-dining-out",
268
+ "mismatches": [
269
+ {
270
+ "actual": "exact",
271
+ "expected": "nearest_equivalent",
272
+ "path": "model_output.classification.iab_content.mapping_mode"
273
+ }
274
+ ],
275
  "notes": "Generic dining requests should not inherit the repo's business default.",
276
+ "pass": false,
277
  "status": "must_fix",
278
  "text": "Book a table for 2 tonight"
279
  },
280
  {
281
  "actual": {
282
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
283
+ "model_output.classification.iab_content.tier1.label": "Sensitive Topics",
284
+ "model_output.classification.iab_content.tier2.label": null,
285
+ "model_output.classification.iab_content.tier3.label": null
286
  },
287
  "expected": {
288
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
291
  "model_output.classification.iab_content.tier3.label": "Freelance Writing"
292
  },
293
  "id": "trial-signup-maps-to-software",
294
+ "mismatches": [
295
+ {
296
+ "actual": "Sensitive Topics",
297
+ "expected": "Hobbies & Interests",
298
+ "path": "model_output.classification.iab_content.tier1.label"
299
+ },
300
+ {
301
+ "actual": null,
302
+ "expected": "Content Production",
303
+ "path": "model_output.classification.iab_content.tier2.label"
304
+ },
305
+ {
306
+ "actual": null,
307
+ "expected": "Freelance Writing",
308
+ "path": "model_output.classification.iab_content.tier3.label"
309
+ }
310
+ ],
311
  "notes": "Software action queries should map to the software/application branch.",
312
+ "pass": false,
313
  "status": "must_fix",
314
  "text": "Start my free trial"
315
  },
316
  {
317
  "actual": {
318
+ "model_output.classification.iab_content.mapping_mode": "exact",
319
+ "model_output.classification.iab_content.tier1.label": "Careers",
320
+ "model_output.classification.iab_content.tier2.label": "Remote Working",
321
+ "model_output.classification.iab_content.tier3.label": null,
322
+ "model_output.classification.iab_content.tier4.label": null
323
  },
324
  "expected": {
325
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
329
  "model_output.classification.iab_content.tier4.label": "Communication"
330
  },
331
  "id": "communication-software-maps-to-tier4",
332
+ "mismatches": [
333
+ {
334
+ "actual": "Careers",
335
+ "expected": "Technology & Computing",
336
+ "path": "model_output.classification.iab_content.tier1.label"
337
+ },
338
+ {
339
+ "actual": "exact",
340
+ "expected": "nearest_equivalent",
341
+ "path": "model_output.classification.iab_content.mapping_mode"
342
+ },
343
+ {
344
+ "actual": "Remote Working",
345
+ "expected": "Computing",
346
+ "path": "model_output.classification.iab_content.tier2.label"
347
+ },
348
+ {
349
+ "actual": null,
350
+ "expected": "Software and Applications",
351
+ "path": "model_output.classification.iab_content.tier3.label"
352
+ },
353
+ {
354
+ "actual": null,
355
+ "expected": "Communication",
356
+ "path": "model_output.classification.iab_content.tier4.label"
357
+ }
358
+ ],
359
  "notes": "Full taxonomy support should preserve the tier4 communication branch.",
360
+ "pass": false,
361
  "status": "must_fix",
362
  "text": "best communication software for remote teams"
363
  },
364
  {
365
  "actual": {
366
+ "model_output.classification.iab_content.mapping_mode": "exact",
367
  "model_output.classification.iab_content.tier1.label": "Food & Drink"
368
  },
369
  "expected": {
 
371
  "model_output.classification.iab_content.tier1.label": "Food & Drink"
372
  },
373
  "id": "vodka-query-maps-to-alcoholic-beverages",
374
+ "mismatches": [
375
+ {
376
+ "actual": "exact",
377
+ "expected": "nearest_equivalent",
378
+ "path": "model_output.classification.iab_content.mapping_mode"
379
+ }
380
+ ],
381
  "notes": "Food and beverage prompts should not fall through to the business default.",
382
+ "pass": false,
383
  "status": "must_fix",
384
  "text": "what is best vodka drink should i try"
385
  }
artifacts/evaluation/latest/iab_content_cross_vertical_benchmark_report.json CHANGED
@@ -1,93 +1,98 @@
1
  {
2
- "accepted_accuracy": 0.3444,
3
- "accepted_coverage": 1.0,
4
- "accuracy": 0.3444,
5
  "count": 90,
6
- "dataset_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/data/iab_cross_vertical_benchmark.jsonl",
7
  "difficulty_breakdown": {
8
  "easy": {
9
- "accepted_accuracy": 0.2667,
10
- "accepted_coverage": 1.0,
11
- "accuracy": 0.2667,
12
  "count": 30,
13
- "fallback_rate": 0.0,
14
- "macro_f1": 0.1633
15
  },
16
  "hard": {
17
- "accepted_accuracy": 0.3667,
18
- "accepted_coverage": 1.0,
19
- "accuracy": 0.3667,
20
  "count": 30,
21
- "fallback_rate": 0.0,
22
- "macro_f1": 0.2174
23
  },
24
  "medium": {
25
- "accepted_accuracy": 0.4,
26
- "accepted_coverage": 1.0,
27
- "accuracy": 0.4,
28
  "count": 30,
29
- "fallback_rate": 0.0,
30
- "macro_f1": 0.2667
31
  }
32
  },
33
- "fallback_rate": 0.0,
34
  "head": "iab_content",
35
- "macro_f1": 0.1808,
36
- "primary_source": "embedding_retrieval",
37
  "suite": "cross_vertical_benchmark",
38
  "tier_metrics": {
39
- "average_prediction_depth": 2.5333,
40
  "error_buckets": {
41
- "exact_match": 31,
42
- "parent_safe_stop": 5,
43
- "right_tier1_wrong_tier2": 19,
44
- "wrong_deep_leaf": 13,
45
- "wrong_tier1": 22
46
  },
47
- "exact_path_accuracy": 0.3444,
48
- "parent_safe_accuracy": 0.4889,
49
- "tier1_accuracy": 0.7556,
50
- "tier2_accuracy": 0.5238,
51
- "tier3_accuracy": 0.4762,
52
- "tier4_accuracy": 1.0
53
  },
54
  "view_metrics": {
55
- "combined_path": {
56
- "average_prediction_depth": 2.5333,
57
  "error_buckets": {
58
- "exact_match": 27,
59
- "parent_safe_stop": 5,
60
- "right_tier1_wrong_tier2": 19,
61
- "wrong_deep_leaf": 17,
62
- "wrong_tier1": 22
63
  },
64
- "exact_path_accuracy": 0.3,
65
- "fallback_overuse_count": 12,
66
- "fallback_rate": 0.1333,
67
- "parent_safe_accuracy": 0.4444,
68
- "tier1_accuracy": 0.7556,
69
- "tier2_accuracy": 0.5238,
70
  "tier3_accuracy": 0.381,
71
- "tier4_accuracy": 0.5
72
- },
73
- "disagreements": {
74
- "retrieval_vs_combined": 0
75
  },
76
- "embedding_retrieval": {
77
- "average_prediction_depth": 2.5333,
78
  "error_buckets": {
79
- "exact_match": 27,
80
- "parent_safe_stop": 5,
81
- "right_tier1_wrong_tier2": 19,
82
- "wrong_deep_leaf": 17,
83
- "wrong_tier1": 22
84
  },
85
- "exact_path_accuracy": 0.3,
86
- "parent_safe_accuracy": 0.4444,
87
- "tier1_accuracy": 0.7556,
88
- "tier2_accuracy": 0.5238,
 
 
89
  "tier3_accuracy": 0.381,
90
- "tier4_accuracy": 0.5
 
 
 
 
 
 
 
 
91
  }
92
  }
93
  }
 
1
  {
2
+ "accepted_accuracy": 0.4103,
3
+ "accepted_coverage": 0.8667,
4
+ "accuracy": 0.3667,
5
  "count": 90,
6
+ "dataset_path": "/root/.cache/huggingface/hub/models--admesh--agentic-intent-classifier/snapshots/0584798f8efee6beccd778b0afa06782ab5add60/agentic-intent-classifier/agentic-intent-classifier/data/iab_cross_vertical_benchmark.jsonl",
7
  "difficulty_breakdown": {
8
  "easy": {
9
+ "accepted_accuracy": 0.3846,
10
+ "accepted_coverage": 0.8667,
11
+ "accuracy": 0.3667,
12
  "count": 30,
13
+ "fallback_rate": 0.1333,
14
+ "macro_f1": 0.2619
15
  },
16
  "hard": {
17
+ "accepted_accuracy": 0.5385,
18
+ "accepted_coverage": 0.8667,
19
+ "accuracy": 0.4667,
20
  "count": 30,
21
+ "fallback_rate": 0.1333,
22
+ "macro_f1": 0.3182
23
  },
24
  "medium": {
25
+ "accepted_accuracy": 0.3077,
26
+ "accepted_coverage": 0.8667,
27
+ "accuracy": 0.2667,
28
  "count": 30,
29
+ "fallback_rate": 0.1333,
30
+ "macro_f1": 0.1633
31
  }
32
  },
33
+ "fallback_rate": 0.1333,
34
  "head": "iab_content",
35
+ "macro_f1": 0.2081,
36
+ "primary_source": "supervised_classifier",
37
  "suite": "cross_vertical_benchmark",
38
  "tier_metrics": {
39
+ "average_prediction_depth": 1.9889,
40
  "error_buckets": {
41
+ "exact_match": 33,
42
+ "parent_safe_stop": 3,
43
+ "right_tier1_wrong_tier2": 20,
44
+ "wrong_deep_leaf": 6,
45
+ "wrong_tier1": 28
46
  },
47
+ "exact_path_accuracy": 0.3667,
48
+ "parent_safe_accuracy": 0.5333,
49
+ "tier1_accuracy": 0.6889,
50
+ "tier2_accuracy": 0.4286,
51
+ "tier3_accuracy": 0.381,
52
+ "tier4_accuracy": 0.3333
53
  },
54
  "view_metrics": {
55
+ "classifier": {
56
+ "average_prediction_depth": 1.9889,
57
  "error_buckets": {
58
+ "exact_match": 33,
59
+ "parent_safe_stop": 3,
60
+ "right_tier1_wrong_tier2": 20,
61
+ "wrong_deep_leaf": 6,
62
+ "wrong_tier1": 28
63
  },
64
+ "exact_path_accuracy": 0.3667,
65
+ "parent_safe_accuracy": 0.5333,
66
+ "tier1_accuracy": 0.6889,
67
+ "tier2_accuracy": 0.4286,
 
 
68
  "tier3_accuracy": 0.381,
69
+ "tier4_accuracy": 0.3333
 
 
 
70
  },
71
+ "combined_path": {
72
+ "average_prediction_depth": 1.9889,
73
  "error_buckets": {
74
+ "exact_match": 33,
75
+ "parent_safe_stop": 3,
76
+ "right_tier1_wrong_tier2": 20,
77
+ "wrong_deep_leaf": 6,
78
+ "wrong_tier1": 28
79
  },
80
+ "exact_path_accuracy": 0.3667,
81
+ "fallback_overuse_count": 18,
82
+ "fallback_rate": 0.2,
83
+ "parent_safe_accuracy": 0.5333,
84
+ "tier1_accuracy": 0.6889,
85
+ "tier2_accuracy": 0.4286,
86
  "tier3_accuracy": 0.381,
87
+ "tier4_accuracy": 0.3333
88
+ },
89
+ "disagreements": {
90
+ "classifier_vs_combined": 0
91
+ },
92
+ "shadow_embedding_retrieval": {
93
+ "hint": "Set IAB_EVAL_INCLUDE_SHADOW_RETRIEVAL=1 to run shadow embedding retrieval (downloads/loads gte-Qwen2 when index is present).",
94
+ "reason": "disabled_by_default",
95
+ "skipped": true
96
  }
97
  }
98
  }
artifacts/evaluation/latest/iab_content_difficulty_benchmark_report.json CHANGED
@@ -1,93 +1,97 @@
1
  {
2
- "accepted_accuracy": 0.3782,
3
- "accepted_coverage": 1.0,
4
- "accuracy": 0.3782,
5
  "count": 156,
6
- "dataset_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/data/iab_benchmark.jsonl",
7
  "difficulty_breakdown": {
8
  "easy": {
9
- "accepted_accuracy": 0.4038,
10
- "accepted_coverage": 1.0,
11
- "accuracy": 0.4038,
12
  "count": 52,
13
- "fallback_rate": 0.0,
14
- "macro_f1": 0.2171
15
  },
16
  "hard": {
17
- "accepted_accuracy": 0.3077,
18
- "accepted_coverage": 1.0,
19
- "accuracy": 0.3077,
20
  "count": 52,
21
- "fallback_rate": 0.0,
22
- "macro_f1": 0.1626
23
  },
24
  "medium": {
25
- "accepted_accuracy": 0.4231,
26
- "accepted_coverage": 1.0,
27
- "accuracy": 0.4231,
28
  "count": 52,
29
- "fallback_rate": 0.0,
30
- "macro_f1": 0.2265
31
  }
32
  },
33
- "fallback_rate": 0.0,
34
  "head": "iab_content",
35
- "macro_f1": 0.1593,
36
- "primary_source": "embedding_retrieval",
37
  "suite": "difficulty_benchmark",
38
  "tier_metrics": {
39
- "average_prediction_depth": 2.5833,
40
  "error_buckets": {
41
- "exact_match": 59,
42
- "parent_safe_stop": 17,
43
- "right_tier1_wrong_tier2": 42,
44
- "wrong_deep_leaf": 13,
45
- "wrong_tier1": 25
46
  },
47
- "exact_path_accuracy": 0.3782,
48
- "parent_safe_accuracy": 0.6154,
49
- "tier1_accuracy": 0.8397,
50
- "tier2_accuracy": 0.5705,
51
- "tier3_accuracy": 0.5648,
52
- "tier4_accuracy": 0.5833
53
  },
54
  "view_metrics": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  "combined_path": {
56
- "average_prediction_depth": 2.5833,
57
  "error_buckets": {
58
- "exact_match": 48,
59
- "parent_safe_stop": 17,
60
- "right_tier1_wrong_tier2": 42,
61
- "wrong_deep_leaf": 24,
62
- "wrong_tier1": 25
63
  },
64
- "exact_path_accuracy": 0.3077,
65
  "fallback_overuse_count": 11,
66
  "fallback_rate": 0.0705,
67
- "parent_safe_accuracy": 0.5449,
68
- "tier1_accuracy": 0.8397,
69
- "tier2_accuracy": 0.5705,
70
- "tier3_accuracy": 0.4352,
71
  "tier4_accuracy": 0.25
72
  },
73
  "disagreements": {
74
- "retrieval_vs_combined": 0
75
  },
76
- "embedding_retrieval": {
77
- "average_prediction_depth": 2.5833,
78
- "error_buckets": {
79
- "exact_match": 48,
80
- "parent_safe_stop": 17,
81
- "right_tier1_wrong_tier2": 42,
82
- "wrong_deep_leaf": 24,
83
- "wrong_tier1": 25
84
- },
85
- "exact_path_accuracy": 0.3077,
86
- "parent_safe_accuracy": 0.5449,
87
- "tier1_accuracy": 0.8397,
88
- "tier2_accuracy": 0.5705,
89
- "tier3_accuracy": 0.4352,
90
- "tier4_accuracy": 0.25
91
  }
92
  }
93
  }
 
1
  {
2
+ "accepted_accuracy": 0.4959,
3
+ "accepted_coverage": 0.7885,
4
+ "accuracy": 0.391,
5
  "count": 156,
6
+ "dataset_path": "/root/.cache/huggingface/hub/models--admesh--agentic-intent-classifier/snapshots/0584798f8efee6beccd778b0afa06782ab5add60/agentic-intent-classifier/agentic-intent-classifier/data/iab_benchmark.jsonl",
7
  "difficulty_breakdown": {
8
  "easy": {
9
+ "accepted_accuracy": 0.5778,
10
+ "accepted_coverage": 0.8654,
11
+ "accuracy": 0.5,
12
  "count": 52,
13
+ "fallback_rate": 0.1346,
14
+ "macro_f1": 0.3025
15
  },
16
  "hard": {
17
+ "accepted_accuracy": 0.35,
18
+ "accepted_coverage": 0.7692,
19
+ "accuracy": 0.2692,
20
  "count": 52,
21
+ "fallback_rate": 0.2308,
22
+ "macro_f1": 0.1505
23
  },
24
  "medium": {
25
+ "accepted_accuracy": 0.5526,
26
+ "accepted_coverage": 0.7308,
27
+ "accuracy": 0.4038,
28
  "count": 52,
29
+ "fallback_rate": 0.2692,
30
+ "macro_f1": 0.2184
31
  }
32
  },
33
+ "fallback_rate": 0.2115,
34
  "head": "iab_content",
35
+ "macro_f1": 0.1715,
36
+ "primary_source": "supervised_classifier",
37
  "suite": "difficulty_benchmark",
38
  "tier_metrics": {
39
+ "average_prediction_depth": 1.9936,
40
  "error_buckets": {
41
+ "exact_match": 61,
42
+ "parent_safe_stop": 4,
43
+ "right_tier1_wrong_tier2": 41,
44
+ "wrong_tier1": 50
 
45
  },
46
+ "exact_path_accuracy": 0.391,
47
+ "parent_safe_accuracy": 0.6218,
48
+ "tier1_accuracy": 0.6795,
49
+ "tier2_accuracy": 0.4167,
50
+ "tier3_accuracy": 0.4259,
51
+ "tier4_accuracy": 0.4167
52
  },
53
  "view_metrics": {
54
+ "classifier": {
55
+ "average_prediction_depth": 1.9936,
56
+ "error_buckets": {
57
+ "exact_match": 56,
58
+ "parent_safe_stop": 4,
59
+ "right_tier1_wrong_tier2": 41,
60
+ "wrong_deep_leaf": 5,
61
+ "wrong_tier1": 50
62
+ },
63
+ "exact_path_accuracy": 0.359,
64
+ "parent_safe_accuracy": 0.5897,
65
+ "tier1_accuracy": 0.6795,
66
+ "tier2_accuracy": 0.4167,
67
+ "tier3_accuracy": 0.3796,
68
+ "tier4_accuracy": 0.25
69
+ },
70
  "combined_path": {
71
+ "average_prediction_depth": 1.9936,
72
  "error_buckets": {
73
+ "exact_match": 56,
74
+ "parent_safe_stop": 4,
75
+ "right_tier1_wrong_tier2": 41,
76
+ "wrong_deep_leaf": 5,
77
+ "wrong_tier1": 50
78
  },
79
+ "exact_path_accuracy": 0.359,
80
  "fallback_overuse_count": 11,
81
  "fallback_rate": 0.0705,
82
+ "parent_safe_accuracy": 0.5897,
83
+ "tier1_accuracy": 0.6795,
84
+ "tier2_accuracy": 0.4167,
85
+ "tier3_accuracy": 0.3796,
86
  "tier4_accuracy": 0.25
87
  },
88
  "disagreements": {
89
+ "classifier_vs_combined": 0
90
  },
91
+ "shadow_embedding_retrieval": {
92
+ "hint": "Set IAB_EVAL_INCLUDE_SHADOW_RETRIEVAL=1 to run shadow embedding retrieval (downloads/loads gte-Qwen2 when index is present).",
93
+ "reason": "disabled_by_default",
94
+ "skipped": true
 
 
 
 
 
 
 
 
 
 
 
95
  }
96
  }
97
  }
artifacts/evaluation/latest/iab_content_extended_cases_report.json CHANGED
@@ -1,64 +1,69 @@
1
  {
2
- "accepted_accuracy": 0.25,
3
- "accepted_coverage": 1.0,
4
- "accuracy": 0.25,
5
  "count": 8,
6
- "dataset_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/data/iab/extended_cases.jsonl",
7
- "fallback_rate": 0.0,
8
  "head": "iab_content",
9
- "macro_f1": 0.1429,
10
- "primary_source": "embedding_retrieval",
11
  "suite": "extended_cases",
12
  "tier_metrics": {
13
- "average_prediction_depth": 2.375,
14
  "error_buckets": {
15
- "exact_match": 2,
16
- "right_tier1_wrong_tier2": 3,
17
- "wrong_deep_leaf": 2,
18
  "wrong_tier1": 1
19
  },
20
- "exact_path_accuracy": 0.25,
21
- "parent_safe_accuracy": 0.375,
22
  "tier1_accuracy": 0.875,
23
- "tier2_accuracy": 0.4286,
24
- "tier3_accuracy": 1.0,
25
  "tier4_accuracy": 0.0
26
  },
27
  "view_metrics": {
28
- "combined_path": {
29
- "average_prediction_depth": 2.375,
30
  "error_buckets": {
31
- "exact_match": 2,
32
- "right_tier1_wrong_tier2": 3,
33
- "wrong_deep_leaf": 2,
34
  "wrong_tier1": 1
35
  },
36
- "exact_path_accuracy": 0.25,
37
- "fallback_overuse_count": 1,
38
- "fallback_rate": 0.125,
39
- "parent_safe_accuracy": 0.375,
40
  "tier1_accuracy": 0.875,
41
- "tier2_accuracy": 0.4286,
42
  "tier3_accuracy": 0.0,
43
  "tier4_accuracy": 0.0
44
  },
45
- "disagreements": {
46
- "retrieval_vs_combined": 0
47
- },
48
- "embedding_retrieval": {
49
- "average_prediction_depth": 2.375,
50
  "error_buckets": {
51
- "exact_match": 2,
52
- "right_tier1_wrong_tier2": 3,
53
- "wrong_deep_leaf": 2,
54
  "wrong_tier1": 1
55
  },
56
- "exact_path_accuracy": 0.25,
57
- "parent_safe_accuracy": 0.375,
 
 
58
  "tier1_accuracy": 0.875,
59
- "tier2_accuracy": 0.4286,
60
  "tier3_accuracy": 0.0,
61
  "tier4_accuracy": 0.0
 
 
 
 
 
 
 
 
62
  }
63
  }
64
  }
 
1
  {
2
+ "accepted_accuracy": 0.6,
3
+ "accepted_coverage": 0.625,
4
+ "accuracy": 0.5,
5
  "count": 8,
6
+ "dataset_path": "/root/.cache/huggingface/hub/models--admesh--agentic-intent-classifier/snapshots/0584798f8efee6beccd778b0afa06782ab5add60/agentic-intent-classifier/agentic-intent-classifier/data/iab/extended_cases.jsonl",
7
+ "fallback_rate": 0.375,
8
  "head": "iab_content",
9
+ "macro_f1": 0.3333,
10
+ "primary_source": "supervised_classifier",
11
  "suite": "extended_cases",
12
  "tier_metrics": {
13
+ "average_prediction_depth": 1.75,
14
  "error_buckets": {
15
+ "exact_match": 4,
16
+ "right_tier1_wrong_tier2": 2,
17
+ "wrong_deep_leaf": 1,
18
  "wrong_tier1": 1
19
  },
20
+ "exact_path_accuracy": 0.5,
21
+ "parent_safe_accuracy": 0.625,
22
  "tier1_accuracy": 0.875,
23
+ "tier2_accuracy": 0.5714,
24
+ "tier3_accuracy": 0.0,
25
  "tier4_accuracy": 0.0
26
  },
27
  "view_metrics": {
28
+ "classifier": {
29
+ "average_prediction_depth": 1.75,
30
  "error_buckets": {
31
+ "exact_match": 4,
32
+ "right_tier1_wrong_tier2": 2,
33
+ "wrong_deep_leaf": 1,
34
  "wrong_tier1": 1
35
  },
36
+ "exact_path_accuracy": 0.5,
37
+ "parent_safe_accuracy": 0.625,
 
 
38
  "tier1_accuracy": 0.875,
39
+ "tier2_accuracy": 0.5714,
40
  "tier3_accuracy": 0.0,
41
  "tier4_accuracy": 0.0
42
  },
43
+ "combined_path": {
44
+ "average_prediction_depth": 1.75,
 
 
 
45
  "error_buckets": {
46
+ "exact_match": 4,
47
+ "right_tier1_wrong_tier2": 2,
48
+ "wrong_deep_leaf": 1,
49
  "wrong_tier1": 1
50
  },
51
+ "exact_path_accuracy": 0.5,
52
+ "fallback_overuse_count": 2,
53
+ "fallback_rate": 0.25,
54
+ "parent_safe_accuracy": 0.625,
55
  "tier1_accuracy": 0.875,
56
+ "tier2_accuracy": 0.5714,
57
  "tier3_accuracy": 0.0,
58
  "tier4_accuracy": 0.0
59
+ },
60
+ "disagreements": {
61
+ "classifier_vs_combined": 0
62
+ },
63
+ "shadow_embedding_retrieval": {
64
+ "hint": "Set IAB_EVAL_INCLUDE_SHADOW_RETRIEVAL=1 to run shadow embedding retrieval (downloads/loads gte-Qwen2 when index is present).",
65
+ "reason": "disabled_by_default",
66
+ "skipped": true
67
  }
68
  }
69
  }
artifacts/evaluation/latest/iab_content_hard_cases_report.json CHANGED
@@ -1,66 +1,66 @@
1
  {
2
- "accepted_accuracy": 0.25,
3
- "accepted_coverage": 1.0,
4
- "accuracy": 0.25,
5
  "count": 8,
6
- "dataset_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/data/iab/hard_cases.jsonl",
7
- "fallback_rate": 0.0,
8
  "head": "iab_content",
9
- "macro_f1": 0.1429,
10
- "primary_source": "embedding_retrieval",
11
  "suite": "hard_cases",
12
  "tier_metrics": {
13
- "average_prediction_depth": 2.375,
14
  "error_buckets": {
15
- "exact_match": 2,
16
- "parent_safe_stop": 1,
17
- "right_tier1_wrong_tier2": 2,
18
- "wrong_tier1": 3
19
  },
20
- "exact_path_accuracy": 0.25,
21
  "parent_safe_accuracy": 0.5,
22
- "tier1_accuracy": 0.625,
23
  "tier2_accuracy": 0.375,
24
- "tier3_accuracy": 0.2,
25
- "tier4_accuracy": 1.0
26
  },
27
  "view_metrics": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  "combined_path": {
29
- "average_prediction_depth": 2.375,
30
  "error_buckets": {
31
- "exact_match": 1,
32
- "parent_safe_stop": 1,
33
- "right_tier1_wrong_tier2": 2,
34
- "wrong_deep_leaf": 1,
35
- "wrong_tier1": 3
36
  },
37
- "exact_path_accuracy": 0.125,
38
  "fallback_overuse_count": 1,
39
  "fallback_rate": 0.125,
40
- "parent_safe_accuracy": 0.375,
41
- "tier1_accuracy": 0.625,
42
  "tier2_accuracy": 0.375,
43
- "tier3_accuracy": 0.0,
44
  "tier4_accuracy": 0.0
45
  },
46
  "disagreements": {
47
- "retrieval_vs_combined": 0
48
  },
49
- "embedding_retrieval": {
50
- "average_prediction_depth": 2.375,
51
- "error_buckets": {
52
- "exact_match": 1,
53
- "parent_safe_stop": 1,
54
- "right_tier1_wrong_tier2": 2,
55
- "wrong_deep_leaf": 1,
56
- "wrong_tier1": 3
57
- },
58
- "exact_path_accuracy": 0.125,
59
- "parent_safe_accuracy": 0.375,
60
- "tier1_accuracy": 0.625,
61
- "tier2_accuracy": 0.375,
62
- "tier3_accuracy": 0.0,
63
- "tier4_accuracy": 0.0
64
  }
65
  }
66
  }
 
1
  {
2
+ "accepted_accuracy": 0.5,
3
+ "accepted_coverage": 0.75,
4
+ "accuracy": 0.375,
5
  "count": 8,
6
+ "dataset_path": "/root/.cache/huggingface/hub/models--admesh--agentic-intent-classifier/snapshots/0584798f8efee6beccd778b0afa06782ab5add60/agentic-intent-classifier/agentic-intent-classifier/data/iab/hard_cases.jsonl",
7
+ "fallback_rate": 0.25,
8
  "head": "iab_content",
9
+ "macro_f1": 0.2308,
10
+ "primary_source": "supervised_classifier",
11
  "suite": "hard_cases",
12
  "tier_metrics": {
13
+ "average_prediction_depth": 1.75,
14
  "error_buckets": {
15
+ "exact_match": 3,
16
+ "right_tier1_wrong_tier2": 1,
17
+ "wrong_tier1": 4
 
18
  },
19
+ "exact_path_accuracy": 0.375,
20
  "parent_safe_accuracy": 0.5,
21
+ "tier1_accuracy": 0.5,
22
  "tier2_accuracy": 0.375,
23
+ "tier3_accuracy": 0.4,
24
+ "tier4_accuracy": 0.0
25
  },
26
  "view_metrics": {
27
+ "classifier": {
28
+ "average_prediction_depth": 1.75,
29
+ "error_buckets": {
30
+ "exact_match": 3,
31
+ "right_tier1_wrong_tier2": 1,
32
+ "wrong_tier1": 4
33
+ },
34
+ "exact_path_accuracy": 0.375,
35
+ "parent_safe_accuracy": 0.5,
36
+ "tier1_accuracy": 0.5,
37
+ "tier2_accuracy": 0.375,
38
+ "tier3_accuracy": 0.4,
39
+ "tier4_accuracy": 0.0
40
+ },
41
  "combined_path": {
42
+ "average_prediction_depth": 1.75,
43
  "error_buckets": {
44
+ "exact_match": 3,
45
+ "right_tier1_wrong_tier2": 1,
46
+ "wrong_tier1": 4
 
 
47
  },
48
+ "exact_path_accuracy": 0.375,
49
  "fallback_overuse_count": 1,
50
  "fallback_rate": 0.125,
51
+ "parent_safe_accuracy": 0.5,
52
+ "tier1_accuracy": 0.5,
53
  "tier2_accuracy": 0.375,
54
+ "tier3_accuracy": 0.4,
55
  "tier4_accuracy": 0.0
56
  },
57
  "disagreements": {
58
+ "classifier_vs_combined": 0
59
  },
60
+ "shadow_embedding_retrieval": {
61
+ "hint": "Set IAB_EVAL_INCLUDE_SHADOW_RETRIEVAL=1 to run shadow embedding retrieval (downloads/loads gte-Qwen2 when index is present).",
62
+ "reason": "disabled_by_default",
63
+ "skipped": true
 
 
 
 
 
 
 
 
 
 
 
64
  }
65
  }
66
  }
artifacts/evaluation/latest/iab_content_test_report.json CHANGED
@@ -1,31 +1,47 @@
1
  {
2
- "accepted_accuracy": 0.6527,
3
- "accepted_coverage": 1.0,
4
- "accuracy": 0.6527,
5
  "count": 3282,
6
- "dataset_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/data/iab/test.jsonl",
7
- "fallback_rate": 0.0,
8
  "head": "iab_content",
9
- "macro_f1": 0.6922,
10
- "primary_source": "embedding_retrieval",
11
  "suite": "test",
12
  "tier_metrics": {
13
- "average_prediction_depth": 2.1889,
14
  "error_buckets": {
15
- "exact_match": 2142,
16
- "parent_safe_stop": 115,
17
- "right_tier1_wrong_tier2": 674,
18
- "wrong_deep_leaf": 236,
19
- "wrong_tier1": 115
20
  },
21
- "exact_path_accuracy": 0.6527,
22
- "parent_safe_accuracy": 0.7721,
23
- "tier1_accuracy": 0.965,
24
- "tier2_accuracy": 0.7587,
25
- "tier3_accuracy": 0.8041,
26
- "tier4_accuracy": 0.7929
27
  },
28
  "view_metrics": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  "combined_path": {
30
  "count": 3282,
31
  "max_combined_rows": 500,
@@ -38,21 +54,10 @@
38
  "reason": "dataset_too_large_for_combined_view",
39
  "skipped": true
40
  },
41
- "embedding_retrieval": {
42
- "average_prediction_depth": 2.1889,
43
- "error_buckets": {
44
- "exact_match": 2107,
45
- "parent_safe_stop": 109,
46
- "right_tier1_wrong_tier2": 680,
47
- "wrong_deep_leaf": 271,
48
- "wrong_tier1": 115
49
- },
50
- "exact_path_accuracy": 0.642,
51
- "parent_safe_accuracy": 0.7596,
52
- "tier1_accuracy": 0.965,
53
- "tier2_accuracy": 0.7566,
54
- "tier3_accuracy": 0.7679,
55
- "tier4_accuracy": 0.6071
56
  }
57
  }
58
  }
 
1
  {
2
+ "accepted_accuracy": 0.916,
3
+ "accepted_coverage": 0.9973,
4
+ "accuracy": 0.915,
5
  "count": 3282,
6
+ "dataset_path": "/root/.cache/huggingface/hub/models--admesh--agentic-intent-classifier/snapshots/0584798f8efee6beccd778b0afa06782ab5add60/agentic-intent-classifier/agentic-intent-classifier/data/iab/test.jsonl",
7
+ "fallback_rate": 0.0027,
8
  "head": "iab_content",
9
+ "macro_f1": 0.8686,
10
+ "primary_source": "supervised_classifier",
11
  "suite": "test",
12
  "tier_metrics": {
13
+ "average_prediction_depth": 2.1804,
14
  "error_buckets": {
15
+ "exact_match": 3003,
16
+ "parent_safe_stop": 65,
17
+ "right_tier1_wrong_tier2": 73,
18
+ "wrong_deep_leaf": 90,
19
+ "wrong_tier1": 51
20
  },
21
+ "exact_path_accuracy": 0.915,
22
+ "parent_safe_accuracy": 0.9442,
23
+ "tier1_accuracy": 0.9845,
24
+ "tier2_accuracy": 0.9606,
25
+ "tier3_accuracy": 0.8528,
26
+ "tier4_accuracy": 0.5286
27
  },
28
  "view_metrics": {
29
+ "classifier": {
30
+ "average_prediction_depth": 2.1804,
31
+ "error_buckets": {
32
+ "exact_match": 2965,
33
+ "parent_safe_stop": 63,
34
+ "right_tier1_wrong_tier2": 85,
35
+ "wrong_deep_leaf": 118,
36
+ "wrong_tier1": 51
37
+ },
38
+ "exact_path_accuracy": 0.9034,
39
+ "parent_safe_accuracy": 0.9321,
40
+ "tier1_accuracy": 0.9845,
41
+ "tier2_accuracy": 0.9565,
42
+ "tier3_accuracy": 0.8218,
43
+ "tier4_accuracy": 0.3429
44
+ },
45
  "combined_path": {
46
  "count": 3282,
47
  "max_combined_rows": 500,
 
54
  "reason": "dataset_too_large_for_combined_view",
55
  "skipped": true
56
  },
57
+ "shadow_embedding_retrieval": {
58
+ "hint": "Set IAB_EVAL_INCLUDE_SHADOW_RETRIEVAL=1 to run shadow embedding retrieval (downloads/loads gte-Qwen2 when index is present).",
59
+ "reason": "disabled_by_default",
60
+ "skipped": true
 
 
 
 
 
 
 
 
 
 
 
61
  }
62
  }
63
  }
artifacts/evaluation/latest/iab_content_train_report.json CHANGED
@@ -1,67 +1,63 @@
1
  {
2
- "accepted_accuracy": 0.8115,
3
- "accepted_coverage": 1.0,
4
- "accuracy": 0.8115,
5
  "count": 13211,
6
- "dataset_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/data/iab/train.jsonl",
7
- "fallback_rate": 0.0,
8
  "head": "iab_content",
9
- "macro_f1": 0.8293,
10
- "primary_source": "embedding_retrieval",
11
  "suite": "train",
12
  "tier_metrics": {
13
- "average_prediction_depth": 2.2368,
14
  "error_buckets": {
15
- "exact_match": 10721,
16
- "parent_safe_stop": 346,
17
- "right_tier1_wrong_tier2": 812,
18
- "wrong_deep_leaf": 809,
19
- "wrong_tier1": 523
20
  },
21
- "exact_path_accuracy": 0.8115,
22
- "parent_safe_accuracy": 0.8753,
23
- "tier1_accuracy": 0.9604,
24
- "tier2_accuracy": 0.9208,
25
- "tier3_accuracy": 0.8788,
26
- "tier4_accuracy": 0.8732
27
  },
28
  "view_metrics": {
29
- "combined_path": {
30
- "average_prediction_depth": 2.2368,
31
  "error_buckets": {
32
- "exact_match": 10569,
33
- "parent_safe_stop": 338,
34
- "right_tier1_wrong_tier2": 834,
35
- "wrong_deep_leaf": 947,
36
- "wrong_tier1": 523
37
  },
38
- "exact_path_accuracy": 0.8,
39
- "fallback_overuse_count": 1123,
40
- "fallback_rate": 0.085,
41
- "parent_safe_accuracy": 0.8631,
42
- "tier1_accuracy": 0.9604,
43
- "tier2_accuracy": 0.9189,
44
- "tier3_accuracy": 0.843,
45
- "tier4_accuracy": 0.6589
 
 
 
 
46
  },
47
  "disagreements": {
48
- "retrieval_vs_combined": 0
 
 
 
49
  },
50
- "embedding_retrieval": {
51
- "average_prediction_depth": 2.2368,
52
- "error_buckets": {
53
- "exact_match": 10569,
54
- "parent_safe_stop": 338,
55
- "right_tier1_wrong_tier2": 834,
56
- "wrong_deep_leaf": 947,
57
- "wrong_tier1": 523
58
- },
59
- "exact_path_accuracy": 0.8,
60
- "parent_safe_accuracy": 0.8631,
61
- "tier1_accuracy": 0.9604,
62
- "tier2_accuracy": 0.9189,
63
- "tier3_accuracy": 0.843,
64
- "tier4_accuracy": 0.6589
65
  }
66
  }
67
  }
 
1
  {
2
+ "accepted_accuracy": 0.9221,
3
+ "accepted_coverage": 0.998,
4
+ "accuracy": 0.9212,
5
  "count": 13211,
6
+ "dataset_path": "/root/.cache/huggingface/hub/models--admesh--agentic-intent-classifier/snapshots/0584798f8efee6beccd778b0afa06782ab5add60/agentic-intent-classifier/agentic-intent-classifier/data/iab/train.jsonl",
7
+ "fallback_rate": 0.002,
8
  "head": "iab_content",
9
+ "macro_f1": 0.8805,
10
+ "primary_source": "supervised_classifier",
11
  "suite": "train",
12
  "tier_metrics": {
13
+ "average_prediction_depth": 2.1738,
14
  "error_buckets": {
15
+ "exact_match": 12170,
16
+ "parent_safe_stop": 238,
17
+ "right_tier1_wrong_tier2": 294,
18
+ "wrong_deep_leaf": 337,
19
+ "wrong_tier1": 172
20
  },
21
+ "exact_path_accuracy": 0.9212,
22
+ "parent_safe_accuracy": 0.9492,
23
+ "tier1_accuracy": 0.987,
24
+ "tier2_accuracy": 0.9629,
25
+ "tier3_accuracy": 0.8617,
26
+ "tier4_accuracy": 0.5554
27
  },
28
  "view_metrics": {
29
+ "classifier": {
30
+ "average_prediction_depth": 2.1738,
31
  "error_buckets": {
32
+ "exact_match": 12011,
33
+ "parent_safe_stop": 232,
34
+ "right_tier1_wrong_tier2": 342,
35
+ "wrong_deep_leaf": 454,
36
+ "wrong_tier1": 172
37
  },
38
+ "exact_path_accuracy": 0.9092,
39
+ "parent_safe_accuracy": 0.9367,
40
+ "tier1_accuracy": 0.987,
41
+ "tier2_accuracy": 0.9588,
42
+ "tier3_accuracy": 0.8293,
43
+ "tier4_accuracy": 0.3607
44
+ },
45
+ "combined_path": {
46
+ "count": 13211,
47
+ "max_combined_rows": 500,
48
+ "reason": "dataset_too_large_for_combined_view",
49
+ "skipped": true
50
  },
51
  "disagreements": {
52
+ "count": 13211,
53
+ "max_combined_rows": 500,
54
+ "reason": "dataset_too_large_for_combined_view",
55
+ "skipped": true
56
  },
57
+ "shadow_embedding_retrieval": {
58
+ "hint": "Set IAB_EVAL_INCLUDE_SHADOW_RETRIEVAL=1 to run shadow embedding retrieval (downloads/loads gte-Qwen2 when index is present).",
59
+ "reason": "disabled_by_default",
60
+ "skipped": true
 
 
 
 
 
 
 
 
 
 
 
61
  }
62
  }
63
  }
artifacts/evaluation/latest/iab_content_val_report.json CHANGED
@@ -1,67 +1,63 @@
1
  {
2
- "accepted_accuracy": 0.6545,
3
- "accepted_coverage": 1.0,
4
- "accuracy": 0.6545,
5
  "count": 3282,
6
- "dataset_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/data/iab/val.jsonl",
7
- "fallback_rate": 0.0,
8
  "head": "iab_content",
9
- "macro_f1": 0.6957,
10
- "primary_source": "embedding_retrieval",
11
  "suite": "val",
12
  "tier_metrics": {
13
- "average_prediction_depth": 2.1813,
14
  "error_buckets": {
15
- "exact_match": 2148,
16
- "parent_safe_stop": 105,
17
- "right_tier1_wrong_tier2": 684,
18
- "wrong_deep_leaf": 234,
19
- "wrong_tier1": 111
20
  },
21
- "exact_path_accuracy": 0.6545,
22
- "parent_safe_accuracy": 0.7821,
23
- "tier1_accuracy": 0.9662,
24
- "tier2_accuracy": 0.7577,
25
- "tier3_accuracy": 0.8352,
26
- "tier4_accuracy": 0.7214
27
  },
28
  "view_metrics": {
29
- "combined_path": {
30
- "average_prediction_depth": 2.1813,
31
  "error_buckets": {
32
- "exact_match": 2116,
33
- "parent_safe_stop": 100,
34
- "right_tier1_wrong_tier2": 689,
35
- "wrong_deep_leaf": 266,
36
- "wrong_tier1": 111
37
  },
38
- "exact_path_accuracy": 0.6447,
39
- "fallback_overuse_count": 413,
40
- "fallback_rate": 0.1258,
41
- "parent_safe_accuracy": 0.7709,
42
- "tier1_accuracy": 0.9662,
43
- "tier2_accuracy": 0.756,
44
- "tier3_accuracy": 0.799,
45
- "tier4_accuracy": 0.55
 
 
 
 
46
  },
47
  "disagreements": {
48
- "retrieval_vs_combined": 0
 
 
 
49
  },
50
- "embedding_retrieval": {
51
- "average_prediction_depth": 2.1813,
52
- "error_buckets": {
53
- "exact_match": 2116,
54
- "parent_safe_stop": 100,
55
- "right_tier1_wrong_tier2": 689,
56
- "wrong_deep_leaf": 266,
57
- "wrong_tier1": 111
58
- },
59
- "exact_path_accuracy": 0.6447,
60
- "parent_safe_accuracy": 0.7709,
61
- "tier1_accuracy": 0.9662,
62
- "tier2_accuracy": 0.756,
63
- "tier3_accuracy": 0.799,
64
- "tier4_accuracy": 0.55
65
  }
66
  }
67
  }
 
1
  {
2
+ "accepted_accuracy": 0.9138,
3
+ "accepted_coverage": 0.9963,
4
+ "accuracy": 0.9126,
5
  "count": 3282,
6
+ "dataset_path": "/root/.cache/huggingface/hub/models--admesh--agentic-intent-classifier/snapshots/0584798f8efee6beccd778b0afa06782ab5add60/agentic-intent-classifier/agentic-intent-classifier/data/iab/val.jsonl",
7
+ "fallback_rate": 0.0037,
8
  "head": "iab_content",
9
+ "macro_f1": 0.8708,
10
+ "primary_source": "supervised_classifier",
11
  "suite": "val",
12
  "tier_metrics": {
13
+ "average_prediction_depth": 2.1795,
14
  "error_buckets": {
15
+ "exact_match": 2995,
16
+ "parent_safe_stop": 63,
17
+ "right_tier1_wrong_tier2": 81,
18
+ "wrong_deep_leaf": 90,
19
+ "wrong_tier1": 53
20
  },
21
+ "exact_path_accuracy": 0.9126,
22
+ "parent_safe_accuracy": 0.9427,
23
+ "tier1_accuracy": 0.9839,
24
+ "tier2_accuracy": 0.9565,
25
+ "tier3_accuracy": 0.8549,
26
+ "tier4_accuracy": 0.5429
27
  },
28
  "view_metrics": {
29
+ "classifier": {
30
+ "average_prediction_depth": 2.1795,
31
  "error_buckets": {
32
+ "exact_match": 2958,
33
+ "parent_safe_stop": 60,
34
+ "right_tier1_wrong_tier2": 93,
35
+ "wrong_deep_leaf": 118,
36
+ "wrong_tier1": 53
37
  },
38
+ "exact_path_accuracy": 0.9013,
39
+ "parent_safe_accuracy": 0.9305,
40
+ "tier1_accuracy": 0.9839,
41
+ "tier2_accuracy": 0.9524,
42
+ "tier3_accuracy": 0.8238,
43
+ "tier4_accuracy": 0.3643
44
+ },
45
+ "combined_path": {
46
+ "count": 3282,
47
+ "max_combined_rows": 500,
48
+ "reason": "dataset_too_large_for_combined_view",
49
+ "skipped": true
50
  },
51
  "disagreements": {
52
+ "count": 3282,
53
+ "max_combined_rows": 500,
54
+ "reason": "dataset_too_large_for_combined_view",
55
+ "skipped": true
56
  },
57
+ "shadow_embedding_retrieval": {
58
+ "hint": "Set IAB_EVAL_INCLUDE_SHADOW_RETRIEVAL=1 to run shadow embedding retrieval (downloads/loads gte-Qwen2 when index is present).",
59
+ "reason": "disabled_by_default",
60
+ "skipped": true
 
 
 
 
 
 
 
 
 
 
 
61
  }
62
  }
63
  }
artifacts/evaluation/latest/iab_cross_vertical_behavior_lock_regression.json CHANGED
The diff for this file is too large to render. See raw diff
 
artifacts/evaluation/latest/iab_cross_vertical_quality_target_eval.json CHANGED
@@ -1,21 +1,21 @@
1
  {
2
  "by_status": {
3
  "must_fix": {
4
- "failed": 86,
5
- "passed": 4,
6
  "total": 90
7
  }
8
  },
9
- "cases_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/examples/iab_cross_vertical_mapping_cases.json",
10
  "count": 90,
11
- "failed": 86,
12
- "passed": 4,
13
  "results": [
14
  {
15
  "actual": {
16
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
17
- "model_output.classification.iab_content.tier1.label": "Automotive",
18
- "model_output.classification.iab_content.tier2.label": "Auto Type"
19
  },
20
  "expected": {
21
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -25,7 +25,17 @@
25
  "id": "auto-buying-easy",
26
  "mismatches": [
27
  {
28
- "actual": "Auto Type",
 
 
 
 
 
 
 
 
 
 
29
  "expected": "Auto Buying and Selling",
30
  "path": "model_output.classification.iab_content.tier2.label"
31
  }
@@ -37,7 +47,7 @@
37
  },
38
  {
39
  "actual": {
40
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
41
  "model_output.classification.iab_content.tier1.label": "Automotive",
42
  "model_output.classification.iab_content.tier2.label": "Auto Body Styles"
43
  },
@@ -48,6 +58,11 @@
48
  },
49
  "id": "auto-buying-medium",
50
  "mismatches": [
 
 
 
 
 
51
  {
52
  "actual": "Auto Body Styles",
53
  "expected": "Auto Buying and Selling",
@@ -61,9 +76,9 @@
61
  },
62
  {
63
  "actual": {
64
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
65
  "model_output.classification.iab_content.tier1.label": "Automotive",
66
- "model_output.classification.iab_content.tier2.label": "Auto Type"
67
  },
68
  "expected": {
69
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -73,7 +88,12 @@
73
  "id": "auto-buying-hard",
74
  "mismatches": [
75
  {
76
- "actual": "Auto Type",
 
 
 
 
 
77
  "expected": "Auto Buying and Selling",
78
  "path": "model_output.classification.iab_content.tier2.label"
79
  }
@@ -85,10 +105,10 @@
85
  },
86
  {
87
  "actual": {
88
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
89
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
90
- "model_output.classification.iab_content.tier2.label": "Computing",
91
- "model_output.classification.iab_content.tier3.label": "Software and Applications"
92
  },
93
  "expected": {
94
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -104,17 +124,12 @@
104
  "path": "model_output.classification.iab_content.tier1.label"
105
  },
106
  {
107
- "actual": "nearest_equivalent",
108
- "expected": "exact",
109
- "path": "model_output.classification.iab_content.mapping_mode"
110
- },
111
- {
112
- "actual": "Computing",
113
  "expected": "Business",
114
  "path": "model_output.classification.iab_content.tier2.label"
115
  },
116
  {
117
- "actual": "Software and Applications",
118
  "expected": "Sales",
119
  "path": "model_output.classification.iab_content.tier3.label"
120
  }
@@ -127,9 +142,9 @@
127
  {
128
  "actual": {
129
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
130
- "model_output.classification.iab_content.tier1.label": "Technology & Computing",
131
- "model_output.classification.iab_content.tier2.label": "Computing",
132
- "model_output.classification.iab_content.tier3.label": "Internet"
133
  },
134
  "expected": {
135
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -140,7 +155,7 @@
140
  "id": "sales-crm-medium",
141
  "mismatches": [
142
  {
143
- "actual": "Technology & Computing",
144
  "expected": "Business and Finance",
145
  "path": "model_output.classification.iab_content.tier1.label"
146
  },
@@ -150,12 +165,12 @@
150
  "path": "model_output.classification.iab_content.mapping_mode"
151
  },
152
  {
153
- "actual": "Computing",
154
  "expected": "Business",
155
  "path": "model_output.classification.iab_content.tier2.label"
156
  },
157
  {
158
- "actual": "Internet",
159
  "expected": "Sales",
160
  "path": "model_output.classification.iab_content.tier3.label"
161
  }
@@ -169,7 +184,7 @@
169
  "actual": {
170
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
171
  "model_output.classification.iab_content.tier1.label": "Business and Finance",
172
- "model_output.classification.iab_content.tier2.label": "Business",
173
  "model_output.classification.iab_content.tier3.label": null
174
  },
175
  "expected": {
@@ -185,6 +200,11 @@
185
  "expected": "exact",
186
  "path": "model_output.classification.iab_content.mapping_mode"
187
  },
 
 
 
 
 
188
  {
189
  "actual": null,
190
  "expected": "Sales",
@@ -199,8 +219,8 @@
199
  {
200
  "actual": {
201
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
202
- "model_output.classification.iab_content.tier1.label": "Hobbies & Interests",
203
- "model_output.classification.iab_content.tier2.label": "Content Production",
204
  "model_output.classification.iab_content.tier3.label": null
205
  },
206
  "expected": {
@@ -212,7 +232,7 @@
212
  "id": "marketing-tools-easy",
213
  "mismatches": [
214
  {
215
- "actual": "Hobbies & Interests",
216
  "expected": "Business and Finance",
217
  "path": "model_output.classification.iab_content.tier1.label"
218
  },
@@ -222,7 +242,7 @@
222
  "path": "model_output.classification.iab_content.mapping_mode"
223
  },
224
  {
225
- "actual": "Content Production",
226
  "expected": "Business",
227
  "path": "model_output.classification.iab_content.tier2.label"
228
  },
@@ -240,8 +260,8 @@
240
  {
241
  "actual": {
242
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
243
- "model_output.classification.iab_content.tier1.label": "Sensitive Topics",
244
- "model_output.classification.iab_content.tier2.label": "Online Piracy",
245
  "model_output.classification.iab_content.tier3.label": null
246
  },
247
  "expected": {
@@ -252,18 +272,13 @@
252
  },
253
  "id": "marketing-tools-medium",
254
  "mismatches": [
255
- {
256
- "actual": "Sensitive Topics",
257
- "expected": "Business and Finance",
258
- "path": "model_output.classification.iab_content.tier1.label"
259
- },
260
  {
261
  "actual": "nearest_equivalent",
262
  "expected": "exact",
263
  "path": "model_output.classification.iab_content.mapping_mode"
264
  },
265
  {
266
- "actual": "Online Piracy",
267
  "expected": "Business",
268
  "path": "model_output.classification.iab_content.tier2.label"
269
  },
@@ -281,9 +296,9 @@
281
  {
282
  "actual": {
283
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
284
- "model_output.classification.iab_content.tier1.label": "Genres",
285
- "model_output.classification.iab_content.tier2.label": "Talk Radio",
286
- "model_output.classification.iab_content.tier3.label": "Public Radio"
287
  },
288
  "expected": {
289
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -293,23 +308,18 @@
293
  },
294
  "id": "marketing-tools-hard",
295
  "mismatches": [
296
- {
297
- "actual": "Genres",
298
- "expected": "Business and Finance",
299
- "path": "model_output.classification.iab_content.tier1.label"
300
- },
301
  {
302
  "actual": "nearest_equivalent",
303
  "expected": "exact",
304
  "path": "model_output.classification.iab_content.mapping_mode"
305
  },
306
  {
307
- "actual": "Talk Radio",
308
  "expected": "Business",
309
  "path": "model_output.classification.iab_content.tier2.label"
310
  },
311
  {
312
- "actual": "Public Radio",
313
  "expected": "Marketing and Advertising",
314
  "path": "model_output.classification.iab_content.tier3.label"
315
  }
@@ -322,8 +332,8 @@
322
  {
323
  "actual": {
324
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
325
- "model_output.classification.iab_content.tier1.label": "Careers",
326
- "model_output.classification.iab_content.tier2.label": "Job Search",
327
  "model_output.classification.iab_content.tier3.label": null
328
  },
329
  "expected": {
@@ -335,7 +345,7 @@
335
  "id": "business-it-easy",
336
  "mismatches": [
337
  {
338
- "actual": "Careers",
339
  "expected": "Business and Finance",
340
  "path": "model_output.classification.iab_content.tier1.label"
341
  },
@@ -345,7 +355,7 @@
345
  "path": "model_output.classification.iab_content.mapping_mode"
346
  },
347
  {
348
- "actual": "Job Search",
349
  "expected": "Business",
350
  "path": "model_output.classification.iab_content.tier2.label"
351
  },
@@ -364,7 +374,7 @@
364
  "actual": {
365
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
366
  "model_output.classification.iab_content.tier1.label": "Business and Finance",
367
- "model_output.classification.iab_content.tier2.label": "Business",
368
  "model_output.classification.iab_content.tier3.label": null
369
  },
370
  "expected": {
@@ -380,6 +390,11 @@
380
  "expected": "exact",
381
  "path": "model_output.classification.iab_content.mapping_mode"
382
  },
 
 
 
 
 
383
  {
384
  "actual": null,
385
  "expected": "Business I.T.",
@@ -396,7 +411,7 @@
396
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
397
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
398
  "model_output.classification.iab_content.tier2.label": "Computing",
399
- "model_output.classification.iab_content.tier3.label": "Software and Applications"
400
  },
401
  "expected": {
402
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -422,7 +437,7 @@
422
  "path": "model_output.classification.iab_content.tier2.label"
423
  },
424
  {
425
- "actual": "Software and Applications",
426
  "expected": "Business I.T.",
427
  "path": "model_output.classification.iab_content.tier3.label"
428
  }
@@ -434,9 +449,9 @@
434
  },
435
  {
436
  "actual": {
437
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
438
- "model_output.classification.iab_content.tier1.label": "Sports",
439
- "model_output.classification.iab_content.tier2.label": "Table Tennis"
440
  },
441
  "expected": {
442
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -444,33 +459,17 @@
444
  "model_output.classification.iab_content.tier2.label": "Dining Out"
445
  },
446
  "id": "dining-out-easy",
447
- "mismatches": [
448
- {
449
- "actual": "Sports",
450
- "expected": "Food & Drink",
451
- "path": "model_output.classification.iab_content.tier1.label"
452
- },
453
- {
454
- "actual": "nearest_equivalent",
455
- "expected": "exact",
456
- "path": "model_output.classification.iab_content.mapping_mode"
457
- },
458
- {
459
- "actual": "Table Tennis",
460
- "expected": "Dining Out",
461
- "path": "model_output.classification.iab_content.tier2.label"
462
- }
463
- ],
464
  "notes": "Cross-vertical easy IAB mapping case for Food & Drink > Dining Out.",
465
- "pass": false,
466
  "status": "must_fix",
467
  "text": "Book a table for six tonight"
468
  },
469
  {
470
  "actual": {
471
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
472
- "model_output.classification.iab_content.tier1.label": "Attractions",
473
- "model_output.classification.iab_content.tier2.label": null
474
  },
475
  "expected": {
476
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -478,33 +477,17 @@
478
  "model_output.classification.iab_content.tier2.label": "Dining Out"
479
  },
480
  "id": "dining-out-medium",
481
- "mismatches": [
482
- {
483
- "actual": "Attractions",
484
- "expected": "Food & Drink",
485
- "path": "model_output.classification.iab_content.tier1.label"
486
- },
487
- {
488
- "actual": "nearest_equivalent",
489
- "expected": "exact",
490
- "path": "model_output.classification.iab_content.mapping_mode"
491
- },
492
- {
493
- "actual": null,
494
- "expected": "Dining Out",
495
- "path": "model_output.classification.iab_content.tier2.label"
496
- }
497
- ],
498
  "notes": "Cross-vertical medium IAB mapping case for Food & Drink > Dining Out.",
499
- "pass": false,
500
  "status": "must_fix",
501
  "text": "Good restaurants for a client dinner downtown"
502
  },
503
  {
504
  "actual": {
505
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
506
  "model_output.classification.iab_content.tier1.label": "Food & Drink",
507
- "model_output.classification.iab_content.tier2.label": null
508
  },
509
  "expected": {
510
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -512,28 +495,17 @@
512
  "model_output.classification.iab_content.tier2.label": "Dining Out"
513
  },
514
  "id": "dining-out-hard",
515
- "mismatches": [
516
- {
517
- "actual": "nearest_equivalent",
518
- "expected": "exact",
519
- "path": "model_output.classification.iab_content.mapping_mode"
520
- },
521
- {
522
- "actual": null,
523
- "expected": "Dining Out",
524
- "path": "model_output.classification.iab_content.tier2.label"
525
- }
526
- ],
527
  "notes": "Cross-vertical hard IAB mapping case for Food & Drink > Dining Out.",
528
- "pass": false,
529
  "status": "must_fix",
530
  "text": "Need a place to eat tonight where I can make a reservation online"
531
  },
532
  {
533
  "actual": {
534
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
535
- "model_output.classification.iab_content.tier1.label": "Style & Fashion",
536
- "model_output.classification.iab_content.tier2.label": null
537
  },
538
  "expected": {
539
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -541,33 +513,17 @@
541
  "model_output.classification.iab_content.tier2.label": "Alcoholic Beverages"
542
  },
543
  "id": "alcoholic-beverages-easy",
544
- "mismatches": [
545
- {
546
- "actual": "Style & Fashion",
547
- "expected": "Food & Drink",
548
- "path": "model_output.classification.iab_content.tier1.label"
549
- },
550
- {
551
- "actual": "nearest_equivalent",
552
- "expected": "exact",
553
- "path": "model_output.classification.iab_content.mapping_mode"
554
- },
555
- {
556
- "actual": null,
557
- "expected": "Alcoholic Beverages",
558
- "path": "model_output.classification.iab_content.tier2.label"
559
- }
560
- ],
561
  "notes": "Cross-vertical easy IAB mapping case for Food & Drink > Alcoholic Beverages.",
562
- "pass": false,
563
  "status": "must_fix",
564
  "text": "Which whiskey cocktail should I order?"
565
  },
566
  {
567
  "actual": {
568
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
569
  "model_output.classification.iab_content.tier1.label": "Food & Drink",
570
- "model_output.classification.iab_content.tier2.label": null
571
  },
572
  "expected": {
573
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -575,28 +531,17 @@
575
  "model_output.classification.iab_content.tier2.label": "Alcoholic Beverages"
576
  },
577
  "id": "alcoholic-beverages-medium",
578
- "mismatches": [
579
- {
580
- "actual": "nearest_equivalent",
581
- "expected": "exact",
582
- "path": "model_output.classification.iab_content.mapping_mode"
583
- },
584
- {
585
- "actual": null,
586
- "expected": "Alcoholic Beverages",
587
- "path": "model_output.classification.iab_content.tier2.label"
588
- }
589
- ],
590
  "notes": "Cross-vertical medium IAB mapping case for Food & Drink > Alcoholic Beverages.",
591
- "pass": false,
592
  "status": "must_fix",
593
  "text": "Best vodka drinks for beginners"
594
  },
595
  {
596
  "actual": {
597
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
598
  "model_output.classification.iab_content.tier1.label": "Food & Drink",
599
- "model_output.classification.iab_content.tier2.label": null
600
  },
601
  "expected": {
602
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -606,12 +551,7 @@
606
  "id": "alcoholic-beverages-hard",
607
  "mismatches": [
608
  {
609
- "actual": "nearest_equivalent",
610
- "expected": "exact",
611
- "path": "model_output.classification.iab_content.mapping_mode"
612
- },
613
- {
614
- "actual": null,
615
  "expected": "Alcoholic Beverages",
616
  "path": "model_output.classification.iab_content.tier2.label"
617
  }
@@ -624,7 +564,7 @@
624
  {
625
  "actual": {
626
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
627
- "model_output.classification.iab_content.tier1.label": "Sensitive Topics",
628
  "model_output.classification.iab_content.tier2.label": null
629
  },
630
  "expected": {
@@ -635,7 +575,7 @@
635
  "id": "artificial-intelligence-easy",
636
  "mismatches": [
637
  {
638
- "actual": "Sensitive Topics",
639
  "expected": "Technology & Computing",
640
  "path": "model_output.classification.iab_content.tier1.label"
641
  },
@@ -657,9 +597,9 @@
657
  },
658
  {
659
  "actual": {
660
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
661
- "model_output.classification.iab_content.tier1.label": "Sensitive Topics",
662
- "model_output.classification.iab_content.tier2.label": null
663
  },
664
  "expected": {
665
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -669,17 +609,12 @@
669
  "id": "artificial-intelligence-medium",
670
  "mismatches": [
671
  {
672
- "actual": "Sensitive Topics",
673
  "expected": "Technology & Computing",
674
  "path": "model_output.classification.iab_content.tier1.label"
675
  },
676
  {
677
- "actual": "nearest_equivalent",
678
- "expected": "exact",
679
- "path": "model_output.classification.iab_content.mapping_mode"
680
- },
681
- {
682
- "actual": null,
683
  "expected": "Artificial Intelligence",
684
  "path": "model_output.classification.iab_content.tier2.label"
685
  }
@@ -691,7 +626,7 @@
691
  },
692
  {
693
  "actual": {
694
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
695
  "model_output.classification.iab_content.tier1.label": "Education",
696
  "model_output.classification.iab_content.tier2.label": "Language Learning"
697
  },
@@ -707,11 +642,6 @@
707
  "expected": "Technology & Computing",
708
  "path": "model_output.classification.iab_content.tier1.label"
709
  },
710
- {
711
- "actual": "nearest_equivalent",
712
- "expected": "exact",
713
- "path": "model_output.classification.iab_content.mapping_mode"
714
- },
715
  {
716
  "actual": "Language Learning",
717
  "expected": "Artificial Intelligence",
@@ -727,8 +657,8 @@
727
  "actual": {
728
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
729
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
730
- "model_output.classification.iab_content.tier2.label": "Computing",
731
- "model_output.classification.iab_content.tier3.label": "Software and Applications"
732
  },
733
  "expected": {
734
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -742,6 +672,16 @@
742
  "actual": "nearest_equivalent",
743
  "expected": "exact",
744
  "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
 
 
 
 
 
745
  }
746
  ],
747
  "notes": "Cross-vertical easy IAB mapping case for Technology & Computing > Computing > Computer Software and Applications.",
@@ -753,8 +693,8 @@
753
  "actual": {
754
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
755
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
756
- "model_output.classification.iab_content.tier2.label": "Computing",
757
- "model_output.classification.iab_content.tier3.label": "Software and Applications"
758
  },
759
  "expected": {
760
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -768,6 +708,16 @@
768
  "actual": "nearest_equivalent",
769
  "expected": "exact",
770
  "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
 
 
 
 
 
771
  }
772
  ],
773
  "notes": "Cross-vertical medium IAB mapping case for Technology & Computing > Computing > Computer Software and Applications.",
@@ -779,8 +729,8 @@
779
  "actual": {
780
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
781
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
782
- "model_output.classification.iab_content.tier2.label": "Computing",
783
- "model_output.classification.iab_content.tier3.label": "Software and Applications"
784
  },
785
  "expected": {
786
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -794,6 +744,16 @@
794
  "actual": "nearest_equivalent",
795
  "expected": "exact",
796
  "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
 
 
 
 
 
797
  }
798
  ],
799
  "notes": "Cross-vertical hard IAB mapping case for Technology & Computing > Computing > Computer Software and Applications.",
@@ -803,7 +763,7 @@
803
  },
804
  {
805
  "actual": {
806
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
807
  "model_output.classification.iab_content.tier1.label": "Careers",
808
  "model_output.classification.iab_content.tier2.label": "Remote Working",
809
  "model_output.classification.iab_content.tier3.label": null,
@@ -823,11 +783,6 @@
823
  "expected": "Technology & Computing",
824
  "path": "model_output.classification.iab_content.tier1.label"
825
  },
826
- {
827
- "actual": "nearest_equivalent",
828
- "expected": "exact",
829
- "path": "model_output.classification.iab_content.mapping_mode"
830
- },
831
  {
832
  "actual": "Remote Working",
833
  "expected": "Computing",
@@ -854,7 +809,7 @@
854
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
855
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
856
  "model_output.classification.iab_content.tier2.label": "Computing",
857
- "model_output.classification.iab_content.tier3.label": "Internet",
858
  "model_output.classification.iab_content.tier4.label": null
859
  },
860
  "expected": {
@@ -872,7 +827,7 @@
872
  "path": "model_output.classification.iab_content.mapping_mode"
873
  },
874
  {
875
- "actual": "Internet",
876
  "expected": "Software and Applications",
877
  "path": "model_output.classification.iab_content.tier3.label"
878
  },
@@ -891,7 +846,7 @@
891
  "actual": {
892
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
893
  "model_output.classification.iab_content.tier1.label": "Careers",
894
- "model_output.classification.iab_content.tier2.label": "Remote Working",
895
  "model_output.classification.iab_content.tier3.label": null,
896
  "model_output.classification.iab_content.tier4.label": null
897
  },
@@ -915,7 +870,7 @@
915
  "path": "model_output.classification.iab_content.mapping_mode"
916
  },
917
  {
918
- "actual": "Remote Working",
919
  "expected": "Computing",
920
  "path": "model_output.classification.iab_content.tier2.label"
921
  },
@@ -975,11 +930,11 @@
975
  },
976
  {
977
  "actual": {
978
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
979
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
980
  "model_output.classification.iab_content.tier2.label": "Computing",
981
  "model_output.classification.iab_content.tier3.label": "Internet",
982
- "model_output.classification.iab_content.tier4.label": null
983
  },
984
  "expected": {
985
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -989,30 +944,19 @@
989
  "model_output.classification.iab_content.tier4.label": "Web Hosting"
990
  },
991
  "id": "web-hosting-medium",
992
- "mismatches": [
993
- {
994
- "actual": "nearest_equivalent",
995
- "expected": "exact",
996
- "path": "model_output.classification.iab_content.mapping_mode"
997
- },
998
- {
999
- "actual": null,
1000
- "expected": "Web Hosting",
1001
- "path": "model_output.classification.iab_content.tier4.label"
1002
- }
1003
- ],
1004
  "notes": "Cross-vertical medium IAB mapping case for Technology & Computing > Computing > Internet > Web Hosting.",
1005
- "pass": false,
1006
  "status": "must_fix",
1007
  "text": "Best hosting platform for a startup website"
1008
  },
1009
  {
1010
  "actual": {
1011
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1012
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1013
  "model_output.classification.iab_content.tier2.label": "Computing",
1014
  "model_output.classification.iab_content.tier3.label": "Internet",
1015
- "model_output.classification.iab_content.tier4.label": null
1016
  },
1017
  "expected": {
1018
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1022,26 +966,15 @@
1022
  "model_output.classification.iab_content.tier4.label": "Web Hosting"
1023
  },
1024
  "id": "web-hosting-hard",
1025
- "mismatches": [
1026
- {
1027
- "actual": "nearest_equivalent",
1028
- "expected": "exact",
1029
- "path": "model_output.classification.iab_content.mapping_mode"
1030
- },
1031
- {
1032
- "actual": null,
1033
- "expected": "Web Hosting",
1034
- "path": "model_output.classification.iab_content.tier4.label"
1035
- }
1036
- ],
1037
  "notes": "Cross-vertical hard IAB mapping case for Technology & Computing > Computing > Internet > Web Hosting.",
1038
- "pass": false,
1039
  "status": "must_fix",
1040
  "text": "Need a managed hosting provider to deploy and run our marketing site"
1041
  },
1042
  {
1043
  "actual": {
1044
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1045
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1046
  "model_output.classification.iab_content.tier2.label": "Computing",
1047
  "model_output.classification.iab_content.tier3.label": "Laptops"
@@ -1053,21 +986,15 @@
1053
  "model_output.classification.iab_content.tier3.label": "Laptops"
1054
  },
1055
  "id": "laptops-easy",
1056
- "mismatches": [
1057
- {
1058
- "actual": "nearest_equivalent",
1059
- "expected": "exact",
1060
- "path": "model_output.classification.iab_content.mapping_mode"
1061
- }
1062
- ],
1063
  "notes": "Cross-vertical easy IAB mapping case for Technology & Computing > Computing > Laptops.",
1064
- "pass": false,
1065
  "status": "must_fix",
1066
  "text": "Which laptop should I buy for college?"
1067
  },
1068
  {
1069
  "actual": {
1070
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1071
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1072
  "model_output.classification.iab_content.tier2.label": "Computing",
1073
  "model_output.classification.iab_content.tier3.label": "Laptops"
@@ -1079,21 +1006,15 @@
1079
  "model_output.classification.iab_content.tier3.label": "Laptops"
1080
  },
1081
  "id": "laptops-medium",
1082
- "mismatches": [
1083
- {
1084
- "actual": "nearest_equivalent",
1085
- "expected": "exact",
1086
- "path": "model_output.classification.iab_content.mapping_mode"
1087
- }
1088
- ],
1089
  "notes": "Cross-vertical medium IAB mapping case for Technology & Computing > Computing > Laptops.",
1090
- "pass": false,
1091
  "status": "must_fix",
1092
  "text": "Best laptop for work and study under 1200"
1093
  },
1094
  {
1095
  "actual": {
1096
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1097
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1098
  "model_output.classification.iab_content.tier2.label": "Computing",
1099
  "model_output.classification.iab_content.tier3.label": "Laptops"
@@ -1105,15 +1026,9 @@
1105
  "model_output.classification.iab_content.tier3.label": "Laptops"
1106
  },
1107
  "id": "laptops-hard",
1108
- "mismatches": [
1109
- {
1110
- "actual": "nearest_equivalent",
1111
- "expected": "exact",
1112
- "path": "model_output.classification.iab_content.mapping_mode"
1113
- }
1114
- ],
1115
  "notes": "Cross-vertical hard IAB mapping case for Technology & Computing > Computing > Laptops.",
1116
- "pass": false,
1117
  "status": "must_fix",
1118
  "text": "Need a portable computer with good battery life for everyday work"
1119
  },
@@ -1150,10 +1065,10 @@
1150
  },
1151
  {
1152
  "actual": {
1153
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1154
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1155
  "model_output.classification.iab_content.tier2.label": "Computing",
1156
- "model_output.classification.iab_content.tier3.label": "Software and Applications"
1157
  },
1158
  "expected": {
1159
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1162,29 +1077,18 @@
1162
  "model_output.classification.iab_content.tier3.label": "Desktops"
1163
  },
1164
  "id": "desktops-medium",
1165
- "mismatches": [
1166
- {
1167
- "actual": "nearest_equivalent",
1168
- "expected": "exact",
1169
- "path": "model_output.classification.iab_content.mapping_mode"
1170
- },
1171
- {
1172
- "actual": "Software and Applications",
1173
- "expected": "Desktops",
1174
- "path": "model_output.classification.iab_content.tier3.label"
1175
- }
1176
- ],
1177
  "notes": "Cross-vertical medium IAB mapping case for Technology & Computing > Computing > Desktops.",
1178
- "pass": false,
1179
  "status": "must_fix",
1180
  "text": "Which desktop computer should I buy for a home office?"
1181
  },
1182
  {
1183
  "actual": {
1184
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1185
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1186
  "model_output.classification.iab_content.tier2.label": "Computing",
1187
- "model_output.classification.iab_content.tier3.label": null
1188
  },
1189
  "expected": {
1190
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1193,29 +1097,18 @@
1193
  "model_output.classification.iab_content.tier3.label": "Desktops"
1194
  },
1195
  "id": "desktops-hard",
1196
- "mismatches": [
1197
- {
1198
- "actual": "nearest_equivalent",
1199
- "expected": "exact",
1200
- "path": "model_output.classification.iab_content.mapping_mode"
1201
- },
1202
- {
1203
- "actual": null,
1204
- "expected": "Desktops",
1205
- "path": "model_output.classification.iab_content.tier3.label"
1206
- }
1207
- ],
1208
  "notes": "Cross-vertical hard IAB mapping case for Technology & Computing > Computing > Desktops.",
1209
- "pass": false,
1210
  "status": "must_fix",
1211
  "text": "Need a desktop PC with strong performance for creative work"
1212
  },
1213
  {
1214
  "actual": {
1215
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1216
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1217
  "model_output.classification.iab_content.tier2.label": "Consumer Electronics",
1218
- "model_output.classification.iab_content.tier3.label": null
1219
  },
1220
  "expected": {
1221
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1224,29 +1117,18 @@
1224
  "model_output.classification.iab_content.tier3.label": "Smartphones"
1225
  },
1226
  "id": "smartphones-easy",
1227
- "mismatches": [
1228
- {
1229
- "actual": "nearest_equivalent",
1230
- "expected": "exact",
1231
- "path": "model_output.classification.iab_content.mapping_mode"
1232
- },
1233
- {
1234
- "actual": null,
1235
- "expected": "Smartphones",
1236
- "path": "model_output.classification.iab_content.tier3.label"
1237
- }
1238
- ],
1239
  "notes": "Cross-vertical easy IAB mapping case for Technology & Computing > Consumer Electronics > Smartphones.",
1240
- "pass": false,
1241
  "status": "must_fix",
1242
  "text": "Best phone with a good camera under 700"
1243
  },
1244
  {
1245
  "actual": {
1246
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1247
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1248
  "model_output.classification.iab_content.tier2.label": "Consumer Electronics",
1249
- "model_output.classification.iab_content.tier3.label": null
1250
  },
1251
  "expected": {
1252
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1255,26 +1137,15 @@
1255
  "model_output.classification.iab_content.tier3.label": "Smartphones"
1256
  },
1257
  "id": "smartphones-medium",
1258
- "mismatches": [
1259
- {
1260
- "actual": "nearest_equivalent",
1261
- "expected": "exact",
1262
- "path": "model_output.classification.iab_content.mapping_mode"
1263
- },
1264
- {
1265
- "actual": null,
1266
- "expected": "Smartphones",
1267
- "path": "model_output.classification.iab_content.tier3.label"
1268
- }
1269
- ],
1270
  "notes": "Cross-vertical medium IAB mapping case for Technology & Computing > Consumer Electronics > Smartphones.",
1271
- "pass": false,
1272
  "status": "must_fix",
1273
  "text": "Should I buy an iPhone or Pixel this year?"
1274
  },
1275
  {
1276
  "actual": {
1277
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1278
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1279
  "model_output.classification.iab_content.tier2.label": "Consumer Electronics",
1280
  "model_output.classification.iab_content.tier3.label": "Smartphones"
@@ -1286,15 +1157,9 @@
1286
  "model_output.classification.iab_content.tier3.label": "Smartphones"
1287
  },
1288
  "id": "smartphones-hard",
1289
- "mismatches": [
1290
- {
1291
- "actual": "nearest_equivalent",
1292
- "expected": "exact",
1293
- "path": "model_output.classification.iab_content.mapping_mode"
1294
- }
1295
- ],
1296
  "notes": "Cross-vertical hard IAB mapping case for Technology & Computing > Consumer Electronics > Smartphones.",
1297
- "pass": false,
1298
  "status": "must_fix",
1299
  "text": "Need a new smartphone with strong battery life and a clean software experience"
1300
  },
@@ -1316,7 +1181,7 @@
1316
  },
1317
  {
1318
  "actual": {
1319
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1320
  "model_output.classification.iab_content.tier1.label": "Style & Fashion"
1321
  },
1322
  "expected": {
@@ -1324,15 +1189,21 @@
1324
  "model_output.classification.iab_content.tier1.label": "Style & Fashion"
1325
  },
1326
  "id": "style-fashion-parent-medium",
1327
- "mismatches": [],
 
 
 
 
 
 
1328
  "notes": "Cross-vertical medium IAB mapping case for Style & Fashion.",
1329
- "pass": true,
1330
  "status": "must_fix",
1331
  "text": "Affordable fashion accessories for everyday wear"
1332
  },
1333
  {
1334
  "actual": {
1335
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1336
  "model_output.classification.iab_content.tier1.label": "Style & Fashion"
1337
  },
1338
  "expected": {
@@ -1340,18 +1211,24 @@
1340
  "model_output.classification.iab_content.tier1.label": "Style & Fashion"
1341
  },
1342
  "id": "style-fashion-parent-hard",
1343
- "mismatches": [],
 
 
 
 
 
 
1344
  "notes": "Cross-vertical hard IAB mapping case for Style & Fashion.",
1345
- "pass": true,
1346
  "status": "must_fix",
1347
  "text": "Need style recommendations for clothing and footwear without a specific brand in mind"
1348
  },
1349
  {
1350
  "actual": {
1351
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1352
- "model_output.classification.iab_content.tier1.label": "Style & Fashion",
1353
- "model_output.classification.iab_content.tier2.label": "Women's Fashion",
1354
- "model_output.classification.iab_content.tier3.label": "Women's Shoes and Footwear"
1355
  },
1356
  "expected": {
1357
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1362,9 +1239,19 @@
1362
  "id": "womens-shoes-easy",
1363
  "mismatches": [
1364
  {
1365
- "actual": "nearest_equivalent",
1366
- "expected": "exact",
1367
- "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
 
 
 
 
 
1368
  }
1369
  ],
1370
  "notes": "Cross-vertical easy IAB mapping case for Style & Fashion > Women's Fashion > Women's Shoes and Footwear.",
@@ -1374,10 +1261,10 @@
1374
  },
1375
  {
1376
  "actual": {
1377
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1378
- "model_output.classification.iab_content.tier1.label": "Style & Fashion",
1379
- "model_output.classification.iab_content.tier2.label": "Women's Fashion",
1380
- "model_output.classification.iab_content.tier3.label": "Women's Shoes and Footwear"
1381
  },
1382
  "expected": {
1383
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1388,9 +1275,19 @@
1388
  "id": "womens-shoes-medium",
1389
  "mismatches": [
1390
  {
1391
- "actual": "nearest_equivalent",
1392
- "expected": "exact",
1393
- "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
 
 
 
 
 
1394
  }
1395
  ],
1396
  "notes": "Cross-vertical medium IAB mapping case for Style & Fashion > Women's Fashion > Women's Shoes and Footwear.",
@@ -1400,7 +1297,7 @@
1400
  },
1401
  {
1402
  "actual": {
1403
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1404
  "model_output.classification.iab_content.tier1.label": "Style & Fashion",
1405
  "model_output.classification.iab_content.tier2.label": "Women's Fashion",
1406
  "model_output.classification.iab_content.tier3.label": "Women's Shoes and Footwear"
@@ -1412,15 +1309,9 @@
1412
  "model_output.classification.iab_content.tier3.label": "Women's Shoes and Footwear"
1413
  },
1414
  "id": "womens-shoes-hard",
1415
- "mismatches": [
1416
- {
1417
- "actual": "nearest_equivalent",
1418
- "expected": "exact",
1419
- "path": "model_output.classification.iab_content.mapping_mode"
1420
- }
1421
- ],
1422
  "notes": "Cross-vertical hard IAB mapping case for Style & Fashion > Women's Fashion > Women's Shoes and Footwear.",
1423
- "pass": false,
1424
  "status": "must_fix",
1425
  "text": "Need women's footwear for commuting that looks polished but feels comfortable"
1426
  },
@@ -1429,7 +1320,7 @@
1429
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1430
  "model_output.classification.iab_content.tier1.label": "Style & Fashion",
1431
  "model_output.classification.iab_content.tier2.label": "Men's Fashion",
1432
- "model_output.classification.iab_content.tier3.label": "Men's Shoes and Footwear"
1433
  },
1434
  "expected": {
1435
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1443,6 +1334,11 @@
1443
  "actual": "nearest_equivalent",
1444
  "expected": "exact",
1445
  "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
1446
  }
1447
  ],
1448
  "notes": "Cross-vertical easy IAB mapping case for Style & Fashion > Men's Fashion > Men's Shoes and Footwear.",
@@ -1452,10 +1348,10 @@
1452
  },
1453
  {
1454
  "actual": {
1455
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1456
  "model_output.classification.iab_content.tier1.label": "Style & Fashion",
1457
  "model_output.classification.iab_content.tier2.label": "Men's Fashion",
1458
- "model_output.classification.iab_content.tier3.label": "Men's Shoes and Footwear"
1459
  },
1460
  "expected": {
1461
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1466,9 +1362,9 @@
1466
  "id": "mens-shoes-medium",
1467
  "mismatches": [
1468
  {
1469
- "actual": "nearest_equivalent",
1470
- "expected": "exact",
1471
- "path": "model_output.classification.iab_content.mapping_mode"
1472
  }
1473
  ],
1474
  "notes": "Cross-vertical medium IAB mapping case for Style & Fashion > Men's Fashion > Men's Shoes and Footwear.",
@@ -1478,7 +1374,7 @@
1478
  },
1479
  {
1480
  "actual": {
1481
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1482
  "model_output.classification.iab_content.tier1.label": "Style & Fashion",
1483
  "model_output.classification.iab_content.tier2.label": "Men's Fashion",
1484
  "model_output.classification.iab_content.tier3.label": "Men's Shoes and Footwear"
@@ -1490,24 +1386,18 @@
1490
  "model_output.classification.iab_content.tier3.label": "Men's Shoes and Footwear"
1491
  },
1492
  "id": "mens-shoes-hard",
1493
- "mismatches": [
1494
- {
1495
- "actual": "nearest_equivalent",
1496
- "expected": "exact",
1497
- "path": "model_output.classification.iab_content.mapping_mode"
1498
- }
1499
- ],
1500
  "notes": "Cross-vertical hard IAB mapping case for Style & Fashion > Men's Fashion > Men's Shoes and Footwear.",
1501
- "pass": false,
1502
  "status": "must_fix",
1503
  "text": "Need men's footwear that works for workdays and weekend walking"
1504
  },
1505
  {
1506
  "actual": {
1507
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1508
- "model_output.classification.iab_content.tier1.label": "Attractions",
1509
- "model_output.classification.iab_content.tier2.label": "Nightclubs",
1510
- "model_output.classification.iab_content.tier3.label": null
1511
  },
1512
  "expected": {
1513
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1516,39 +1406,18 @@
1516
  "model_output.classification.iab_content.tier3.label": "Hotels and Motels"
1517
  },
1518
  "id": "hotels-easy",
1519
- "mismatches": [
1520
- {
1521
- "actual": "Attractions",
1522
- "expected": "Travel",
1523
- "path": "model_output.classification.iab_content.tier1.label"
1524
- },
1525
- {
1526
- "actual": "nearest_equivalent",
1527
- "expected": "exact",
1528
- "path": "model_output.classification.iab_content.mapping_mode"
1529
- },
1530
- {
1531
- "actual": "Nightclubs",
1532
- "expected": "Travel Type",
1533
- "path": "model_output.classification.iab_content.tier2.label"
1534
- },
1535
- {
1536
- "actual": null,
1537
- "expected": "Hotels and Motels",
1538
- "path": "model_output.classification.iab_content.tier3.label"
1539
- }
1540
- ],
1541
  "notes": "Cross-vertical easy IAB mapping case for Travel > Travel Type > Hotels and Motels.",
1542
- "pass": false,
1543
  "status": "must_fix",
1544
  "text": "Need a hotel in Chicago for two nights"
1545
  },
1546
  {
1547
  "actual": {
1548
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1549
  "model_output.classification.iab_content.tier1.label": "Travel",
1550
  "model_output.classification.iab_content.tier2.label": "Travel Type",
1551
- "model_output.classification.iab_content.tier3.label": null
1552
  },
1553
  "expected": {
1554
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1557,26 +1426,15 @@
1557
  "model_output.classification.iab_content.tier3.label": "Hotels and Motels"
1558
  },
1559
  "id": "hotels-medium",
1560
- "mismatches": [
1561
- {
1562
- "actual": "nearest_equivalent",
1563
- "expected": "exact",
1564
- "path": "model_output.classification.iab_content.mapping_mode"
1565
- },
1566
- {
1567
- "actual": null,
1568
- "expected": "Hotels and Motels",
1569
- "path": "model_output.classification.iab_content.tier3.label"
1570
- }
1571
- ],
1572
  "notes": "Cross-vertical medium IAB mapping case for Travel > Travel Type > Hotels and Motels.",
1573
- "pass": false,
1574
  "status": "must_fix",
1575
  "text": "Best hotels near Times Square for a weekend trip"
1576
  },
1577
  {
1578
  "actual": {
1579
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1580
  "model_output.classification.iab_content.tier1.label": "Travel",
1581
  "model_output.classification.iab_content.tier2.label": null,
1582
  "model_output.classification.iab_content.tier3.label": null
@@ -1589,11 +1447,6 @@
1589
  },
1590
  "id": "hotels-hard",
1591
  "mismatches": [
1592
- {
1593
- "actual": "nearest_equivalent",
1594
- "expected": "exact",
1595
- "path": "model_output.classification.iab_content.mapping_mode"
1596
- },
1597
  {
1598
  "actual": null,
1599
  "expected": "Travel Type",
@@ -1612,7 +1465,7 @@
1612
  },
1613
  {
1614
  "actual": {
1615
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1616
  "model_output.classification.iab_content.tier1.label": "Real Estate",
1617
  "model_output.classification.iab_content.tier2.label": "Apartments"
1618
  },
@@ -1623,11 +1476,6 @@
1623
  },
1624
  "id": "real-estate-rentals-easy",
1625
  "mismatches": [
1626
- {
1627
- "actual": "nearest_equivalent",
1628
- "expected": "exact",
1629
- "path": "model_output.classification.iab_content.mapping_mode"
1630
- },
1631
  {
1632
  "actual": "Apartments",
1633
  "expected": "Real Estate Renting and Leasing",
@@ -1641,7 +1489,7 @@
1641
  },
1642
  {
1643
  "actual": {
1644
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1645
  "model_output.classification.iab_content.tier1.label": "Real Estate",
1646
  "model_output.classification.iab_content.tier2.label": "Apartments"
1647
  },
@@ -1652,6 +1500,11 @@
1652
  },
1653
  "id": "real-estate-rentals-medium",
1654
  "mismatches": [
 
 
 
 
 
1655
  {
1656
  "actual": "Apartments",
1657
  "expected": "Real Estate Renting and Leasing",
@@ -1665,9 +1518,9 @@
1665
  },
1666
  {
1667
  "actual": {
1668
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1669
  "model_output.classification.iab_content.tier1.label": "Real Estate",
1670
- "model_output.classification.iab_content.tier2.label": null
1671
  },
1672
  "expected": {
1673
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1675,29 +1528,18 @@
1675
  "model_output.classification.iab_content.tier2.label": "Real Estate Renting and Leasing"
1676
  },
1677
  "id": "real-estate-rentals-hard",
1678
- "mismatches": [
1679
- {
1680
- "actual": "nearest_equivalent",
1681
- "expected": "exact",
1682
- "path": "model_output.classification.iab_content.mapping_mode"
1683
- },
1684
- {
1685
- "actual": null,
1686
- "expected": "Real Estate Renting and Leasing",
1687
- "path": "model_output.classification.iab_content.tier2.label"
1688
- }
1689
- ],
1690
  "notes": "Cross-vertical hard IAB mapping case for Real Estate > Real Estate Renting and Leasing.",
1691
- "pass": false,
1692
  "status": "must_fix",
1693
  "text": "Need rental listings for a short move, not home-buying advice"
1694
  },
1695
  {
1696
  "actual": {
1697
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1698
- "model_output.classification.iab_content.tier1.label": "Healthy Living",
1699
- "model_output.classification.iab_content.tier2.label": "Fitness and Exercise",
1700
- "model_output.classification.iab_content.tier3.label": "Running and Jogging"
1701
  },
1702
  "expected": {
1703
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1708,9 +1550,19 @@
1708
  "id": "running-and-jogging-easy",
1709
  "mismatches": [
1710
  {
1711
- "actual": "nearest_equivalent",
1712
- "expected": "exact",
1713
- "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
 
 
 
 
 
1714
  }
1715
  ],
1716
  "notes": "Cross-vertical easy IAB mapping case for Healthy Living > Fitness and Exercise > Running and Jogging.",
@@ -1720,10 +1572,10 @@
1720
  },
1721
  {
1722
  "actual": {
1723
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1724
- "model_output.classification.iab_content.tier1.label": "Healthy Living",
1725
- "model_output.classification.iab_content.tier2.label": "Fitness and Exercise",
1726
- "model_output.classification.iab_content.tier3.label": "Running and Jogging"
1727
  },
1728
  "expected": {
1729
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1734,9 +1586,19 @@
1734
  "id": "running-and-jogging-medium",
1735
  "mismatches": [
1736
  {
1737
- "actual": "nearest_equivalent",
1738
- "expected": "exact",
1739
- "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
 
 
 
 
 
1740
  }
1741
  ],
1742
  "notes": "Cross-vertical medium IAB mapping case for Healthy Living > Fitness and Exercise > Running and Jogging.",
@@ -1746,10 +1608,10 @@
1746
  },
1747
  {
1748
  "actual": {
1749
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1750
- "model_output.classification.iab_content.tier1.label": "Healthy Living",
1751
- "model_output.classification.iab_content.tier2.label": "Fitness and Exercise",
1752
- "model_output.classification.iab_content.tier3.label": "Running and Jogging"
1753
  },
1754
  "expected": {
1755
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1760,9 +1622,19 @@
1760
  "id": "running-and-jogging-hard",
1761
  "mismatches": [
1762
  {
1763
- "actual": "nearest_equivalent",
1764
- "expected": "exact",
1765
- "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
 
 
 
 
 
1766
  }
1767
  ],
1768
  "notes": "Cross-vertical hard IAB mapping case for Healthy Living > Fitness and Exercise > Running and Jogging.",
@@ -1772,9 +1644,9 @@
1772
  },
1773
  {
1774
  "actual": {
1775
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1776
  "model_output.classification.iab_content.tier1.label": "Sports",
1777
- "model_output.classification.iab_content.tier2.label": "Australian Rules Football"
1778
  },
1779
  "expected": {
1780
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1782,28 +1654,17 @@
1782
  "model_output.classification.iab_content.tier2.label": "Soccer"
1783
  },
1784
  "id": "soccer-easy",
1785
- "mismatches": [
1786
- {
1787
- "actual": "nearest_equivalent",
1788
- "expected": "exact",
1789
- "path": "model_output.classification.iab_content.mapping_mode"
1790
- },
1791
- {
1792
- "actual": "Australian Rules Football",
1793
- "expected": "Soccer",
1794
- "path": "model_output.classification.iab_content.tier2.label"
1795
- }
1796
- ],
1797
  "notes": "Cross-vertical easy IAB mapping case for Sports > Soccer.",
1798
- "pass": false,
1799
  "status": "must_fix",
1800
  "text": "How do offside rules work in soccer?"
1801
  },
1802
  {
1803
  "actual": {
1804
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1805
  "model_output.classification.iab_content.tier1.label": "Sports",
1806
- "model_output.classification.iab_content.tier2.label": null
1807
  },
1808
  "expected": {
1809
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1811,28 +1672,17 @@
1811
  "model_output.classification.iab_content.tier2.label": "Soccer"
1812
  },
1813
  "id": "soccer-medium",
1814
- "mismatches": [
1815
- {
1816
- "actual": "nearest_equivalent",
1817
- "expected": "exact",
1818
- "path": "model_output.classification.iab_content.mapping_mode"
1819
- },
1820
- {
1821
- "actual": null,
1822
- "expected": "Soccer",
1823
- "path": "model_output.classification.iab_content.tier2.label"
1824
- }
1825
- ],
1826
  "notes": "Cross-vertical medium IAB mapping case for Sports > Soccer.",
1827
- "pass": false,
1828
  "status": "must_fix",
1829
  "text": "Best soccer drills for beginner players"
1830
  },
1831
  {
1832
  "actual": {
1833
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1834
  "model_output.classification.iab_content.tier1.label": "Sports",
1835
- "model_output.classification.iab_content.tier2.label": "Fantasy Sports"
1836
  },
1837
  "expected": {
1838
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1840,26 +1690,15 @@
1840
  "model_output.classification.iab_content.tier2.label": "Soccer"
1841
  },
1842
  "id": "soccer-hard",
1843
- "mismatches": [
1844
- {
1845
- "actual": "nearest_equivalent",
1846
- "expected": "exact",
1847
- "path": "model_output.classification.iab_content.mapping_mode"
1848
- },
1849
- {
1850
- "actual": "Fantasy Sports",
1851
- "expected": "Soccer",
1852
- "path": "model_output.classification.iab_content.tier2.label"
1853
- }
1854
- ],
1855
  "notes": "Cross-vertical hard IAB mapping case for Sports > Soccer.",
1856
- "pass": false,
1857
  "status": "must_fix",
1858
  "text": "Need help understanding football tactics for the Premier League, not fantasy sports"
1859
  },
1860
  {
1861
  "actual": {
1862
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1863
  "model_output.classification.iab_content.tier1.label": "Genres",
1864
  "model_output.classification.iab_content.tier2.label": "Fantasy"
1865
  },
@@ -1875,11 +1714,6 @@
1875
  "expected": "Books and Literature",
1876
  "path": "model_output.classification.iab_content.tier1.label"
1877
  },
1878
- {
1879
- "actual": "nearest_equivalent",
1880
- "expected": "exact",
1881
- "path": "model_output.classification.iab_content.mapping_mode"
1882
- },
1883
  {
1884
  "actual": "Fantasy",
1885
  "expected": "Fiction",
@@ -1893,9 +1727,9 @@
1893
  },
1894
  {
1895
  "actual": {
1896
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1897
- "model_output.classification.iab_content.tier1.label": "Books and Literature",
1898
- "model_output.classification.iab_content.tier2.label": "Fiction"
1899
  },
1900
  "expected": {
1901
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -1903,17 +1737,33 @@
1903
  "model_output.classification.iab_content.tier2.label": "Fiction"
1904
  },
1905
  "id": "fiction-medium",
1906
- "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1907
  "notes": "Cross-vertical medium IAB mapping case for Books and Literature > Fiction.",
1908
- "pass": true,
1909
  "status": "must_fix",
1910
  "text": "Best fiction books for a long flight"
1911
  },
1912
  {
1913
  "actual": {
1914
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1915
- "model_output.classification.iab_content.tier1.label": "Books and Literature",
1916
- "model_output.classification.iab_content.tier2.label": "Comics and Graphic Novels"
1917
  },
1918
  "expected": {
1919
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1923,12 +1773,12 @@
1923
  "id": "fiction-hard",
1924
  "mismatches": [
1925
  {
1926
- "actual": "nearest_equivalent",
1927
- "expected": "exact",
1928
- "path": "model_output.classification.iab_content.mapping_mode"
1929
  },
1930
  {
1931
- "actual": "Comics and Graphic Novels",
1932
  "expected": "Fiction",
1933
  "path": "model_output.classification.iab_content.tier2.label"
1934
  }
@@ -1940,7 +1790,7 @@
1940
  },
1941
  {
1942
  "actual": {
1943
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1944
  "model_output.classification.iab_content.tier1.label": "Home & Garden",
1945
  "model_output.classification.iab_content.tier2.label": "Remodeling & Construction"
1946
  },
@@ -1951,11 +1801,6 @@
1951
  },
1952
  "id": "home-improvement-easy",
1953
  "mismatches": [
1954
- {
1955
- "actual": "nearest_equivalent",
1956
- "expected": "exact",
1957
- "path": "model_output.classification.iab_content.mapping_mode"
1958
- },
1959
  {
1960
  "actual": "Remodeling & Construction",
1961
  "expected": "Home Improvement",
@@ -1969,9 +1814,9 @@
1969
  },
1970
  {
1971
  "actual": {
1972
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1973
- "model_output.classification.iab_content.tier1.label": "Style & Fashion",
1974
- "model_output.classification.iab_content.tier2.label": "Personal Care"
1975
  },
1976
  "expected": {
1977
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1981,17 +1826,7 @@
1981
  "id": "home-improvement-medium",
1982
  "mismatches": [
1983
  {
1984
- "actual": "Style & Fashion",
1985
- "expected": "Home & Garden",
1986
- "path": "model_output.classification.iab_content.tier1.label"
1987
- },
1988
- {
1989
- "actual": "nearest_equivalent",
1990
- "expected": "exact",
1991
- "path": "model_output.classification.iab_content.mapping_mode"
1992
- },
1993
- {
1994
- "actual": "Personal Care",
1995
  "expected": "Home Improvement",
1996
  "path": "model_output.classification.iab_content.tier2.label"
1997
  }
@@ -2003,9 +1838,9 @@
2003
  },
2004
  {
2005
  "actual": {
2006
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2007
  "model_output.classification.iab_content.tier1.label": "Home & Garden",
2008
- "model_output.classification.iab_content.tier2.label": "Interior Decorating"
2009
  },
2010
  "expected": {
2011
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -2015,12 +1850,7 @@
2015
  "id": "home-improvement-hard",
2016
  "mismatches": [
2017
  {
2018
- "actual": "nearest_equivalent",
2019
- "expected": "exact",
2020
- "path": "model_output.classification.iab_content.mapping_mode"
2021
- },
2022
- {
2023
- "actual": "Interior Decorating",
2024
  "expected": "Home Improvement",
2025
  "path": "model_output.classification.iab_content.tier2.label"
2026
  }
@@ -2033,8 +1863,8 @@
2033
  {
2034
  "actual": {
2035
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2036
- "model_output.classification.iab_content.tier1.label": "Education",
2037
- "model_output.classification.iab_content.tier2.label": "Online Education"
2038
  },
2039
  "expected": {
2040
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -2043,10 +1873,20 @@
2043
  },
2044
  "id": "online-education-easy",
2045
  "mismatches": [
 
 
 
 
 
2046
  {
2047
  "actual": "nearest_equivalent",
2048
  "expected": "exact",
2049
  "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
2050
  }
2051
  ],
2052
  "notes": "Cross-vertical easy IAB mapping case for Education > Online Education.",
@@ -2056,7 +1896,7 @@
2056
  },
2057
  {
2058
  "actual": {
2059
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2060
  "model_output.classification.iab_content.tier1.label": "Careers",
2061
  "model_output.classification.iab_content.tier2.label": "Remote Working"
2062
  },
@@ -2072,11 +1912,6 @@
2072
  "expected": "Education",
2073
  "path": "model_output.classification.iab_content.tier1.label"
2074
  },
2075
- {
2076
- "actual": "nearest_equivalent",
2077
- "expected": "exact",
2078
- "path": "model_output.classification.iab_content.mapping_mode"
2079
- },
2080
  {
2081
  "actual": "Remote Working",
2082
  "expected": "Online Education",
@@ -2091,8 +1926,8 @@
2091
  {
2092
  "actual": {
2093
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2094
- "model_output.classification.iab_content.tier1.label": "Technology & Computing",
2095
- "model_output.classification.iab_content.tier2.label": "Computing"
2096
  },
2097
  "expected": {
2098
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -2102,7 +1937,7 @@
2102
  "id": "online-education-hard",
2103
  "mismatches": [
2104
  {
2105
- "actual": "Technology & Computing",
2106
  "expected": "Education",
2107
  "path": "model_output.classification.iab_content.tier1.label"
2108
  },
@@ -2112,7 +1947,7 @@
2112
  "path": "model_output.classification.iab_content.mapping_mode"
2113
  },
2114
  {
2115
- "actual": "Computing",
2116
  "expected": "Online Education",
2117
  "path": "model_output.classification.iab_content.tier2.label"
2118
  }
@@ -2124,7 +1959,7 @@
2124
  },
2125
  {
2126
  "actual": {
2127
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2128
  "model_output.classification.iab_content.tier1.label": "Education",
2129
  "model_output.classification.iab_content.tier2.label": "College Education",
2130
  "model_output.classification.iab_content.tier3.label": "Postgraduate Education"
@@ -2136,15 +1971,9 @@
2136
  "model_output.classification.iab_content.tier3.label": "Postgraduate Education"
2137
  },
2138
  "id": "postgraduate-education-easy",
2139
- "mismatches": [
2140
- {
2141
- "actual": "nearest_equivalent",
2142
- "expected": "exact",
2143
- "path": "model_output.classification.iab_content.mapping_mode"
2144
- }
2145
- ],
2146
  "notes": "Cross-vertical easy IAB mapping case for Education > College Education > Postgraduate Education.",
2147
- "pass": false,
2148
  "status": "must_fix",
2149
  "text": "best universities to study masters"
2150
  },
@@ -2152,8 +1981,8 @@
2152
  "actual": {
2153
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2154
  "model_output.classification.iab_content.tier1.label": "Education",
2155
- "model_output.classification.iab_content.tier2.label": "College Education",
2156
- "model_output.classification.iab_content.tier3.label": "Postgraduate Education"
2157
  },
2158
  "expected": {
2159
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -2167,6 +1996,16 @@
2167
  "actual": "nearest_equivalent",
2168
  "expected": "exact",
2169
  "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
 
 
 
 
 
2170
  }
2171
  ],
2172
  "notes": "Cross-vertical medium IAB mapping case for Education > College Education > Postgraduate Education.",
@@ -2176,7 +2015,7 @@
2176
  },
2177
  {
2178
  "actual": {
2179
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2180
  "model_output.classification.iab_content.tier1.label": "Education",
2181
  "model_output.classification.iab_content.tier2.label": "College Education",
2182
  "model_output.classification.iab_content.tier3.label": "Postgraduate Education"
@@ -2188,15 +2027,9 @@
2188
  "model_output.classification.iab_content.tier3.label": "Postgraduate Education"
2189
  },
2190
  "id": "postgraduate-education-hard",
2191
- "mismatches": [
2192
- {
2193
- "actual": "nearest_equivalent",
2194
- "expected": "exact",
2195
- "path": "model_output.classification.iab_content.mapping_mode"
2196
- }
2197
- ],
2198
  "notes": "Cross-vertical hard IAB mapping case for Education > College Education > Postgraduate Education.",
2199
- "pass": false,
2200
  "status": "must_fix",
2201
  "text": "need postgraduate options for a master's degree, not short online courses"
2202
  },
@@ -2246,7 +2079,7 @@
2246
  },
2247
  {
2248
  "actual": {
2249
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2250
  "model_output.classification.iab_content.tier1.label": "Medical Health"
2251
  },
2252
  "expected": {
@@ -2254,21 +2087,15 @@
2254
  "model_output.classification.iab_content.tier1.label": "Medical Health"
2255
  },
2256
  "id": "medical-health-hard",
2257
- "mismatches": [
2258
- {
2259
- "actual": "nearest_equivalent",
2260
- "expected": "exact",
2261
- "path": "model_output.classification.iab_content.mapping_mode"
2262
- }
2263
- ],
2264
  "notes": "Cross-vertical hard IAB mapping case for Medical Health.",
2265
- "pass": false,
2266
  "status": "must_fix",
2267
  "text": "need medical advice about symptoms, not wellness or fitness tips"
2268
  },
2269
  {
2270
  "actual": {
2271
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2272
  "model_output.classification.iab_content.tier1.label": "Careers",
2273
  "model_output.classification.iab_content.tier2.label": "Remote Working"
2274
  },
@@ -2279,11 +2106,6 @@
2279
  },
2280
  "id": "careers-job-search-easy",
2281
  "mismatches": [
2282
- {
2283
- "actual": "nearest_equivalent",
2284
- "expected": "exact",
2285
- "path": "model_output.classification.iab_content.mapping_mode"
2286
- },
2287
  {
2288
  "actual": "Remote Working",
2289
  "expected": "Job Search",
@@ -2297,7 +2119,7 @@
2297
  },
2298
  {
2299
  "actual": {
2300
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2301
  "model_output.classification.iab_content.tier1.label": "Business and Finance",
2302
  "model_output.classification.iab_content.tier2.label": "Industries"
2303
  },
@@ -2313,11 +2135,6 @@
2313
  "expected": "Careers",
2314
  "path": "model_output.classification.iab_content.tier1.label"
2315
  },
2316
- {
2317
- "actual": "nearest_equivalent",
2318
- "expected": "exact",
2319
- "path": "model_output.classification.iab_content.mapping_mode"
2320
- },
2321
  {
2322
  "actual": "Industries",
2323
  "expected": "Job Search",
@@ -2332,8 +2149,8 @@
2332
  {
2333
  "actual": {
2334
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2335
- "model_output.classification.iab_content.tier1.label": "Business and Finance",
2336
- "model_output.classification.iab_content.tier2.label": "Industries"
2337
  },
2338
  "expected": {
2339
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -2343,7 +2160,7 @@
2343
  "id": "careers-job-search-hard",
2344
  "mismatches": [
2345
  {
2346
- "actual": "Business and Finance",
2347
  "expected": "Careers",
2348
  "path": "model_output.classification.iab_content.tier1.label"
2349
  },
@@ -2353,7 +2170,7 @@
2353
  "path": "model_output.classification.iab_content.mapping_mode"
2354
  },
2355
  {
2356
- "actual": "Industries",
2357
  "expected": "Job Search",
2358
  "path": "model_output.classification.iab_content.tier2.label"
2359
  }
@@ -2366,7 +2183,7 @@
2366
  {
2367
  "actual": {
2368
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2369
- "model_output.classification.iab_content.tier1.label": "Personal Finance",
2370
  "model_output.classification.iab_content.tier2.label": null
2371
  },
2372
  "expected": {
@@ -2376,6 +2193,11 @@
2376
  },
2377
  "id": "personal-finance-easy",
2378
  "mismatches": [
 
 
 
 
 
2379
  {
2380
  "actual": "nearest_equivalent",
2381
  "expected": "exact",
@@ -2394,7 +2216,7 @@
2394
  },
2395
  {
2396
  "actual": {
2397
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2398
  "model_output.classification.iab_content.tier1.label": "Personal Finance",
2399
  "model_output.classification.iab_content.tier2.label": null
2400
  },
@@ -2405,11 +2227,6 @@
2405
  },
2406
  "id": "personal-finance-medium",
2407
  "mismatches": [
2408
- {
2409
- "actual": "nearest_equivalent",
2410
- "expected": "exact",
2411
- "path": "model_output.classification.iab_content.mapping_mode"
2412
- },
2413
  {
2414
  "actual": null,
2415
  "expected": "Financial Planning",
@@ -2423,9 +2240,9 @@
2423
  },
2424
  {
2425
  "actual": {
2426
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2427
  "model_output.classification.iab_content.tier1.label": "Personal Finance",
2428
- "model_output.classification.iab_content.tier2.label": "Retirement Planning"
2429
  },
2430
  "expected": {
2431
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -2435,12 +2252,7 @@
2435
  "id": "personal-finance-hard",
2436
  "mismatches": [
2437
  {
2438
- "actual": "nearest_equivalent",
2439
- "expected": "exact",
2440
- "path": "model_output.classification.iab_content.mapping_mode"
2441
- },
2442
- {
2443
- "actual": "Retirement Planning",
2444
  "expected": "Financial Planning",
2445
  "path": "model_output.classification.iab_content.tier2.label"
2446
  }
@@ -2452,7 +2264,7 @@
2452
  },
2453
  {
2454
  "actual": {
2455
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2456
  "model_output.classification.iab_content.tier1.label": "Family and Relationships",
2457
  "model_output.classification.iab_content.tier2.label": "Parenting"
2458
  },
@@ -2462,15 +2274,9 @@
2462
  "model_output.classification.iab_content.tier2.label": "Parenting"
2463
  },
2464
  "id": "parenting-easy",
2465
- "mismatches": [
2466
- {
2467
- "actual": "nearest_equivalent",
2468
- "expected": "exact",
2469
- "path": "model_output.classification.iab_content.mapping_mode"
2470
- }
2471
- ],
2472
  "notes": "Cross-vertical easy IAB mapping case for Family and Relationships > Parenting.",
2473
- "pass": false,
2474
  "status": "must_fix",
2475
  "text": "tips for parenting a toddler"
2476
  },
@@ -2478,7 +2284,7 @@
2478
  "actual": {
2479
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2480
  "model_output.classification.iab_content.tier1.label": "Education",
2481
- "model_output.classification.iab_content.tier2.label": "Online Education"
2482
  },
2483
  "expected": {
2484
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -2498,7 +2304,7 @@
2498
  "path": "model_output.classification.iab_content.mapping_mode"
2499
  },
2500
  {
2501
- "actual": "Online Education",
2502
  "expected": "Parenting",
2503
  "path": "model_output.classification.iab_content.tier2.label"
2504
  }
@@ -2510,7 +2316,7 @@
2510
  },
2511
  {
2512
  "actual": {
2513
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2514
  "model_output.classification.iab_content.tier1.label": "Family and Relationships",
2515
  "model_output.classification.iab_content.tier2.label": "Parenting"
2516
  },
@@ -2520,21 +2326,15 @@
2520
  "model_output.classification.iab_content.tier2.label": "Parenting"
2521
  },
2522
  "id": "parenting-hard",
2523
- "mismatches": [
2524
- {
2525
- "actual": "nearest_equivalent",
2526
- "expected": "exact",
2527
- "path": "model_output.classification.iab_content.mapping_mode"
2528
- }
2529
- ],
2530
  "notes": "Cross-vertical hard IAB mapping case for Family and Relationships > Parenting.",
2531
- "pass": false,
2532
  "status": "must_fix",
2533
  "text": "need parenting advice for a child starting preschool"
2534
  },
2535
  {
2536
  "actual": {
2537
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2538
  "model_output.classification.iab_content.tier1.label": "Home & Garden",
2539
  "model_output.classification.iab_content.tier2.label": "Gardening"
2540
  },
@@ -2544,22 +2344,16 @@
2544
  "model_output.classification.iab_content.tier2.label": "Gardening"
2545
  },
2546
  "id": "gardening-easy",
2547
- "mismatches": [
2548
- {
2549
- "actual": "nearest_equivalent",
2550
- "expected": "exact",
2551
- "path": "model_output.classification.iab_content.mapping_mode"
2552
- }
2553
- ],
2554
  "notes": "Cross-vertical easy IAB mapping case for Home & Garden > Gardening.",
2555
- "pass": false,
2556
  "status": "must_fix",
2557
  "text": "best plants for a small balcony garden"
2558
  },
2559
  {
2560
  "actual": {
2561
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2562
- "model_output.classification.iab_content.tier1.label": "Personal Finance",
2563
  "model_output.classification.iab_content.tier2.label": null
2564
  },
2565
  "expected": {
@@ -2570,7 +2364,7 @@
2570
  "id": "gardening-medium",
2571
  "mismatches": [
2572
  {
2573
- "actual": "Personal Finance",
2574
  "expected": "Home & Garden",
2575
  "path": "model_output.classification.iab_content.tier1.label"
2576
  },
@@ -2592,9 +2386,9 @@
2592
  },
2593
  {
2594
  "actual": {
2595
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2596
  "model_output.classification.iab_content.tier1.label": "Home & Garden",
2597
- "model_output.classification.iab_content.tier2.label": null
2598
  },
2599
  "expected": {
2600
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -2602,28 +2396,17 @@
2602
  "model_output.classification.iab_content.tier2.label": "Gardening"
2603
  },
2604
  "id": "gardening-hard",
2605
- "mismatches": [
2606
- {
2607
- "actual": "nearest_equivalent",
2608
- "expected": "exact",
2609
- "path": "model_output.classification.iab_content.mapping_mode"
2610
- },
2611
- {
2612
- "actual": null,
2613
- "expected": "Gardening",
2614
- "path": "model_output.classification.iab_content.tier2.label"
2615
- }
2616
- ],
2617
  "notes": "Cross-vertical hard IAB mapping case for Home & Garden > Gardening.",
2618
- "pass": false,
2619
  "status": "must_fix",
2620
  "text": "need gardening advice for a shady backyard, not interior decor ideas"
2621
  },
2622
  {
2623
  "actual": {
2624
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2625
- "model_output.classification.iab_content.tier1.label": "Genres",
2626
- "model_output.classification.iab_content.tier2.label": null
2627
  },
2628
  "expected": {
2629
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -2631,31 +2414,15 @@
2631
  "model_output.classification.iab_content.tier2.label": "Movies"
2632
  },
2633
  "id": "movies-easy",
2634
- "mismatches": [
2635
- {
2636
- "actual": "Genres",
2637
- "expected": "Entertainment",
2638
- "path": "model_output.classification.iab_content.tier1.label"
2639
- },
2640
- {
2641
- "actual": "nearest_equivalent",
2642
- "expected": "exact",
2643
- "path": "model_output.classification.iab_content.mapping_mode"
2644
- },
2645
- {
2646
- "actual": null,
2647
- "expected": "Movies",
2648
- "path": "model_output.classification.iab_content.tier2.label"
2649
- }
2650
- ],
2651
  "notes": "Cross-vertical easy IAB mapping case for Entertainment > Movies.",
2652
- "pass": false,
2653
  "status": "must_fix",
2654
  "text": "What movie should we watch tonight?"
2655
  },
2656
  {
2657
  "actual": {
2658
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2659
  "model_output.classification.iab_content.tier1.label": "Genres",
2660
  "model_output.classification.iab_content.tier2.label": "Horror"
2661
  },
@@ -2671,11 +2438,6 @@
2671
  "expected": "Entertainment",
2672
  "path": "model_output.classification.iab_content.tier1.label"
2673
  },
2674
- {
2675
- "actual": "nearest_equivalent",
2676
- "expected": "exact",
2677
- "path": "model_output.classification.iab_content.mapping_mode"
2678
- },
2679
  {
2680
  "actual": "Horror",
2681
  "expected": "Movies",
@@ -2689,9 +2451,9 @@
2689
  },
2690
  {
2691
  "actual": {
2692
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2693
  "model_output.classification.iab_content.tier1.label": "Entertainment",
2694
- "model_output.classification.iab_content.tier2.label": "Music"
2695
  },
2696
  "expected": {
2697
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -2699,20 +2461,9 @@
2699
  "model_output.classification.iab_content.tier2.label": "Movies"
2700
  },
2701
  "id": "movies-hard",
2702
- "mismatches": [
2703
- {
2704
- "actual": "nearest_equivalent",
2705
- "expected": "exact",
2706
- "path": "model_output.classification.iab_content.mapping_mode"
2707
- },
2708
- {
2709
- "actual": "Music",
2710
- "expected": "Movies",
2711
- "path": "model_output.classification.iab_content.tier2.label"
2712
- }
2713
- ],
2714
  "notes": "Cross-vertical hard IAB mapping case for Entertainment > Movies.",
2715
- "pass": false,
2716
  "status": "must_fix",
2717
  "text": "Looking for film recommendations, not TV shows or music"
2718
  }
 
1
  {
2
  "by_status": {
3
  "must_fix": {
4
+ "failed": 57,
5
+ "passed": 33,
6
  "total": 90
7
  }
8
  },
9
+ "cases_path": "/root/.cache/huggingface/hub/models--admesh--agentic-intent-classifier/snapshots/0584798f8efee6beccd778b0afa06782ab5add60/agentic-intent-classifier/agentic-intent-classifier/examples/iab_cross_vertical_mapping_cases.json",
10
  "count": 90,
11
+ "failed": 57,
12
+ "passed": 33,
13
  "results": [
14
  {
15
  "actual": {
16
+ "model_output.classification.iab_content.mapping_mode": "exact",
17
+ "model_output.classification.iab_content.tier1.label": "Travel",
18
+ "model_output.classification.iab_content.tier2.label": "Travel Accessories"
19
  },
20
  "expected": {
21
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
25
  "id": "auto-buying-easy",
26
  "mismatches": [
27
  {
28
+ "actual": "Travel",
29
+ "expected": "Automotive",
30
+ "path": "model_output.classification.iab_content.tier1.label"
31
+ },
32
+ {
33
+ "actual": "exact",
34
+ "expected": "nearest_equivalent",
35
+ "path": "model_output.classification.iab_content.mapping_mode"
36
+ },
37
+ {
38
+ "actual": "Travel Accessories",
39
  "expected": "Auto Buying and Selling",
40
  "path": "model_output.classification.iab_content.tier2.label"
41
  }
 
47
  },
48
  {
49
  "actual": {
50
+ "model_output.classification.iab_content.mapping_mode": "exact",
51
  "model_output.classification.iab_content.tier1.label": "Automotive",
52
  "model_output.classification.iab_content.tier2.label": "Auto Body Styles"
53
  },
 
58
  },
59
  "id": "auto-buying-medium",
60
  "mismatches": [
61
+ {
62
+ "actual": "exact",
63
+ "expected": "nearest_equivalent",
64
+ "path": "model_output.classification.iab_content.mapping_mode"
65
+ },
66
  {
67
  "actual": "Auto Body Styles",
68
  "expected": "Auto Buying and Selling",
 
76
  },
77
  {
78
  "actual": {
79
+ "model_output.classification.iab_content.mapping_mode": "exact",
80
  "model_output.classification.iab_content.tier1.label": "Automotive",
81
+ "model_output.classification.iab_content.tier2.label": "Auto Shows"
82
  },
83
  "expected": {
84
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
88
  "id": "auto-buying-hard",
89
  "mismatches": [
90
  {
91
+ "actual": "exact",
92
+ "expected": "nearest_equivalent",
93
+ "path": "model_output.classification.iab_content.mapping_mode"
94
+ },
95
+ {
96
+ "actual": "Auto Shows",
97
  "expected": "Auto Buying and Selling",
98
  "path": "model_output.classification.iab_content.tier2.label"
99
  }
 
105
  },
106
  {
107
  "actual": {
108
+ "model_output.classification.iab_content.mapping_mode": "exact",
109
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
110
+ "model_output.classification.iab_content.tier2.label": null,
111
+ "model_output.classification.iab_content.tier3.label": null
112
  },
113
  "expected": {
114
  "model_output.classification.iab_content.mapping_mode": "exact",
 
124
  "path": "model_output.classification.iab_content.tier1.label"
125
  },
126
  {
127
+ "actual": null,
 
 
 
 
 
128
  "expected": "Business",
129
  "path": "model_output.classification.iab_content.tier2.label"
130
  },
131
  {
132
+ "actual": null,
133
  "expected": "Sales",
134
  "path": "model_output.classification.iab_content.tier3.label"
135
  }
 
142
  {
143
  "actual": {
144
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
145
+ "model_output.classification.iab_content.tier1.label": "Careers",
146
+ "model_output.classification.iab_content.tier2.label": null,
147
+ "model_output.classification.iab_content.tier3.label": null
148
  },
149
  "expected": {
150
  "model_output.classification.iab_content.mapping_mode": "exact",
 
155
  "id": "sales-crm-medium",
156
  "mismatches": [
157
  {
158
+ "actual": "Careers",
159
  "expected": "Business and Finance",
160
  "path": "model_output.classification.iab_content.tier1.label"
161
  },
 
165
  "path": "model_output.classification.iab_content.mapping_mode"
166
  },
167
  {
168
+ "actual": null,
169
  "expected": "Business",
170
  "path": "model_output.classification.iab_content.tier2.label"
171
  },
172
  {
173
+ "actual": null,
174
  "expected": "Sales",
175
  "path": "model_output.classification.iab_content.tier3.label"
176
  }
 
184
  "actual": {
185
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
186
  "model_output.classification.iab_content.tier1.label": "Business and Finance",
187
+ "model_output.classification.iab_content.tier2.label": null,
188
  "model_output.classification.iab_content.tier3.label": null
189
  },
190
  "expected": {
 
200
  "expected": "exact",
201
  "path": "model_output.classification.iab_content.mapping_mode"
202
  },
203
+ {
204
+ "actual": null,
205
+ "expected": "Business",
206
+ "path": "model_output.classification.iab_content.tier2.label"
207
+ },
208
  {
209
  "actual": null,
210
  "expected": "Sales",
 
219
  {
220
  "actual": {
221
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
222
+ "model_output.classification.iab_content.tier1.label": "Technology & Computing",
223
+ "model_output.classification.iab_content.tier2.label": null,
224
  "model_output.classification.iab_content.tier3.label": null
225
  },
226
  "expected": {
 
232
  "id": "marketing-tools-easy",
233
  "mismatches": [
234
  {
235
+ "actual": "Technology & Computing",
236
  "expected": "Business and Finance",
237
  "path": "model_output.classification.iab_content.tier1.label"
238
  },
 
242
  "path": "model_output.classification.iab_content.mapping_mode"
243
  },
244
  {
245
+ "actual": null,
246
  "expected": "Business",
247
  "path": "model_output.classification.iab_content.tier2.label"
248
  },
 
260
  {
261
  "actual": {
262
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
263
+ "model_output.classification.iab_content.tier1.label": "Business and Finance",
264
+ "model_output.classification.iab_content.tier2.label": null,
265
  "model_output.classification.iab_content.tier3.label": null
266
  },
267
  "expected": {
 
272
  },
273
  "id": "marketing-tools-medium",
274
  "mismatches": [
 
 
 
 
 
275
  {
276
  "actual": "nearest_equivalent",
277
  "expected": "exact",
278
  "path": "model_output.classification.iab_content.mapping_mode"
279
  },
280
  {
281
+ "actual": null,
282
  "expected": "Business",
283
  "path": "model_output.classification.iab_content.tier2.label"
284
  },
 
296
  {
297
  "actual": {
298
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
299
+ "model_output.classification.iab_content.tier1.label": "Business and Finance",
300
+ "model_output.classification.iab_content.tier2.label": null,
301
+ "model_output.classification.iab_content.tier3.label": null
302
  },
303
  "expected": {
304
  "model_output.classification.iab_content.mapping_mode": "exact",
 
308
  },
309
  "id": "marketing-tools-hard",
310
  "mismatches": [
 
 
 
 
 
311
  {
312
  "actual": "nearest_equivalent",
313
  "expected": "exact",
314
  "path": "model_output.classification.iab_content.mapping_mode"
315
  },
316
  {
317
+ "actual": null,
318
  "expected": "Business",
319
  "path": "model_output.classification.iab_content.tier2.label"
320
  },
321
  {
322
+ "actual": null,
323
  "expected": "Marketing and Advertising",
324
  "path": "model_output.classification.iab_content.tier3.label"
325
  }
 
332
  {
333
  "actual": {
334
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
335
+ "model_output.classification.iab_content.tier1.label": "Technology & Computing",
336
+ "model_output.classification.iab_content.tier2.label": "Computing",
337
  "model_output.classification.iab_content.tier3.label": null
338
  },
339
  "expected": {
 
345
  "id": "business-it-easy",
346
  "mismatches": [
347
  {
348
+ "actual": "Technology & Computing",
349
  "expected": "Business and Finance",
350
  "path": "model_output.classification.iab_content.tier1.label"
351
  },
 
355
  "path": "model_output.classification.iab_content.mapping_mode"
356
  },
357
  {
358
+ "actual": "Computing",
359
  "expected": "Business",
360
  "path": "model_output.classification.iab_content.tier2.label"
361
  },
 
374
  "actual": {
375
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
376
  "model_output.classification.iab_content.tier1.label": "Business and Finance",
377
+ "model_output.classification.iab_content.tier2.label": null,
378
  "model_output.classification.iab_content.tier3.label": null
379
  },
380
  "expected": {
 
390
  "expected": "exact",
391
  "path": "model_output.classification.iab_content.mapping_mode"
392
  },
393
+ {
394
+ "actual": null,
395
+ "expected": "Business",
396
+ "path": "model_output.classification.iab_content.tier2.label"
397
+ },
398
  {
399
  "actual": null,
400
  "expected": "Business I.T.",
 
411
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
412
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
413
  "model_output.classification.iab_content.tier2.label": "Computing",
414
+ "model_output.classification.iab_content.tier3.label": null
415
  },
416
  "expected": {
417
  "model_output.classification.iab_content.mapping_mode": "exact",
 
437
  "path": "model_output.classification.iab_content.tier2.label"
438
  },
439
  {
440
+ "actual": null,
441
  "expected": "Business I.T.",
442
  "path": "model_output.classification.iab_content.tier3.label"
443
  }
 
449
  },
450
  {
451
  "actual": {
452
+ "model_output.classification.iab_content.mapping_mode": "exact",
453
+ "model_output.classification.iab_content.tier1.label": "Food & Drink",
454
+ "model_output.classification.iab_content.tier2.label": "Dining Out"
455
  },
456
  "expected": {
457
  "model_output.classification.iab_content.mapping_mode": "exact",
 
459
  "model_output.classification.iab_content.tier2.label": "Dining Out"
460
  },
461
  "id": "dining-out-easy",
462
+ "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
463
  "notes": "Cross-vertical easy IAB mapping case for Food & Drink > Dining Out.",
464
+ "pass": true,
465
  "status": "must_fix",
466
  "text": "Book a table for six tonight"
467
  },
468
  {
469
  "actual": {
470
+ "model_output.classification.iab_content.mapping_mode": "exact",
471
+ "model_output.classification.iab_content.tier1.label": "Food & Drink",
472
+ "model_output.classification.iab_content.tier2.label": "Dining Out"
473
  },
474
  "expected": {
475
  "model_output.classification.iab_content.mapping_mode": "exact",
 
477
  "model_output.classification.iab_content.tier2.label": "Dining Out"
478
  },
479
  "id": "dining-out-medium",
480
+ "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
481
  "notes": "Cross-vertical medium IAB mapping case for Food & Drink > Dining Out.",
482
+ "pass": true,
483
  "status": "must_fix",
484
  "text": "Good restaurants for a client dinner downtown"
485
  },
486
  {
487
  "actual": {
488
+ "model_output.classification.iab_content.mapping_mode": "exact",
489
  "model_output.classification.iab_content.tier1.label": "Food & Drink",
490
+ "model_output.classification.iab_content.tier2.label": "Dining Out"
491
  },
492
  "expected": {
493
  "model_output.classification.iab_content.mapping_mode": "exact",
 
495
  "model_output.classification.iab_content.tier2.label": "Dining Out"
496
  },
497
  "id": "dining-out-hard",
498
+ "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
499
  "notes": "Cross-vertical hard IAB mapping case for Food & Drink > Dining Out.",
500
+ "pass": true,
501
  "status": "must_fix",
502
  "text": "Need a place to eat tonight where I can make a reservation online"
503
  },
504
  {
505
  "actual": {
506
+ "model_output.classification.iab_content.mapping_mode": "exact",
507
+ "model_output.classification.iab_content.tier1.label": "Food & Drink",
508
+ "model_output.classification.iab_content.tier2.label": "Alcoholic Beverages"
509
  },
510
  "expected": {
511
  "model_output.classification.iab_content.mapping_mode": "exact",
 
513
  "model_output.classification.iab_content.tier2.label": "Alcoholic Beverages"
514
  },
515
  "id": "alcoholic-beverages-easy",
516
+ "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
517
  "notes": "Cross-vertical easy IAB mapping case for Food & Drink > Alcoholic Beverages.",
518
+ "pass": true,
519
  "status": "must_fix",
520
  "text": "Which whiskey cocktail should I order?"
521
  },
522
  {
523
  "actual": {
524
+ "model_output.classification.iab_content.mapping_mode": "exact",
525
  "model_output.classification.iab_content.tier1.label": "Food & Drink",
526
+ "model_output.classification.iab_content.tier2.label": "Alcoholic Beverages"
527
  },
528
  "expected": {
529
  "model_output.classification.iab_content.mapping_mode": "exact",
 
531
  "model_output.classification.iab_content.tier2.label": "Alcoholic Beverages"
532
  },
533
  "id": "alcoholic-beverages-medium",
534
+ "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
535
  "notes": "Cross-vertical medium IAB mapping case for Food & Drink > Alcoholic Beverages.",
536
+ "pass": true,
537
  "status": "must_fix",
538
  "text": "Best vodka drinks for beginners"
539
  },
540
  {
541
  "actual": {
542
+ "model_output.classification.iab_content.mapping_mode": "exact",
543
  "model_output.classification.iab_content.tier1.label": "Food & Drink",
544
+ "model_output.classification.iab_content.tier2.label": "Non-Alcoholic Beverages"
545
  },
546
  "expected": {
547
  "model_output.classification.iab_content.mapping_mode": "exact",
 
551
  "id": "alcoholic-beverages-hard",
552
  "mismatches": [
553
  {
554
+ "actual": "Non-Alcoholic Beverages",
 
 
 
 
 
555
  "expected": "Alcoholic Beverages",
556
  "path": "model_output.classification.iab_content.tier2.label"
557
  }
 
564
  {
565
  "actual": {
566
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
567
+ "model_output.classification.iab_content.tier1.label": "Careers",
568
  "model_output.classification.iab_content.tier2.label": null
569
  },
570
  "expected": {
 
575
  "id": "artificial-intelligence-easy",
576
  "mismatches": [
577
  {
578
+ "actual": "Careers",
579
  "expected": "Technology & Computing",
580
  "path": "model_output.classification.iab_content.tier1.label"
581
  },
 
597
  },
598
  {
599
  "actual": {
600
+ "model_output.classification.iab_content.mapping_mode": "exact",
601
+ "model_output.classification.iab_content.tier1.label": "Education",
602
+ "model_output.classification.iab_content.tier2.label": "Language Learning"
603
  },
604
  "expected": {
605
  "model_output.classification.iab_content.mapping_mode": "exact",
 
609
  "id": "artificial-intelligence-medium",
610
  "mismatches": [
611
  {
612
+ "actual": "Education",
613
  "expected": "Technology & Computing",
614
  "path": "model_output.classification.iab_content.tier1.label"
615
  },
616
  {
617
+ "actual": "Language Learning",
 
 
 
 
 
618
  "expected": "Artificial Intelligence",
619
  "path": "model_output.classification.iab_content.tier2.label"
620
  }
 
626
  },
627
  {
628
  "actual": {
629
+ "model_output.classification.iab_content.mapping_mode": "exact",
630
  "model_output.classification.iab_content.tier1.label": "Education",
631
  "model_output.classification.iab_content.tier2.label": "Language Learning"
632
  },
 
642
  "expected": "Technology & Computing",
643
  "path": "model_output.classification.iab_content.tier1.label"
644
  },
 
 
 
 
 
645
  {
646
  "actual": "Language Learning",
647
  "expected": "Artificial Intelligence",
 
657
  "actual": {
658
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
659
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
660
+ "model_output.classification.iab_content.tier2.label": null,
661
+ "model_output.classification.iab_content.tier3.label": null
662
  },
663
  "expected": {
664
  "model_output.classification.iab_content.mapping_mode": "exact",
 
672
  "actual": "nearest_equivalent",
673
  "expected": "exact",
674
  "path": "model_output.classification.iab_content.mapping_mode"
675
+ },
676
+ {
677
+ "actual": null,
678
+ "expected": "Computing",
679
+ "path": "model_output.classification.iab_content.tier2.label"
680
+ },
681
+ {
682
+ "actual": null,
683
+ "expected": "Software and Applications",
684
+ "path": "model_output.classification.iab_content.tier3.label"
685
  }
686
  ],
687
  "notes": "Cross-vertical easy IAB mapping case for Technology & Computing > Computing > Computer Software and Applications.",
 
693
  "actual": {
694
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
695
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
696
+ "model_output.classification.iab_content.tier2.label": null,
697
+ "model_output.classification.iab_content.tier3.label": null
698
  },
699
  "expected": {
700
  "model_output.classification.iab_content.mapping_mode": "exact",
 
708
  "actual": "nearest_equivalent",
709
  "expected": "exact",
710
  "path": "model_output.classification.iab_content.mapping_mode"
711
+ },
712
+ {
713
+ "actual": null,
714
+ "expected": "Computing",
715
+ "path": "model_output.classification.iab_content.tier2.label"
716
+ },
717
+ {
718
+ "actual": null,
719
+ "expected": "Software and Applications",
720
+ "path": "model_output.classification.iab_content.tier3.label"
721
  }
722
  ],
723
  "notes": "Cross-vertical medium IAB mapping case for Technology & Computing > Computing > Computer Software and Applications.",
 
729
  "actual": {
730
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
731
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
732
+ "model_output.classification.iab_content.tier2.label": null,
733
+ "model_output.classification.iab_content.tier3.label": null
734
  },
735
  "expected": {
736
  "model_output.classification.iab_content.mapping_mode": "exact",
 
744
  "actual": "nearest_equivalent",
745
  "expected": "exact",
746
  "path": "model_output.classification.iab_content.mapping_mode"
747
+ },
748
+ {
749
+ "actual": null,
750
+ "expected": "Computing",
751
+ "path": "model_output.classification.iab_content.tier2.label"
752
+ },
753
+ {
754
+ "actual": null,
755
+ "expected": "Software and Applications",
756
+ "path": "model_output.classification.iab_content.tier3.label"
757
  }
758
  ],
759
  "notes": "Cross-vertical hard IAB mapping case for Technology & Computing > Computing > Computer Software and Applications.",
 
763
  },
764
  {
765
  "actual": {
766
+ "model_output.classification.iab_content.mapping_mode": "exact",
767
  "model_output.classification.iab_content.tier1.label": "Careers",
768
  "model_output.classification.iab_content.tier2.label": "Remote Working",
769
  "model_output.classification.iab_content.tier3.label": null,
 
783
  "expected": "Technology & Computing",
784
  "path": "model_output.classification.iab_content.tier1.label"
785
  },
 
 
 
 
 
786
  {
787
  "actual": "Remote Working",
788
  "expected": "Computing",
 
809
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
810
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
811
  "model_output.classification.iab_content.tier2.label": "Computing",
812
+ "model_output.classification.iab_content.tier3.label": null,
813
  "model_output.classification.iab_content.tier4.label": null
814
  },
815
  "expected": {
 
827
  "path": "model_output.classification.iab_content.mapping_mode"
828
  },
829
  {
830
+ "actual": null,
831
  "expected": "Software and Applications",
832
  "path": "model_output.classification.iab_content.tier3.label"
833
  },
 
846
  "actual": {
847
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
848
  "model_output.classification.iab_content.tier1.label": "Careers",
849
+ "model_output.classification.iab_content.tier2.label": null,
850
  "model_output.classification.iab_content.tier3.label": null,
851
  "model_output.classification.iab_content.tier4.label": null
852
  },
 
870
  "path": "model_output.classification.iab_content.mapping_mode"
871
  },
872
  {
873
+ "actual": null,
874
  "expected": "Computing",
875
  "path": "model_output.classification.iab_content.tier2.label"
876
  },
 
930
  },
931
  {
932
  "actual": {
933
+ "model_output.classification.iab_content.mapping_mode": "exact",
934
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
935
  "model_output.classification.iab_content.tier2.label": "Computing",
936
  "model_output.classification.iab_content.tier3.label": "Internet",
937
+ "model_output.classification.iab_content.tier4.label": "Web Hosting"
938
  },
939
  "expected": {
940
  "model_output.classification.iab_content.mapping_mode": "exact",
 
944
  "model_output.classification.iab_content.tier4.label": "Web Hosting"
945
  },
946
  "id": "web-hosting-medium",
947
+ "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
948
  "notes": "Cross-vertical medium IAB mapping case for Technology & Computing > Computing > Internet > Web Hosting.",
949
+ "pass": true,
950
  "status": "must_fix",
951
  "text": "Best hosting platform for a startup website"
952
  },
953
  {
954
  "actual": {
955
+ "model_output.classification.iab_content.mapping_mode": "exact",
956
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
957
  "model_output.classification.iab_content.tier2.label": "Computing",
958
  "model_output.classification.iab_content.tier3.label": "Internet",
959
+ "model_output.classification.iab_content.tier4.label": "Web Hosting"
960
  },
961
  "expected": {
962
  "model_output.classification.iab_content.mapping_mode": "exact",
 
966
  "model_output.classification.iab_content.tier4.label": "Web Hosting"
967
  },
968
  "id": "web-hosting-hard",
969
+ "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
970
  "notes": "Cross-vertical hard IAB mapping case for Technology & Computing > Computing > Internet > Web Hosting.",
971
+ "pass": true,
972
  "status": "must_fix",
973
  "text": "Need a managed hosting provider to deploy and run our marketing site"
974
  },
975
  {
976
  "actual": {
977
+ "model_output.classification.iab_content.mapping_mode": "exact",
978
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
979
  "model_output.classification.iab_content.tier2.label": "Computing",
980
  "model_output.classification.iab_content.tier3.label": "Laptops"
 
986
  "model_output.classification.iab_content.tier3.label": "Laptops"
987
  },
988
  "id": "laptops-easy",
989
+ "mismatches": [],
 
 
 
 
 
 
990
  "notes": "Cross-vertical easy IAB mapping case for Technology & Computing > Computing > Laptops.",
991
+ "pass": true,
992
  "status": "must_fix",
993
  "text": "Which laptop should I buy for college?"
994
  },
995
  {
996
  "actual": {
997
+ "model_output.classification.iab_content.mapping_mode": "exact",
998
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
999
  "model_output.classification.iab_content.tier2.label": "Computing",
1000
  "model_output.classification.iab_content.tier3.label": "Laptops"
 
1006
  "model_output.classification.iab_content.tier3.label": "Laptops"
1007
  },
1008
  "id": "laptops-medium",
1009
+ "mismatches": [],
 
 
 
 
 
 
1010
  "notes": "Cross-vertical medium IAB mapping case for Technology & Computing > Computing > Laptops.",
1011
+ "pass": true,
1012
  "status": "must_fix",
1013
  "text": "Best laptop for work and study under 1200"
1014
  },
1015
  {
1016
  "actual": {
1017
+ "model_output.classification.iab_content.mapping_mode": "exact",
1018
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1019
  "model_output.classification.iab_content.tier2.label": "Computing",
1020
  "model_output.classification.iab_content.tier3.label": "Laptops"
 
1026
  "model_output.classification.iab_content.tier3.label": "Laptops"
1027
  },
1028
  "id": "laptops-hard",
1029
+ "mismatches": [],
 
 
 
 
 
 
1030
  "notes": "Cross-vertical hard IAB mapping case for Technology & Computing > Computing > Laptops.",
1031
+ "pass": true,
1032
  "status": "must_fix",
1033
  "text": "Need a portable computer with good battery life for everyday work"
1034
  },
 
1065
  },
1066
  {
1067
  "actual": {
1068
+ "model_output.classification.iab_content.mapping_mode": "exact",
1069
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1070
  "model_output.classification.iab_content.tier2.label": "Computing",
1071
+ "model_output.classification.iab_content.tier3.label": "Desktops"
1072
  },
1073
  "expected": {
1074
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1077
  "model_output.classification.iab_content.tier3.label": "Desktops"
1078
  },
1079
  "id": "desktops-medium",
1080
+ "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
1081
  "notes": "Cross-vertical medium IAB mapping case for Technology & Computing > Computing > Desktops.",
1082
+ "pass": true,
1083
  "status": "must_fix",
1084
  "text": "Which desktop computer should I buy for a home office?"
1085
  },
1086
  {
1087
  "actual": {
1088
+ "model_output.classification.iab_content.mapping_mode": "exact",
1089
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1090
  "model_output.classification.iab_content.tier2.label": "Computing",
1091
+ "model_output.classification.iab_content.tier3.label": "Desktops"
1092
  },
1093
  "expected": {
1094
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1097
  "model_output.classification.iab_content.tier3.label": "Desktops"
1098
  },
1099
  "id": "desktops-hard",
1100
+ "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
1101
  "notes": "Cross-vertical hard IAB mapping case for Technology & Computing > Computing > Desktops.",
1102
+ "pass": true,
1103
  "status": "must_fix",
1104
  "text": "Need a desktop PC with strong performance for creative work"
1105
  },
1106
  {
1107
  "actual": {
1108
+ "model_output.classification.iab_content.mapping_mode": "exact",
1109
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1110
  "model_output.classification.iab_content.tier2.label": "Consumer Electronics",
1111
+ "model_output.classification.iab_content.tier3.label": "Smartphones"
1112
  },
1113
  "expected": {
1114
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1117
  "model_output.classification.iab_content.tier3.label": "Smartphones"
1118
  },
1119
  "id": "smartphones-easy",
1120
+ "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
1121
  "notes": "Cross-vertical easy IAB mapping case for Technology & Computing > Consumer Electronics > Smartphones.",
1122
+ "pass": true,
1123
  "status": "must_fix",
1124
  "text": "Best phone with a good camera under 700"
1125
  },
1126
  {
1127
  "actual": {
1128
+ "model_output.classification.iab_content.mapping_mode": "exact",
1129
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1130
  "model_output.classification.iab_content.tier2.label": "Consumer Electronics",
1131
+ "model_output.classification.iab_content.tier3.label": "Smartphones"
1132
  },
1133
  "expected": {
1134
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1137
  "model_output.classification.iab_content.tier3.label": "Smartphones"
1138
  },
1139
  "id": "smartphones-medium",
1140
+ "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
1141
  "notes": "Cross-vertical medium IAB mapping case for Technology & Computing > Consumer Electronics > Smartphones.",
1142
+ "pass": true,
1143
  "status": "must_fix",
1144
  "text": "Should I buy an iPhone or Pixel this year?"
1145
  },
1146
  {
1147
  "actual": {
1148
+ "model_output.classification.iab_content.mapping_mode": "exact",
1149
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1150
  "model_output.classification.iab_content.tier2.label": "Consumer Electronics",
1151
  "model_output.classification.iab_content.tier3.label": "Smartphones"
 
1157
  "model_output.classification.iab_content.tier3.label": "Smartphones"
1158
  },
1159
  "id": "smartphones-hard",
1160
+ "mismatches": [],
 
 
 
 
 
 
1161
  "notes": "Cross-vertical hard IAB mapping case for Technology & Computing > Consumer Electronics > Smartphones.",
1162
+ "pass": true,
1163
  "status": "must_fix",
1164
  "text": "Need a new smartphone with strong battery life and a clean software experience"
1165
  },
 
1181
  },
1182
  {
1183
  "actual": {
1184
+ "model_output.classification.iab_content.mapping_mode": "exact",
1185
  "model_output.classification.iab_content.tier1.label": "Style & Fashion"
1186
  },
1187
  "expected": {
 
1189
  "model_output.classification.iab_content.tier1.label": "Style & Fashion"
1190
  },
1191
  "id": "style-fashion-parent-medium",
1192
+ "mismatches": [
1193
+ {
1194
+ "actual": "exact",
1195
+ "expected": "nearest_equivalent",
1196
+ "path": "model_output.classification.iab_content.mapping_mode"
1197
+ }
1198
+ ],
1199
  "notes": "Cross-vertical medium IAB mapping case for Style & Fashion.",
1200
+ "pass": false,
1201
  "status": "must_fix",
1202
  "text": "Affordable fashion accessories for everyday wear"
1203
  },
1204
  {
1205
  "actual": {
1206
+ "model_output.classification.iab_content.mapping_mode": "exact",
1207
  "model_output.classification.iab_content.tier1.label": "Style & Fashion"
1208
  },
1209
  "expected": {
 
1211
  "model_output.classification.iab_content.tier1.label": "Style & Fashion"
1212
  },
1213
  "id": "style-fashion-parent-hard",
1214
+ "mismatches": [
1215
+ {
1216
+ "actual": "exact",
1217
+ "expected": "nearest_equivalent",
1218
+ "path": "model_output.classification.iab_content.mapping_mode"
1219
+ }
1220
+ ],
1221
  "notes": "Cross-vertical hard IAB mapping case for Style & Fashion.",
1222
+ "pass": false,
1223
  "status": "must_fix",
1224
  "text": "Need style recommendations for clothing and footwear without a specific brand in mind"
1225
  },
1226
  {
1227
  "actual": {
1228
+ "model_output.classification.iab_content.mapping_mode": "exact",
1229
+ "model_output.classification.iab_content.tier1.label": "Sports",
1230
+ "model_output.classification.iab_content.tier2.label": "Walking",
1231
+ "model_output.classification.iab_content.tier3.label": null
1232
  },
1233
  "expected": {
1234
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1239
  "id": "womens-shoes-easy",
1240
  "mismatches": [
1241
  {
1242
+ "actual": "Sports",
1243
+ "expected": "Style & Fashion",
1244
+ "path": "model_output.classification.iab_content.tier1.label"
1245
+ },
1246
+ {
1247
+ "actual": "Walking",
1248
+ "expected": "Women's Fashion",
1249
+ "path": "model_output.classification.iab_content.tier2.label"
1250
+ },
1251
+ {
1252
+ "actual": null,
1253
+ "expected": "Women's Shoes and Footwear",
1254
+ "path": "model_output.classification.iab_content.tier3.label"
1255
  }
1256
  ],
1257
  "notes": "Cross-vertical easy IAB mapping case for Style & Fashion > Women's Fashion > Women's Shoes and Footwear.",
 
1261
  },
1262
  {
1263
  "actual": {
1264
+ "model_output.classification.iab_content.mapping_mode": "exact",
1265
+ "model_output.classification.iab_content.tier1.label": "Sports",
1266
+ "model_output.classification.iab_content.tier2.label": "Walking",
1267
+ "model_output.classification.iab_content.tier3.label": null
1268
  },
1269
  "expected": {
1270
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1275
  "id": "womens-shoes-medium",
1276
  "mismatches": [
1277
  {
1278
+ "actual": "Sports",
1279
+ "expected": "Style & Fashion",
1280
+ "path": "model_output.classification.iab_content.tier1.label"
1281
+ },
1282
+ {
1283
+ "actual": "Walking",
1284
+ "expected": "Women's Fashion",
1285
+ "path": "model_output.classification.iab_content.tier2.label"
1286
+ },
1287
+ {
1288
+ "actual": null,
1289
+ "expected": "Women's Shoes and Footwear",
1290
+ "path": "model_output.classification.iab_content.tier3.label"
1291
  }
1292
  ],
1293
  "notes": "Cross-vertical medium IAB mapping case for Style & Fashion > Women's Fashion > Women's Shoes and Footwear.",
 
1297
  },
1298
  {
1299
  "actual": {
1300
+ "model_output.classification.iab_content.mapping_mode": "exact",
1301
  "model_output.classification.iab_content.tier1.label": "Style & Fashion",
1302
  "model_output.classification.iab_content.tier2.label": "Women's Fashion",
1303
  "model_output.classification.iab_content.tier3.label": "Women's Shoes and Footwear"
 
1309
  "model_output.classification.iab_content.tier3.label": "Women's Shoes and Footwear"
1310
  },
1311
  "id": "womens-shoes-hard",
1312
+ "mismatches": [],
 
 
 
 
 
 
1313
  "notes": "Cross-vertical hard IAB mapping case for Style & Fashion > Women's Fashion > Women's Shoes and Footwear.",
1314
+ "pass": true,
1315
  "status": "must_fix",
1316
  "text": "Need women's footwear for commuting that looks polished but feels comfortable"
1317
  },
 
1320
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1321
  "model_output.classification.iab_content.tier1.label": "Style & Fashion",
1322
  "model_output.classification.iab_content.tier2.label": "Men's Fashion",
1323
+ "model_output.classification.iab_content.tier3.label": null
1324
  },
1325
  "expected": {
1326
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1334
  "actual": "nearest_equivalent",
1335
  "expected": "exact",
1336
  "path": "model_output.classification.iab_content.mapping_mode"
1337
+ },
1338
+ {
1339
+ "actual": null,
1340
+ "expected": "Men's Shoes and Footwear",
1341
+ "path": "model_output.classification.iab_content.tier3.label"
1342
  }
1343
  ],
1344
  "notes": "Cross-vertical easy IAB mapping case for Style & Fashion > Men's Fashion > Men's Shoes and Footwear.",
 
1348
  },
1349
  {
1350
  "actual": {
1351
+ "model_output.classification.iab_content.mapping_mode": "exact",
1352
  "model_output.classification.iab_content.tier1.label": "Style & Fashion",
1353
  "model_output.classification.iab_content.tier2.label": "Men's Fashion",
1354
+ "model_output.classification.iab_content.tier3.label": "Men's Clothing"
1355
  },
1356
  "expected": {
1357
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1362
  "id": "mens-shoes-medium",
1363
  "mismatches": [
1364
  {
1365
+ "actual": "Men's Clothing",
1366
+ "expected": "Men's Shoes and Footwear",
1367
+ "path": "model_output.classification.iab_content.tier3.label"
1368
  }
1369
  ],
1370
  "notes": "Cross-vertical medium IAB mapping case for Style & Fashion > Men's Fashion > Men's Shoes and Footwear.",
 
1374
  },
1375
  {
1376
  "actual": {
1377
+ "model_output.classification.iab_content.mapping_mode": "exact",
1378
  "model_output.classification.iab_content.tier1.label": "Style & Fashion",
1379
  "model_output.classification.iab_content.tier2.label": "Men's Fashion",
1380
  "model_output.classification.iab_content.tier3.label": "Men's Shoes and Footwear"
 
1386
  "model_output.classification.iab_content.tier3.label": "Men's Shoes and Footwear"
1387
  },
1388
  "id": "mens-shoes-hard",
1389
+ "mismatches": [],
 
 
 
 
 
 
1390
  "notes": "Cross-vertical hard IAB mapping case for Style & Fashion > Men's Fashion > Men's Shoes and Footwear.",
1391
+ "pass": true,
1392
  "status": "must_fix",
1393
  "text": "Need men's footwear that works for workdays and weekend walking"
1394
  },
1395
  {
1396
  "actual": {
1397
+ "model_output.classification.iab_content.mapping_mode": "exact",
1398
+ "model_output.classification.iab_content.tier1.label": "Travel",
1399
+ "model_output.classification.iab_content.tier2.label": "Travel Type",
1400
+ "model_output.classification.iab_content.tier3.label": "Hotels and Motels"
1401
  },
1402
  "expected": {
1403
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1406
  "model_output.classification.iab_content.tier3.label": "Hotels and Motels"
1407
  },
1408
  "id": "hotels-easy",
1409
+ "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1410
  "notes": "Cross-vertical easy IAB mapping case for Travel > Travel Type > Hotels and Motels.",
1411
+ "pass": true,
1412
  "status": "must_fix",
1413
  "text": "Need a hotel in Chicago for two nights"
1414
  },
1415
  {
1416
  "actual": {
1417
+ "model_output.classification.iab_content.mapping_mode": "exact",
1418
  "model_output.classification.iab_content.tier1.label": "Travel",
1419
  "model_output.classification.iab_content.tier2.label": "Travel Type",
1420
+ "model_output.classification.iab_content.tier3.label": "Hotels and Motels"
1421
  },
1422
  "expected": {
1423
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1426
  "model_output.classification.iab_content.tier3.label": "Hotels and Motels"
1427
  },
1428
  "id": "hotels-medium",
1429
+ "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
1430
  "notes": "Cross-vertical medium IAB mapping case for Travel > Travel Type > Hotels and Motels.",
1431
+ "pass": true,
1432
  "status": "must_fix",
1433
  "text": "Best hotels near Times Square for a weekend trip"
1434
  },
1435
  {
1436
  "actual": {
1437
+ "model_output.classification.iab_content.mapping_mode": "exact",
1438
  "model_output.classification.iab_content.tier1.label": "Travel",
1439
  "model_output.classification.iab_content.tier2.label": null,
1440
  "model_output.classification.iab_content.tier3.label": null
 
1447
  },
1448
  "id": "hotels-hard",
1449
  "mismatches": [
 
 
 
 
 
1450
  {
1451
  "actual": null,
1452
  "expected": "Travel Type",
 
1465
  },
1466
  {
1467
  "actual": {
1468
+ "model_output.classification.iab_content.mapping_mode": "exact",
1469
  "model_output.classification.iab_content.tier1.label": "Real Estate",
1470
  "model_output.classification.iab_content.tier2.label": "Apartments"
1471
  },
 
1476
  },
1477
  "id": "real-estate-rentals-easy",
1478
  "mismatches": [
 
 
 
 
 
1479
  {
1480
  "actual": "Apartments",
1481
  "expected": "Real Estate Renting and Leasing",
 
1489
  },
1490
  {
1491
  "actual": {
1492
+ "model_output.classification.iab_content.mapping_mode": "exact",
1493
  "model_output.classification.iab_content.tier1.label": "Real Estate",
1494
  "model_output.classification.iab_content.tier2.label": "Apartments"
1495
  },
 
1500
  },
1501
  "id": "real-estate-rentals-medium",
1502
  "mismatches": [
1503
+ {
1504
+ "actual": "exact",
1505
+ "expected": "nearest_equivalent",
1506
+ "path": "model_output.classification.iab_content.mapping_mode"
1507
+ },
1508
  {
1509
  "actual": "Apartments",
1510
  "expected": "Real Estate Renting and Leasing",
 
1518
  },
1519
  {
1520
  "actual": {
1521
+ "model_output.classification.iab_content.mapping_mode": "exact",
1522
  "model_output.classification.iab_content.tier1.label": "Real Estate",
1523
+ "model_output.classification.iab_content.tier2.label": "Real Estate Renting and Leasing"
1524
  },
1525
  "expected": {
1526
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1528
  "model_output.classification.iab_content.tier2.label": "Real Estate Renting and Leasing"
1529
  },
1530
  "id": "real-estate-rentals-hard",
1531
+ "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
1532
  "notes": "Cross-vertical hard IAB mapping case for Real Estate > Real Estate Renting and Leasing.",
1533
+ "pass": true,
1534
  "status": "must_fix",
1535
  "text": "Need rental listings for a short move, not home-buying advice"
1536
  },
1537
  {
1538
  "actual": {
1539
+ "model_output.classification.iab_content.mapping_mode": "exact",
1540
+ "model_output.classification.iab_content.tier1.label": "Sports",
1541
+ "model_output.classification.iab_content.tier2.label": "Walking",
1542
+ "model_output.classification.iab_content.tier3.label": null
1543
  },
1544
  "expected": {
1545
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1550
  "id": "running-and-jogging-easy",
1551
  "mismatches": [
1552
  {
1553
+ "actual": "Sports",
1554
+ "expected": "Healthy Living",
1555
+ "path": "model_output.classification.iab_content.tier1.label"
1556
+ },
1557
+ {
1558
+ "actual": "Walking",
1559
+ "expected": "Fitness and Exercise",
1560
+ "path": "model_output.classification.iab_content.tier2.label"
1561
+ },
1562
+ {
1563
+ "actual": null,
1564
+ "expected": "Running and Jogging",
1565
+ "path": "model_output.classification.iab_content.tier3.label"
1566
  }
1567
  ],
1568
  "notes": "Cross-vertical easy IAB mapping case for Healthy Living > Fitness and Exercise > Running and Jogging.",
 
1572
  },
1573
  {
1574
  "actual": {
1575
+ "model_output.classification.iab_content.mapping_mode": "exact",
1576
+ "model_output.classification.iab_content.tier1.label": "Sports",
1577
+ "model_output.classification.iab_content.tier2.label": "Walking",
1578
+ "model_output.classification.iab_content.tier3.label": null
1579
  },
1580
  "expected": {
1581
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1586
  "id": "running-and-jogging-medium",
1587
  "mismatches": [
1588
  {
1589
+ "actual": "Sports",
1590
+ "expected": "Healthy Living",
1591
+ "path": "model_output.classification.iab_content.tier1.label"
1592
+ },
1593
+ {
1594
+ "actual": "Walking",
1595
+ "expected": "Fitness and Exercise",
1596
+ "path": "model_output.classification.iab_content.tier2.label"
1597
+ },
1598
+ {
1599
+ "actual": null,
1600
+ "expected": "Running and Jogging",
1601
+ "path": "model_output.classification.iab_content.tier3.label"
1602
  }
1603
  ],
1604
  "notes": "Cross-vertical medium IAB mapping case for Healthy Living > Fitness and Exercise > Running and Jogging.",
 
1608
  },
1609
  {
1610
  "actual": {
1611
+ "model_output.classification.iab_content.mapping_mode": "exact",
1612
+ "model_output.classification.iab_content.tier1.label": "Sports",
1613
+ "model_output.classification.iab_content.tier2.label": "Walking",
1614
+ "model_output.classification.iab_content.tier3.label": null
1615
  },
1616
  "expected": {
1617
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1622
  "id": "running-and-jogging-hard",
1623
  "mismatches": [
1624
  {
1625
+ "actual": "Sports",
1626
+ "expected": "Healthy Living",
1627
+ "path": "model_output.classification.iab_content.tier1.label"
1628
+ },
1629
+ {
1630
+ "actual": "Walking",
1631
+ "expected": "Fitness and Exercise",
1632
+ "path": "model_output.classification.iab_content.tier2.label"
1633
+ },
1634
+ {
1635
+ "actual": null,
1636
+ "expected": "Running and Jogging",
1637
+ "path": "model_output.classification.iab_content.tier3.label"
1638
  }
1639
  ],
1640
  "notes": "Cross-vertical hard IAB mapping case for Healthy Living > Fitness and Exercise > Running and Jogging.",
 
1644
  },
1645
  {
1646
  "actual": {
1647
+ "model_output.classification.iab_content.mapping_mode": "exact",
1648
  "model_output.classification.iab_content.tier1.label": "Sports",
1649
+ "model_output.classification.iab_content.tier2.label": "Soccer"
1650
  },
1651
  "expected": {
1652
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1654
  "model_output.classification.iab_content.tier2.label": "Soccer"
1655
  },
1656
  "id": "soccer-easy",
1657
+ "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
1658
  "notes": "Cross-vertical easy IAB mapping case for Sports > Soccer.",
1659
+ "pass": true,
1660
  "status": "must_fix",
1661
  "text": "How do offside rules work in soccer?"
1662
  },
1663
  {
1664
  "actual": {
1665
+ "model_output.classification.iab_content.mapping_mode": "exact",
1666
  "model_output.classification.iab_content.tier1.label": "Sports",
1667
+ "model_output.classification.iab_content.tier2.label": "Soccer"
1668
  },
1669
  "expected": {
1670
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1672
  "model_output.classification.iab_content.tier2.label": "Soccer"
1673
  },
1674
  "id": "soccer-medium",
1675
+ "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
1676
  "notes": "Cross-vertical medium IAB mapping case for Sports > Soccer.",
1677
+ "pass": true,
1678
  "status": "must_fix",
1679
  "text": "Best soccer drills for beginner players"
1680
  },
1681
  {
1682
  "actual": {
1683
+ "model_output.classification.iab_content.mapping_mode": "exact",
1684
  "model_output.classification.iab_content.tier1.label": "Sports",
1685
+ "model_output.classification.iab_content.tier2.label": "Soccer"
1686
  },
1687
  "expected": {
1688
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1690
  "model_output.classification.iab_content.tier2.label": "Soccer"
1691
  },
1692
  "id": "soccer-hard",
1693
+ "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
1694
  "notes": "Cross-vertical hard IAB mapping case for Sports > Soccer.",
1695
+ "pass": true,
1696
  "status": "must_fix",
1697
  "text": "Need help understanding football tactics for the Premier League, not fantasy sports"
1698
  },
1699
  {
1700
  "actual": {
1701
+ "model_output.classification.iab_content.mapping_mode": "exact",
1702
  "model_output.classification.iab_content.tier1.label": "Genres",
1703
  "model_output.classification.iab_content.tier2.label": "Fantasy"
1704
  },
 
1714
  "expected": "Books and Literature",
1715
  "path": "model_output.classification.iab_content.tier1.label"
1716
  },
 
 
 
 
 
1717
  {
1718
  "actual": "Fantasy",
1719
  "expected": "Fiction",
 
1727
  },
1728
  {
1729
  "actual": {
1730
+ "model_output.classification.iab_content.mapping_mode": "exact",
1731
+ "model_output.classification.iab_content.tier1.label": "Travel",
1732
+ "model_output.classification.iab_content.tier2.label": "Travel Type"
1733
  },
1734
  "expected": {
1735
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
1737
  "model_output.classification.iab_content.tier2.label": "Fiction"
1738
  },
1739
  "id": "fiction-medium",
1740
+ "mismatches": [
1741
+ {
1742
+ "actual": "Travel",
1743
+ "expected": "Books and Literature",
1744
+ "path": "model_output.classification.iab_content.tier1.label"
1745
+ },
1746
+ {
1747
+ "actual": "exact",
1748
+ "expected": "nearest_equivalent",
1749
+ "path": "model_output.classification.iab_content.mapping_mode"
1750
+ },
1751
+ {
1752
+ "actual": "Travel Type",
1753
+ "expected": "Fiction",
1754
+ "path": "model_output.classification.iab_content.tier2.label"
1755
+ }
1756
+ ],
1757
  "notes": "Cross-vertical medium IAB mapping case for Books and Literature > Fiction.",
1758
+ "pass": false,
1759
  "status": "must_fix",
1760
  "text": "Best fiction books for a long flight"
1761
  },
1762
  {
1763
  "actual": {
1764
+ "model_output.classification.iab_content.mapping_mode": "exact",
1765
+ "model_output.classification.iab_content.tier1.label": "Genres",
1766
+ "model_output.classification.iab_content.tier2.label": "Romance"
1767
  },
1768
  "expected": {
1769
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1773
  "id": "fiction-hard",
1774
  "mismatches": [
1775
  {
1776
+ "actual": "Genres",
1777
+ "expected": "Books and Literature",
1778
+ "path": "model_output.classification.iab_content.tier1.label"
1779
  },
1780
  {
1781
+ "actual": "Romance",
1782
  "expected": "Fiction",
1783
  "path": "model_output.classification.iab_content.tier2.label"
1784
  }
 
1790
  },
1791
  {
1792
  "actual": {
1793
+ "model_output.classification.iab_content.mapping_mode": "exact",
1794
  "model_output.classification.iab_content.tier1.label": "Home & Garden",
1795
  "model_output.classification.iab_content.tier2.label": "Remodeling & Construction"
1796
  },
 
1801
  },
1802
  "id": "home-improvement-easy",
1803
  "mismatches": [
 
 
 
 
 
1804
  {
1805
  "actual": "Remodeling & Construction",
1806
  "expected": "Home Improvement",
 
1814
  },
1815
  {
1816
  "actual": {
1817
+ "model_output.classification.iab_content.mapping_mode": "exact",
1818
+ "model_output.classification.iab_content.tier1.label": "Home & Garden",
1819
+ "model_output.classification.iab_content.tier2.label": "Indoor Environmental Quality"
1820
  },
1821
  "expected": {
1822
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1826
  "id": "home-improvement-medium",
1827
  "mismatches": [
1828
  {
1829
+ "actual": "Indoor Environmental Quality",
 
 
 
 
 
 
 
 
 
 
1830
  "expected": "Home Improvement",
1831
  "path": "model_output.classification.iab_content.tier2.label"
1832
  }
 
1838
  },
1839
  {
1840
  "actual": {
1841
+ "model_output.classification.iab_content.mapping_mode": "exact",
1842
  "model_output.classification.iab_content.tier1.label": "Home & Garden",
1843
+ "model_output.classification.iab_content.tier2.label": null
1844
  },
1845
  "expected": {
1846
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1850
  "id": "home-improvement-hard",
1851
  "mismatches": [
1852
  {
1853
+ "actual": null,
 
 
 
 
 
1854
  "expected": "Home Improvement",
1855
  "path": "model_output.classification.iab_content.tier2.label"
1856
  }
 
1863
  {
1864
  "actual": {
1865
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1866
+ "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1867
+ "model_output.classification.iab_content.tier2.label": null
1868
  },
1869
  "expected": {
1870
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1873
  },
1874
  "id": "online-education-easy",
1875
  "mismatches": [
1876
+ {
1877
+ "actual": "Technology & Computing",
1878
+ "expected": "Education",
1879
+ "path": "model_output.classification.iab_content.tier1.label"
1880
+ },
1881
  {
1882
  "actual": "nearest_equivalent",
1883
  "expected": "exact",
1884
  "path": "model_output.classification.iab_content.mapping_mode"
1885
+ },
1886
+ {
1887
+ "actual": null,
1888
+ "expected": "Online Education",
1889
+ "path": "model_output.classification.iab_content.tier2.label"
1890
  }
1891
  ],
1892
  "notes": "Cross-vertical easy IAB mapping case for Education > Online Education.",
 
1896
  },
1897
  {
1898
  "actual": {
1899
+ "model_output.classification.iab_content.mapping_mode": "exact",
1900
  "model_output.classification.iab_content.tier1.label": "Careers",
1901
  "model_output.classification.iab_content.tier2.label": "Remote Working"
1902
  },
 
1912
  "expected": "Education",
1913
  "path": "model_output.classification.iab_content.tier1.label"
1914
  },
 
 
 
 
 
1915
  {
1916
  "actual": "Remote Working",
1917
  "expected": "Online Education",
 
1926
  {
1927
  "actual": {
1928
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1929
+ "model_output.classification.iab_content.tier1.label": "Careers",
1930
+ "model_output.classification.iab_content.tier2.label": null
1931
  },
1932
  "expected": {
1933
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1937
  "id": "online-education-hard",
1938
  "mismatches": [
1939
  {
1940
+ "actual": "Careers",
1941
  "expected": "Education",
1942
  "path": "model_output.classification.iab_content.tier1.label"
1943
  },
 
1947
  "path": "model_output.classification.iab_content.mapping_mode"
1948
  },
1949
  {
1950
+ "actual": null,
1951
  "expected": "Online Education",
1952
  "path": "model_output.classification.iab_content.tier2.label"
1953
  }
 
1959
  },
1960
  {
1961
  "actual": {
1962
+ "model_output.classification.iab_content.mapping_mode": "exact",
1963
  "model_output.classification.iab_content.tier1.label": "Education",
1964
  "model_output.classification.iab_content.tier2.label": "College Education",
1965
  "model_output.classification.iab_content.tier3.label": "Postgraduate Education"
 
1971
  "model_output.classification.iab_content.tier3.label": "Postgraduate Education"
1972
  },
1973
  "id": "postgraduate-education-easy",
1974
+ "mismatches": [],
 
 
 
 
 
 
1975
  "notes": "Cross-vertical easy IAB mapping case for Education > College Education > Postgraduate Education.",
1976
+ "pass": true,
1977
  "status": "must_fix",
1978
  "text": "best universities to study masters"
1979
  },
 
1981
  "actual": {
1982
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1983
  "model_output.classification.iab_content.tier1.label": "Education",
1984
+ "model_output.classification.iab_content.tier2.label": null,
1985
+ "model_output.classification.iab_content.tier3.label": null
1986
  },
1987
  "expected": {
1988
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1996
  "actual": "nearest_equivalent",
1997
  "expected": "exact",
1998
  "path": "model_output.classification.iab_content.mapping_mode"
1999
+ },
2000
+ {
2001
+ "actual": null,
2002
+ "expected": "College Education",
2003
+ "path": "model_output.classification.iab_content.tier2.label"
2004
+ },
2005
+ {
2006
+ "actual": null,
2007
+ "expected": "Postgraduate Education",
2008
+ "path": "model_output.classification.iab_content.tier3.label"
2009
  }
2010
  ],
2011
  "notes": "Cross-vertical medium IAB mapping case for Education > College Education > Postgraduate Education.",
 
2015
  },
2016
  {
2017
  "actual": {
2018
+ "model_output.classification.iab_content.mapping_mode": "exact",
2019
  "model_output.classification.iab_content.tier1.label": "Education",
2020
  "model_output.classification.iab_content.tier2.label": "College Education",
2021
  "model_output.classification.iab_content.tier3.label": "Postgraduate Education"
 
2027
  "model_output.classification.iab_content.tier3.label": "Postgraduate Education"
2028
  },
2029
  "id": "postgraduate-education-hard",
2030
+ "mismatches": [],
 
 
 
 
 
 
2031
  "notes": "Cross-vertical hard IAB mapping case for Education > College Education > Postgraduate Education.",
2032
+ "pass": true,
2033
  "status": "must_fix",
2034
  "text": "need postgraduate options for a master's degree, not short online courses"
2035
  },
 
2079
  },
2080
  {
2081
  "actual": {
2082
+ "model_output.classification.iab_content.mapping_mode": "exact",
2083
  "model_output.classification.iab_content.tier1.label": "Medical Health"
2084
  },
2085
  "expected": {
 
2087
  "model_output.classification.iab_content.tier1.label": "Medical Health"
2088
  },
2089
  "id": "medical-health-hard",
2090
+ "mismatches": [],
 
 
 
 
 
 
2091
  "notes": "Cross-vertical hard IAB mapping case for Medical Health.",
2092
+ "pass": true,
2093
  "status": "must_fix",
2094
  "text": "need medical advice about symptoms, not wellness or fitness tips"
2095
  },
2096
  {
2097
  "actual": {
2098
+ "model_output.classification.iab_content.mapping_mode": "exact",
2099
  "model_output.classification.iab_content.tier1.label": "Careers",
2100
  "model_output.classification.iab_content.tier2.label": "Remote Working"
2101
  },
 
2106
  },
2107
  "id": "careers-job-search-easy",
2108
  "mismatches": [
 
 
 
 
 
2109
  {
2110
  "actual": "Remote Working",
2111
  "expected": "Job Search",
 
2119
  },
2120
  {
2121
  "actual": {
2122
+ "model_output.classification.iab_content.mapping_mode": "exact",
2123
  "model_output.classification.iab_content.tier1.label": "Business and Finance",
2124
  "model_output.classification.iab_content.tier2.label": "Industries"
2125
  },
 
2135
  "expected": "Careers",
2136
  "path": "model_output.classification.iab_content.tier1.label"
2137
  },
 
 
 
 
 
2138
  {
2139
  "actual": "Industries",
2140
  "expected": "Job Search",
 
2149
  {
2150
  "actual": {
2151
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2152
+ "model_output.classification.iab_content.tier1.label": "Genres",
2153
+ "model_output.classification.iab_content.tier2.label": null
2154
  },
2155
  "expected": {
2156
  "model_output.classification.iab_content.mapping_mode": "exact",
 
2160
  "id": "careers-job-search-hard",
2161
  "mismatches": [
2162
  {
2163
+ "actual": "Genres",
2164
  "expected": "Careers",
2165
  "path": "model_output.classification.iab_content.tier1.label"
2166
  },
 
2170
  "path": "model_output.classification.iab_content.mapping_mode"
2171
  },
2172
  {
2173
+ "actual": null,
2174
  "expected": "Job Search",
2175
  "path": "model_output.classification.iab_content.tier2.label"
2176
  }
 
2183
  {
2184
  "actual": {
2185
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2186
+ "model_output.classification.iab_content.tier1.label": "Personal Celebrations & Life Events",
2187
  "model_output.classification.iab_content.tier2.label": null
2188
  },
2189
  "expected": {
 
2193
  },
2194
  "id": "personal-finance-easy",
2195
  "mismatches": [
2196
+ {
2197
+ "actual": "Personal Celebrations & Life Events",
2198
+ "expected": "Personal Finance",
2199
+ "path": "model_output.classification.iab_content.tier1.label"
2200
+ },
2201
  {
2202
  "actual": "nearest_equivalent",
2203
  "expected": "exact",
 
2216
  },
2217
  {
2218
  "actual": {
2219
+ "model_output.classification.iab_content.mapping_mode": "exact",
2220
  "model_output.classification.iab_content.tier1.label": "Personal Finance",
2221
  "model_output.classification.iab_content.tier2.label": null
2222
  },
 
2227
  },
2228
  "id": "personal-finance-medium",
2229
  "mismatches": [
 
 
 
 
 
2230
  {
2231
  "actual": null,
2232
  "expected": "Financial Planning",
 
2240
  },
2241
  {
2242
  "actual": {
2243
+ "model_output.classification.iab_content.mapping_mode": "exact",
2244
  "model_output.classification.iab_content.tier1.label": "Personal Finance",
2245
+ "model_output.classification.iab_content.tier2.label": null
2246
  },
2247
  "expected": {
2248
  "model_output.classification.iab_content.mapping_mode": "exact",
 
2252
  "id": "personal-finance-hard",
2253
  "mismatches": [
2254
  {
2255
+ "actual": null,
 
 
 
 
 
2256
  "expected": "Financial Planning",
2257
  "path": "model_output.classification.iab_content.tier2.label"
2258
  }
 
2264
  },
2265
  {
2266
  "actual": {
2267
+ "model_output.classification.iab_content.mapping_mode": "exact",
2268
  "model_output.classification.iab_content.tier1.label": "Family and Relationships",
2269
  "model_output.classification.iab_content.tier2.label": "Parenting"
2270
  },
 
2274
  "model_output.classification.iab_content.tier2.label": "Parenting"
2275
  },
2276
  "id": "parenting-easy",
2277
+ "mismatches": [],
 
 
 
 
 
 
2278
  "notes": "Cross-vertical easy IAB mapping case for Family and Relationships > Parenting.",
2279
+ "pass": true,
2280
  "status": "must_fix",
2281
  "text": "tips for parenting a toddler"
2282
  },
 
2284
  "actual": {
2285
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2286
  "model_output.classification.iab_content.tier1.label": "Education",
2287
+ "model_output.classification.iab_content.tier2.label": null
2288
  },
2289
  "expected": {
2290
  "model_output.classification.iab_content.mapping_mode": "exact",
 
2304
  "path": "model_output.classification.iab_content.mapping_mode"
2305
  },
2306
  {
2307
+ "actual": null,
2308
  "expected": "Parenting",
2309
  "path": "model_output.classification.iab_content.tier2.label"
2310
  }
 
2316
  },
2317
  {
2318
  "actual": {
2319
+ "model_output.classification.iab_content.mapping_mode": "exact",
2320
  "model_output.classification.iab_content.tier1.label": "Family and Relationships",
2321
  "model_output.classification.iab_content.tier2.label": "Parenting"
2322
  },
 
2326
  "model_output.classification.iab_content.tier2.label": "Parenting"
2327
  },
2328
  "id": "parenting-hard",
2329
+ "mismatches": [],
 
 
 
 
 
 
2330
  "notes": "Cross-vertical hard IAB mapping case for Family and Relationships > Parenting.",
2331
+ "pass": true,
2332
  "status": "must_fix",
2333
  "text": "need parenting advice for a child starting preschool"
2334
  },
2335
  {
2336
  "actual": {
2337
+ "model_output.classification.iab_content.mapping_mode": "exact",
2338
  "model_output.classification.iab_content.tier1.label": "Home & Garden",
2339
  "model_output.classification.iab_content.tier2.label": "Gardening"
2340
  },
 
2344
  "model_output.classification.iab_content.tier2.label": "Gardening"
2345
  },
2346
  "id": "gardening-easy",
2347
+ "mismatches": [],
 
 
 
 
 
 
2348
  "notes": "Cross-vertical easy IAB mapping case for Home & Garden > Gardening.",
2349
+ "pass": true,
2350
  "status": "must_fix",
2351
  "text": "best plants for a small balcony garden"
2352
  },
2353
  {
2354
  "actual": {
2355
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2356
+ "model_output.classification.iab_content.tier1.label": "Food & Drink",
2357
  "model_output.classification.iab_content.tier2.label": null
2358
  },
2359
  "expected": {
 
2364
  "id": "gardening-medium",
2365
  "mismatches": [
2366
  {
2367
+ "actual": "Food & Drink",
2368
  "expected": "Home & Garden",
2369
  "path": "model_output.classification.iab_content.tier1.label"
2370
  },
 
2386
  },
2387
  {
2388
  "actual": {
2389
+ "model_output.classification.iab_content.mapping_mode": "exact",
2390
  "model_output.classification.iab_content.tier1.label": "Home & Garden",
2391
+ "model_output.classification.iab_content.tier2.label": "Gardening"
2392
  },
2393
  "expected": {
2394
  "model_output.classification.iab_content.mapping_mode": "exact",
 
2396
  "model_output.classification.iab_content.tier2.label": "Gardening"
2397
  },
2398
  "id": "gardening-hard",
2399
+ "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
2400
  "notes": "Cross-vertical hard IAB mapping case for Home & Garden > Gardening.",
2401
+ "pass": true,
2402
  "status": "must_fix",
2403
  "text": "need gardening advice for a shady backyard, not interior decor ideas"
2404
  },
2405
  {
2406
  "actual": {
2407
+ "model_output.classification.iab_content.mapping_mode": "exact",
2408
+ "model_output.classification.iab_content.tier1.label": "Entertainment",
2409
+ "model_output.classification.iab_content.tier2.label": "Movies"
2410
  },
2411
  "expected": {
2412
  "model_output.classification.iab_content.mapping_mode": "exact",
 
2414
  "model_output.classification.iab_content.tier2.label": "Movies"
2415
  },
2416
  "id": "movies-easy",
2417
+ "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2418
  "notes": "Cross-vertical easy IAB mapping case for Entertainment > Movies.",
2419
+ "pass": true,
2420
  "status": "must_fix",
2421
  "text": "What movie should we watch tonight?"
2422
  },
2423
  {
2424
  "actual": {
2425
+ "model_output.classification.iab_content.mapping_mode": "exact",
2426
  "model_output.classification.iab_content.tier1.label": "Genres",
2427
  "model_output.classification.iab_content.tier2.label": "Horror"
2428
  },
 
2438
  "expected": "Entertainment",
2439
  "path": "model_output.classification.iab_content.tier1.label"
2440
  },
 
 
 
 
 
2441
  {
2442
  "actual": "Horror",
2443
  "expected": "Movies",
 
2451
  },
2452
  {
2453
  "actual": {
2454
+ "model_output.classification.iab_content.mapping_mode": "exact",
2455
  "model_output.classification.iab_content.tier1.label": "Entertainment",
2456
+ "model_output.classification.iab_content.tier2.label": "Movies"
2457
  },
2458
  "expected": {
2459
  "model_output.classification.iab_content.mapping_mode": "exact",
 
2461
  "model_output.classification.iab_content.tier2.label": "Movies"
2462
  },
2463
  "id": "movies-hard",
2464
+ "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
2465
  "notes": "Cross-vertical hard IAB mapping case for Entertainment > Movies.",
2466
+ "pass": true,
2467
  "status": "must_fix",
2468
  "text": "Looking for film recommendations, not TV shows or music"
2469
  }
artifacts/evaluation/latest/iab_quality_target_eval.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
  "by_status": {
3
  "must_fix": {
4
- "failed": 12,
5
- "passed": 0,
6
  "total": 12
7
  }
8
  },
9
- "cases_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/examples/iab_mapping_cases.json",
10
  "count": 12,
11
- "failed": 12,
12
- "passed": 0,
13
  "results": [
14
  {
15
  "actual": {
@@ -42,7 +42,7 @@
42
  },
43
  {
44
  "actual": {
45
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
46
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
47
  "model_output.classification.iab_content.tier2.label": "Computing",
48
  "model_output.classification.iab_content.tier3.label": "Laptops"
@@ -54,15 +54,9 @@
54
  "model_output.classification.iab_content.tier3.label": "Laptops"
55
  },
56
  "id": "laptop-buying-maps-to-laptops",
57
- "mismatches": [
58
- {
59
- "actual": "nearest_equivalent",
60
- "expected": "exact",
61
- "path": "model_output.classification.iab_content.mapping_mode"
62
- }
63
- ],
64
  "notes": "Laptop shopping should resolve into the laptops branch, not business sales.",
65
- "pass": false,
66
  "status": "must_fix",
67
  "text": "Which laptop to buy in 2026"
68
  },
@@ -70,7 +64,7 @@
70
  "actual": {
71
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
72
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
73
- "model_output.classification.iab_content.tier2.label": "Computing",
74
  "model_output.classification.iab_content.tier3.label": null
75
  },
76
  "expected": {
@@ -81,6 +75,11 @@
81
  },
82
  "id": "labtop-buying-maps-to-laptops",
83
  "mismatches": [
 
 
 
 
 
84
  {
85
  "actual": null,
86
  "expected": "Laptops",
@@ -99,10 +98,10 @@
99
  },
100
  {
101
  "actual": {
102
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
103
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
104
- "model_output.classification.iab_content.tier2.label": "Computing",
105
- "model_output.classification.iab_content.tier3.label": "Software and Applications"
106
  },
107
  "expected": {
108
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -118,14 +117,19 @@
118
  "path": "model_output.classification.iab_content.tier1.label"
119
  },
120
  {
121
- "actual": "Computing",
122
  "expected": "Business",
123
  "path": "model_output.classification.iab_content.tier2.label"
124
  },
125
  {
126
- "actual": "Software and Applications",
127
  "expected": "Sales",
128
  "path": "model_output.classification.iab_content.tier3.label"
 
 
 
 
 
129
  }
130
  ],
131
  "notes": "CRM education should resolve to the closest business/sales path, not generic software.",
@@ -136,9 +140,9 @@
136
  {
137
  "actual": {
138
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
139
- "model_output.classification.iab_content.tier1.label": "Technology & Computing",
140
- "model_output.classification.iab_content.tier2.label": "Computing",
141
- "model_output.classification.iab_content.tier3.label": "Software and Applications"
142
  },
143
  "expected": {
144
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -149,17 +153,17 @@
149
  "id": "crm-comparison-maps-to-sales",
150
  "mismatches": [
151
  {
152
- "actual": "Technology & Computing",
153
  "expected": "Business and Finance",
154
  "path": "model_output.classification.iab_content.tier1.label"
155
  },
156
  {
157
- "actual": "Computing",
158
  "expected": "Business",
159
  "path": "model_output.classification.iab_content.tier2.label"
160
  },
161
  {
162
- "actual": "Software and Applications",
163
  "expected": "Sales",
164
  "path": "model_output.classification.iab_content.tier3.label"
165
  },
@@ -177,8 +181,8 @@
177
  {
178
  "actual": {
179
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
180
- "model_output.classification.iab_content.tier1.label": "Hobbies & Interests",
181
- "model_output.classification.iab_content.tier2.label": "Content Production",
182
  "model_output.classification.iab_content.tier3.label": null
183
  },
184
  "expected": {
@@ -190,12 +194,12 @@
190
  "id": "marketing-tools-map-to-marketing",
191
  "mismatches": [
192
  {
193
- "actual": "Hobbies & Interests",
194
  "expected": "Business and Finance",
195
  "path": "model_output.classification.iab_content.tier1.label"
196
  },
197
  {
198
- "actual": "Content Production",
199
  "expected": "Business",
200
  "path": "model_output.classification.iab_content.tier2.label"
201
  },
@@ -218,8 +222,8 @@
218
  {
219
  "actual": {
220
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
221
- "model_output.classification.iab_content.tier1.label": "Technology & Computing",
222
- "model_output.classification.iab_content.tier2.label": "Computing"
223
  },
224
  "expected": {
225
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -229,7 +233,12 @@
229
  "id": "ml-explanation-maps-to-ai",
230
  "mismatches": [
231
  {
232
- "actual": "Computing",
 
 
 
 
 
233
  "expected": "Artificial Intelligence",
234
  "path": "model_output.classification.iab_content.tier2.label"
235
  },
@@ -247,9 +256,9 @@
247
  {
248
  "actual": {
249
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
250
- "model_output.classification.iab_content.tier1.label": "Technology & Computing",
251
- "model_output.classification.iab_content.tier2.label": "Computing",
252
- "model_output.classification.iab_content.tier3.label": "Internet"
253
  },
254
  "expected": {
255
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -260,17 +269,17 @@
260
  "id": "support-credential-help-maps-to-business-it",
261
  "mismatches": [
262
  {
263
- "actual": "Technology & Computing",
264
  "expected": "Business and Finance",
265
  "path": "model_output.classification.iab_content.tier1.label"
266
  },
267
  {
268
- "actual": "Computing",
269
  "expected": "Business",
270
  "path": "model_output.classification.iab_content.tier2.label"
271
  },
272
  {
273
- "actual": "Internet",
274
  "expected": "Business I.T.",
275
  "path": "model_output.classification.iab_content.tier3.label"
276
  }
@@ -282,7 +291,7 @@
282
  },
283
  {
284
  "actual": {
285
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
286
  "model_output.classification.iab_content.tier1.label": "Food & Drink",
287
  "model_output.classification.iab_content.tier2.label": "Dining Out"
288
  },
@@ -292,23 +301,17 @@
292
  "model_output.classification.iab_content.tier2.label": "Dining Out"
293
  },
294
  "id": "restaurant-booking-maps-to-dining-out",
295
- "mismatches": [
296
- {
297
- "actual": "nearest_equivalent",
298
- "expected": "exact",
299
- "path": "model_output.classification.iab_content.mapping_mode"
300
- }
301
- ],
302
  "notes": "Generic dining requests should not inherit the repo's business default.",
303
- "pass": false,
304
  "status": "must_fix",
305
  "text": "Book a table for 2 tonight"
306
  },
307
  {
308
  "actual": {
309
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
310
- "model_output.classification.iab_content.tier1.label": "Business and Finance",
311
- "model_output.classification.iab_content.tier2.label": "Business",
312
  "model_output.classification.iab_content.tier3.label": null
313
  },
314
  "expected": {
@@ -320,12 +323,12 @@
320
  "id": "trial-signup-maps-to-software",
321
  "mismatches": [
322
  {
323
- "actual": "Business and Finance",
324
  "expected": "Technology & Computing",
325
  "path": "model_output.classification.iab_content.tier1.label"
326
  },
327
  {
328
- "actual": "Business",
329
  "expected": "Computing",
330
  "path": "model_output.classification.iab_content.tier2.label"
331
  },
@@ -342,10 +345,10 @@
342
  },
343
  {
344
  "actual": {
345
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
346
- "model_output.classification.iab_content.tier1.label": "Technology & Computing",
347
- "model_output.classification.iab_content.tier2.label": "Computing",
348
- "model_output.classification.iab_content.tier3.label": "Software and Applications",
349
  "model_output.classification.iab_content.tier4.label": null
350
  },
351
  "expected": {
@@ -358,7 +361,17 @@
358
  "id": "communication-software-maps-to-tier4",
359
  "mismatches": [
360
  {
361
- "actual": "Software and Applications",
 
 
 
 
 
 
 
 
 
 
362
  "expected": "Computer Software and Applications",
363
  "path": "model_output.classification.iab_content.tier3.label"
364
  },
@@ -366,11 +379,6 @@
366
  "actual": null,
367
  "expected": "Communication",
368
  "path": "model_output.classification.iab_content.tier4.label"
369
- },
370
- {
371
- "actual": "nearest_equivalent",
372
- "expected": "exact",
373
- "path": "model_output.classification.iab_content.mapping_mode"
374
  }
375
  ],
376
  "notes": "Full taxonomy support should preserve the tier4 communication branch.",
@@ -380,9 +388,9 @@
380
  },
381
  {
382
  "actual": {
383
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
384
  "model_output.classification.iab_content.tier1.label": "Food & Drink",
385
- "model_output.classification.iab_content.tier2.label": null
386
  },
387
  "expected": {
388
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -390,20 +398,9 @@
390
  "model_output.classification.iab_content.tier2.label": "Alcoholic Beverages"
391
  },
392
  "id": "vodka-query-maps-to-alcoholic-beverages",
393
- "mismatches": [
394
- {
395
- "actual": null,
396
- "expected": "Alcoholic Beverages",
397
- "path": "model_output.classification.iab_content.tier2.label"
398
- },
399
- {
400
- "actual": "nearest_equivalent",
401
- "expected": "exact",
402
- "path": "model_output.classification.iab_content.mapping_mode"
403
- }
404
- ],
405
  "notes": "Food and beverage prompts should not fall through to the business default.",
406
- "pass": false,
407
  "status": "must_fix",
408
  "text": "what is best vodka drink should i try"
409
  }
 
1
  {
2
  "by_status": {
3
  "must_fix": {
4
+ "failed": 9,
5
+ "passed": 3,
6
  "total": 12
7
  }
8
  },
9
+ "cases_path": "/root/.cache/huggingface/hub/models--admesh--agentic-intent-classifier/snapshots/0584798f8efee6beccd778b0afa06782ab5add60/agentic-intent-classifier/agentic-intent-classifier/examples/iab_mapping_cases.json",
10
  "count": 12,
11
+ "failed": 9,
12
+ "passed": 3,
13
  "results": [
14
  {
15
  "actual": {
 
42
  },
43
  {
44
  "actual": {
45
+ "model_output.classification.iab_content.mapping_mode": "exact",
46
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
47
  "model_output.classification.iab_content.tier2.label": "Computing",
48
  "model_output.classification.iab_content.tier3.label": "Laptops"
 
54
  "model_output.classification.iab_content.tier3.label": "Laptops"
55
  },
56
  "id": "laptop-buying-maps-to-laptops",
57
+ "mismatches": [],
 
 
 
 
 
 
58
  "notes": "Laptop shopping should resolve into the laptops branch, not business sales.",
59
+ "pass": true,
60
  "status": "must_fix",
61
  "text": "Which laptop to buy in 2026"
62
  },
 
64
  "actual": {
65
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
66
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
67
+ "model_output.classification.iab_content.tier2.label": null,
68
  "model_output.classification.iab_content.tier3.label": null
69
  },
70
  "expected": {
 
75
  },
76
  "id": "labtop-buying-maps-to-laptops",
77
  "mismatches": [
78
+ {
79
+ "actual": null,
80
+ "expected": "Computing",
81
+ "path": "model_output.classification.iab_content.tier2.label"
82
+ },
83
  {
84
  "actual": null,
85
  "expected": "Laptops",
 
98
  },
99
  {
100
  "actual": {
101
+ "model_output.classification.iab_content.mapping_mode": "exact",
102
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
103
+ "model_output.classification.iab_content.tier2.label": null,
104
+ "model_output.classification.iab_content.tier3.label": null
105
  },
106
  "expected": {
107
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
117
  "path": "model_output.classification.iab_content.tier1.label"
118
  },
119
  {
120
+ "actual": null,
121
  "expected": "Business",
122
  "path": "model_output.classification.iab_content.tier2.label"
123
  },
124
  {
125
+ "actual": null,
126
  "expected": "Sales",
127
  "path": "model_output.classification.iab_content.tier3.label"
128
+ },
129
+ {
130
+ "actual": "exact",
131
+ "expected": "nearest_equivalent",
132
+ "path": "model_output.classification.iab_content.mapping_mode"
133
  }
134
  ],
135
  "notes": "CRM education should resolve to the closest business/sales path, not generic software.",
 
140
  {
141
  "actual": {
142
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
143
+ "model_output.classification.iab_content.tier1.label": "Careers",
144
+ "model_output.classification.iab_content.tier2.label": null,
145
+ "model_output.classification.iab_content.tier3.label": null
146
  },
147
  "expected": {
148
  "model_output.classification.iab_content.mapping_mode": "exact",
 
153
  "id": "crm-comparison-maps-to-sales",
154
  "mismatches": [
155
  {
156
+ "actual": "Careers",
157
  "expected": "Business and Finance",
158
  "path": "model_output.classification.iab_content.tier1.label"
159
  },
160
  {
161
+ "actual": null,
162
  "expected": "Business",
163
  "path": "model_output.classification.iab_content.tier2.label"
164
  },
165
  {
166
+ "actual": null,
167
  "expected": "Sales",
168
  "path": "model_output.classification.iab_content.tier3.label"
169
  },
 
181
  {
182
  "actual": {
183
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
184
+ "model_output.classification.iab_content.tier1.label": "Technology & Computing",
185
+ "model_output.classification.iab_content.tier2.label": null,
186
  "model_output.classification.iab_content.tier3.label": null
187
  },
188
  "expected": {
 
194
  "id": "marketing-tools-map-to-marketing",
195
  "mismatches": [
196
  {
197
+ "actual": "Technology & Computing",
198
  "expected": "Business and Finance",
199
  "path": "model_output.classification.iab_content.tier1.label"
200
  },
201
  {
202
+ "actual": null,
203
  "expected": "Business",
204
  "path": "model_output.classification.iab_content.tier2.label"
205
  },
 
222
  {
223
  "actual": {
224
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
225
+ "model_output.classification.iab_content.tier1.label": "Careers",
226
+ "model_output.classification.iab_content.tier2.label": null
227
  },
228
  "expected": {
229
  "model_output.classification.iab_content.mapping_mode": "exact",
 
233
  "id": "ml-explanation-maps-to-ai",
234
  "mismatches": [
235
  {
236
+ "actual": "Careers",
237
+ "expected": "Technology & Computing",
238
+ "path": "model_output.classification.iab_content.tier1.label"
239
+ },
240
+ {
241
+ "actual": null,
242
  "expected": "Artificial Intelligence",
243
  "path": "model_output.classification.iab_content.tier2.label"
244
  },
 
256
  {
257
  "actual": {
258
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
259
+ "model_output.classification.iab_content.tier1.label": "Personal Finance",
260
+ "model_output.classification.iab_content.tier2.label": null,
261
+ "model_output.classification.iab_content.tier3.label": null
262
  },
263
  "expected": {
264
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
269
  "id": "support-credential-help-maps-to-business-it",
270
  "mismatches": [
271
  {
272
+ "actual": "Personal Finance",
273
  "expected": "Business and Finance",
274
  "path": "model_output.classification.iab_content.tier1.label"
275
  },
276
  {
277
+ "actual": null,
278
  "expected": "Business",
279
  "path": "model_output.classification.iab_content.tier2.label"
280
  },
281
  {
282
+ "actual": null,
283
  "expected": "Business I.T.",
284
  "path": "model_output.classification.iab_content.tier3.label"
285
  }
 
291
  },
292
  {
293
  "actual": {
294
+ "model_output.classification.iab_content.mapping_mode": "exact",
295
  "model_output.classification.iab_content.tier1.label": "Food & Drink",
296
  "model_output.classification.iab_content.tier2.label": "Dining Out"
297
  },
 
301
  "model_output.classification.iab_content.tier2.label": "Dining Out"
302
  },
303
  "id": "restaurant-booking-maps-to-dining-out",
304
+ "mismatches": [],
 
 
 
 
 
 
305
  "notes": "Generic dining requests should not inherit the repo's business default.",
306
+ "pass": true,
307
  "status": "must_fix",
308
  "text": "Book a table for 2 tonight"
309
  },
310
  {
311
  "actual": {
312
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
313
+ "model_output.classification.iab_content.tier1.label": "Sensitive Topics",
314
+ "model_output.classification.iab_content.tier2.label": null,
315
  "model_output.classification.iab_content.tier3.label": null
316
  },
317
  "expected": {
 
323
  "id": "trial-signup-maps-to-software",
324
  "mismatches": [
325
  {
326
+ "actual": "Sensitive Topics",
327
  "expected": "Technology & Computing",
328
  "path": "model_output.classification.iab_content.tier1.label"
329
  },
330
  {
331
+ "actual": null,
332
  "expected": "Computing",
333
  "path": "model_output.classification.iab_content.tier2.label"
334
  },
 
345
  },
346
  {
347
  "actual": {
348
+ "model_output.classification.iab_content.mapping_mode": "exact",
349
+ "model_output.classification.iab_content.tier1.label": "Careers",
350
+ "model_output.classification.iab_content.tier2.label": "Remote Working",
351
+ "model_output.classification.iab_content.tier3.label": null,
352
  "model_output.classification.iab_content.tier4.label": null
353
  },
354
  "expected": {
 
361
  "id": "communication-software-maps-to-tier4",
362
  "mismatches": [
363
  {
364
+ "actual": "Careers",
365
+ "expected": "Technology & Computing",
366
+ "path": "model_output.classification.iab_content.tier1.label"
367
+ },
368
+ {
369
+ "actual": "Remote Working",
370
+ "expected": "Computing",
371
+ "path": "model_output.classification.iab_content.tier2.label"
372
+ },
373
+ {
374
+ "actual": null,
375
  "expected": "Computer Software and Applications",
376
  "path": "model_output.classification.iab_content.tier3.label"
377
  },
 
379
  "actual": null,
380
  "expected": "Communication",
381
  "path": "model_output.classification.iab_content.tier4.label"
 
 
 
 
 
382
  }
383
  ],
384
  "notes": "Full taxonomy support should preserve the tier4 communication branch.",
 
388
  },
389
  {
390
  "actual": {
391
+ "model_output.classification.iab_content.mapping_mode": "exact",
392
  "model_output.classification.iab_content.tier1.label": "Food & Drink",
393
+ "model_output.classification.iab_content.tier2.label": "Alcoholic Beverages"
394
  },
395
  "expected": {
396
  "model_output.classification.iab_content.mapping_mode": "exact",
 
398
  "model_output.classification.iab_content.tier2.label": "Alcoholic Beverages"
399
  },
400
  "id": "vodka-query-maps-to-alcoholic-beverages",
401
+ "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
402
  "notes": "Food and beverage prompts should not fall through to the business default.",
403
+ "pass": true,
404
  "status": "must_fix",
405
  "text": "what is best vodka drink should i try"
406
  }
artifacts/evaluation/latest/intent_subtype_difficulty_benchmark_confusion_matrix.csv CHANGED
@@ -1,19 +1,19 @@
1
  ,education,product_discovery,comparison,evaluation,deal_seeking,provider_selection,signup,purchase,booking,download,contact_sales,task_execution,onboarding_setup,troubleshooting,account_help,billing_help,follow_up,emotional_reflection
2
  education,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3
- product_discovery,0,13,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0
4
- comparison,2,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5
  evaluation,1,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6
  deal_seeking,0,0,0,0,14,1,0,0,0,0,0,0,0,0,0,0,0,0
7
- provider_selection,0,0,1,0,0,15,0,0,0,0,0,0,0,0,0,0,0,0
8
  signup,0,0,0,0,0,0,16,0,0,0,0,0,0,0,0,0,0,0
9
- purchase,0,0,0,0,0,0,0,14,0,0,0,0,1,0,0,0,0,0
10
- booking,0,0,0,0,0,0,1,0,13,0,0,1,0,0,0,0,0,0
11
- download,0,0,0,0,0,0,0,0,0,14,0,1,0,0,0,0,0,0
12
  contact_sales,0,0,0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0
13
- task_execution,0,0,0,0,0,0,1,0,0,0,0,17,0,0,0,0,0,0
14
- onboarding_setup,0,0,0,0,0,0,0,0,0,0,0,1,16,0,0,0,0,0
15
- troubleshooting,0,0,0,0,0,0,0,0,0,0,0,0,0,14,0,0,0,1
16
- account_help,0,0,0,0,0,0,2,0,0,0,0,0,0,0,12,1,0,0
17
- billing_help,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,0,0
18
  follow_up,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,0
19
  emotional_reflection,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15
 
1
  ,education,product_discovery,comparison,evaluation,deal_seeking,provider_selection,signup,purchase,booking,download,contact_sales,task_execution,onboarding_setup,troubleshooting,account_help,billing_help,follow_up,emotional_reflection
2
  education,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3
+ product_discovery,0,14,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
4
+ comparison,2,0,12,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5
  evaluation,1,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6
  deal_seeking,0,0,0,0,14,1,0,0,0,0,0,0,0,0,0,0,0,0
7
+ provider_selection,0,0,0,0,0,16,0,0,0,0,0,0,0,0,0,0,0,0
8
  signup,0,0,0,0,0,0,16,0,0,0,0,0,0,0,0,0,0,0
9
+ purchase,0,0,1,0,0,0,0,12,0,0,0,0,2,0,0,0,0,0
10
+ booking,0,0,0,0,0,0,1,0,11,1,1,1,0,0,0,0,0,0
11
+ download,0,0,0,0,0,0,0,0,0,13,1,1,0,0,0,0,0,0
12
  contact_sales,0,0,0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0
13
+ task_execution,0,0,0,0,0,0,0,0,0,0,0,17,1,0,0,0,0,0
14
+ onboarding_setup,0,0,0,0,0,0,0,0,0,0,0,0,17,0,0,0,0,0
15
+ troubleshooting,0,0,0,0,0,0,0,0,0,0,0,0,0,13,0,1,0,1
16
+ account_help,0,0,0,0,0,0,0,0,0,0,0,1,0,3,11,0,0,0
17
+ billing_help,0,0,0,0,0,0,0,0,0,0,0,0,1,4,3,7,0,0
18
  follow_up,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,0
19
  emotional_reflection,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15
artifacts/evaluation/latest/intent_subtype_difficulty_benchmark_report.json CHANGED
@@ -1,81 +1,81 @@
1
  {
2
- "accepted_accuracy": 0.9386,
3
- "accepted_coverage": 1.0,
4
- "accuracy": 0.9386,
5
- "confusion_matrix_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/artifacts/evaluation/latest/intent_subtype_difficulty_benchmark_confusion_matrix.csv",
6
  "count": 277,
7
- "dataset_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/data/subtype_benchmark.jsonl",
8
  "difficulty_breakdown": {
9
  "easy": {
10
- "accepted_accuracy": 0.9565,
11
  "accepted_coverage": 1.0,
12
- "accuracy": 0.9565,
13
  "count": 92,
14
  "fallback_rate": 0.0,
15
- "macro_f1": 0.9579
16
  },
17
  "hard": {
18
- "accepted_accuracy": 0.8901,
19
- "accepted_coverage": 1.0,
20
- "accuracy": 0.8901,
21
  "count": 91,
22
- "fallback_rate": 0.0,
23
- "macro_f1": 0.8913
24
  },
25
  "medium": {
26
- "accepted_accuracy": 0.9681,
27
  "accepted_coverage": 1.0,
28
- "accuracy": 0.9681,
29
  "count": 94,
30
  "fallback_rate": 0.0,
31
- "macro_f1": 0.9671
32
  }
33
  },
34
- "fallback_rate": 0.0,
35
  "head": "intent_subtype",
36
- "macro_f1": 0.9401,
37
  "per_class_metrics": {
38
  "account_help": {
39
- "f1-score": 0.8888888888888888,
40
- "precision": 1.0,
41
- "recall": 0.8,
42
  "support": 15.0
43
  },
44
- "accuracy": 0.9386281588447654,
45
  "billing_help": {
46
- "f1-score": 0.967741935483871,
47
- "precision": 0.9375,
48
- "recall": 1.0,
49
  "support": 15.0
50
  },
51
  "booking": {
52
- "f1-score": 0.9285714285714286,
53
  "precision": 1.0,
54
- "recall": 0.8666666666666667,
55
  "support": 15.0
56
  },
57
  "comparison": {
58
- "f1-score": 0.896551724137931,
59
- "precision": 0.9285714285714286,
60
- "recall": 0.8666666666666667,
61
  "support": 15.0
62
  },
63
  "contact_sales": {
64
- "f1-score": 1.0,
65
- "precision": 1.0,
66
  "recall": 1.0,
67
  "support": 15.0
68
  },
69
  "deal_seeking": {
70
- "f1-score": 0.9333333333333333,
71
- "precision": 0.9333333333333333,
72
  "recall": 0.9333333333333333,
73
  "support": 15.0
74
  },
75
  "download": {
76
- "f1-score": 0.9655172413793104,
77
- "precision": 1.0,
78
- "recall": 0.9333333333333333,
79
  "support": 15.0
80
  },
81
  "education": {
@@ -91,8 +91,8 @@
91
  "support": 15.0
92
  },
93
  "evaluation": {
94
- "f1-score": 0.9655172413793104,
95
- "precision": 1.0,
96
  "recall": 0.9333333333333333,
97
  "support": 15.0
98
  },
@@ -103,57 +103,57 @@
103
  "support": 15.0
104
  },
105
  "macro avg": {
106
- "f1-score": 0.9401067100194944,
107
- "precision": 0.9476910208527856,
108
- "recall": 0.9383215323166303,
109
  "support": 277.0
110
  },
111
  "onboarding_setup": {
112
- "f1-score": 0.9411764705882353,
113
- "precision": 0.9411764705882353,
114
- "recall": 0.9411764705882353,
115
  "support": 17.0
116
  },
117
  "product_discovery": {
118
- "f1-score": 0.9285714285714286,
119
  "precision": 1.0,
120
- "recall": 0.8666666666666667,
121
  "support": 15.0
122
  },
123
  "provider_selection": {
124
- "f1-score": 0.9375,
125
- "precision": 0.9375,
126
- "recall": 0.9375,
127
  "support": 16.0
128
  },
129
  "purchase": {
130
- "f1-score": 0.9655172413793104,
131
  "precision": 1.0,
132
- "recall": 0.9333333333333333,
133
  "support": 15.0
134
  },
135
  "signup": {
136
- "f1-score": 0.8888888888888888,
137
- "precision": 0.8,
138
  "recall": 1.0,
139
  "support": 16.0
140
  },
141
  "task_execution": {
142
- "f1-score": 0.8717948717948718,
143
- "precision": 0.8095238095238095,
144
  "recall": 0.9444444444444444,
145
  "support": 18.0
146
  },
147
  "troubleshooting": {
148
- "f1-score": 0.9655172413793104,
149
- "precision": 1.0,
150
- "recall": 0.9333333333333333,
151
  "support": 15.0
152
  },
153
  "weighted avg": {
154
- "f1-score": 0.9391802821325396,
155
- "precision": 0.9455776173285199,
156
- "recall": 0.9386281588447654,
157
  "support": 277.0
158
  }
159
  },
 
1
  {
2
+ "accepted_accuracy": 0.8982,
3
+ "accepted_coverage": 0.9928,
4
+ "accuracy": 0.8917,
5
+ "confusion_matrix_path": "/root/.cache/huggingface/hub/models--admesh--agentic-intent-classifier/snapshots/0584798f8efee6beccd778b0afa06782ab5add60/agentic-intent-classifier/agentic-intent-classifier/artifacts/evaluation/latest/intent_subtype_difficulty_benchmark_confusion_matrix.csv",
6
  "count": 277,
7
+ "dataset_path": "/root/.cache/huggingface/hub/models--admesh--agentic-intent-classifier/snapshots/0584798f8efee6beccd778b0afa06782ab5add60/agentic-intent-classifier/agentic-intent-classifier/data/subtype_benchmark.jsonl",
8
  "difficulty_breakdown": {
9
  "easy": {
10
+ "accepted_accuracy": 0.9239,
11
  "accepted_coverage": 1.0,
12
+ "accuracy": 0.9239,
13
  "count": 92,
14
  "fallback_rate": 0.0,
15
+ "macro_f1": 0.924
16
  },
17
  "hard": {
18
+ "accepted_accuracy": 0.8539,
19
+ "accepted_coverage": 0.978,
20
+ "accuracy": 0.8352,
21
  "count": 91,
22
+ "fallback_rate": 0.022,
23
+ "macro_f1": 0.8241
24
  },
25
  "medium": {
26
+ "accepted_accuracy": 0.9149,
27
  "accepted_coverage": 1.0,
28
+ "accuracy": 0.9149,
29
  "count": 94,
30
  "fallback_rate": 0.0,
31
+ "macro_f1": 0.9094
32
  }
33
  },
34
+ "fallback_rate": 0.0072,
35
  "head": "intent_subtype",
36
+ "macro_f1": 0.8876,
37
  "per_class_metrics": {
38
  "account_help": {
39
+ "f1-score": 0.7586206896551724,
40
+ "precision": 0.7857142857142857,
41
+ "recall": 0.7333333333333333,
42
  "support": 15.0
43
  },
44
+ "accuracy": 0.8916967509025271,
45
  "billing_help": {
46
+ "f1-score": 0.6086956521739131,
47
+ "precision": 0.875,
48
+ "recall": 0.4666666666666667,
49
  "support": 15.0
50
  },
51
  "booking": {
52
+ "f1-score": 0.8461538461538461,
53
  "precision": 1.0,
54
+ "recall": 0.7333333333333333,
55
  "support": 15.0
56
  },
57
  "comparison": {
58
+ "f1-score": 0.8571428571428571,
59
+ "precision": 0.9230769230769231,
60
+ "recall": 0.8,
61
  "support": 15.0
62
  },
63
  "contact_sales": {
64
+ "f1-score": 0.9375,
65
+ "precision": 0.8823529411764706,
66
  "recall": 1.0,
67
  "support": 15.0
68
  },
69
  "deal_seeking": {
70
+ "f1-score": 0.9655172413793104,
71
+ "precision": 1.0,
72
  "recall": 0.9333333333333333,
73
  "support": 15.0
74
  },
75
  "download": {
76
+ "f1-score": 0.8666666666666667,
77
+ "precision": 0.8666666666666667,
78
+ "recall": 0.8666666666666667,
79
  "support": 15.0
80
  },
81
  "education": {
 
91
  "support": 15.0
92
  },
93
  "evaluation": {
94
+ "f1-score": 0.9333333333333333,
95
+ "precision": 0.9333333333333333,
96
  "recall": 0.9333333333333333,
97
  "support": 15.0
98
  },
 
103
  "support": 15.0
104
  },
105
  "macro avg": {
106
+ "f1-score": 0.8875885571005383,
107
+ "precision": 0.9016030130000718,
108
+ "recall": 0.8895061728395063,
109
  "support": 277.0
110
  },
111
  "onboarding_setup": {
112
+ "f1-score": 0.8947368421052632,
113
+ "precision": 0.8095238095238095,
114
+ "recall": 1.0,
115
  "support": 17.0
116
  },
117
  "product_discovery": {
118
+ "f1-score": 0.9655172413793104,
119
  "precision": 1.0,
120
+ "recall": 0.9333333333333333,
121
  "support": 15.0
122
  },
123
  "provider_selection": {
124
+ "f1-score": 0.9696969696969697,
125
+ "precision": 0.9411764705882353,
126
+ "recall": 1.0,
127
  "support": 16.0
128
  },
129
  "purchase": {
130
+ "f1-score": 0.8888888888888888,
131
  "precision": 1.0,
132
+ "recall": 0.8,
133
  "support": 15.0
134
  },
135
  "signup": {
136
+ "f1-score": 0.9696969696969697,
137
+ "precision": 0.9411764705882353,
138
  "recall": 1.0,
139
  "support": 16.0
140
  },
141
  "task_execution": {
142
+ "f1-score": 0.8947368421052632,
143
+ "precision": 0.85,
144
  "recall": 0.9444444444444444,
145
  "support": 18.0
146
  },
147
  "troubleshooting": {
148
+ "f1-score": 0.7428571428571429,
149
+ "precision": 0.65,
150
+ "recall": 0.8666666666666667,
151
  "support": 15.0
152
  },
153
  "weighted avg": {
154
+ "f1-score": 0.8883104280399479,
155
+ "precision": 0.9006650327445612,
156
+ "recall": 0.8916967509025271,
157
  "support": 277.0
158
  }
159
  },
artifacts/evaluation/latest/intent_subtype_extended_cases_confusion_matrix.csv CHANGED
@@ -1,8 +1,8 @@
1
  ,education,product_discovery,comparison,evaluation,deal_seeking,provider_selection,signup,purchase,booking,download,contact_sales,task_execution,onboarding_setup,troubleshooting,account_help,billing_help,follow_up,emotional_reflection
2
  education,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3
  product_discovery,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4
- comparison,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
5
- evaluation,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0
6
  deal_seeking,0,0,0,0,9,0,0,0,0,0,0,0,0,0,0,0,0,0
7
  provider_selection,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0
8
  signup,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
@@ -15,5 +15,5 @@ onboarding_setup,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0
15
  troubleshooting,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
16
  account_help,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,0,0,0
17
  billing_help,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
18
- follow_up,0,0,0,0,3,0,0,0,0,0,0,0,0,0,1,0,8,0
19
  emotional_reflection,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
 
1
  ,education,product_discovery,comparison,evaluation,deal_seeking,provider_selection,signup,purchase,booking,download,contact_sales,task_execution,onboarding_setup,troubleshooting,account_help,billing_help,follow_up,emotional_reflection
2
  education,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3
  product_discovery,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4
+ comparison,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5
+ evaluation,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
6
  deal_seeking,0,0,0,0,9,0,0,0,0,0,0,0,0,0,0,0,0,0
7
  provider_selection,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0
8
  signup,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
 
15
  troubleshooting,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
16
  account_help,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,0,0,0
17
  billing_help,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
18
+ follow_up,1,0,0,0,2,0,0,0,0,0,0,1,0,0,0,0,8,0
19
  emotional_reflection,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
artifacts/evaluation/latest/intent_subtype_extended_cases_report.json CHANGED
@@ -2,16 +2,16 @@
2
  "accepted_accuracy": 0.8491,
3
  "accepted_coverage": 1.0,
4
  "accuracy": 0.8491,
5
- "confusion_matrix_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/artifacts/evaluation/latest/intent_subtype_extended_cases_confusion_matrix.csv",
6
  "count": 53,
7
- "dataset_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/data/subtype/extended_cases.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "intent_subtype",
10
- "macro_f1": 0.8146,
11
  "per_class_metrics": {
12
  "account_help": {
13
- "f1-score": 0.6666666666666666,
14
- "precision": 0.6666666666666666,
15
  "recall": 0.6666666666666666,
16
  "support": 3.0
17
  },
@@ -29,9 +29,9 @@
29
  "support": 0.0
30
  },
31
  "comparison": {
32
- "f1-score": 0.6666666666666666,
33
  "precision": 1.0,
34
- "recall": 0.5,
35
  "support": 2.0
36
  },
37
  "contact_sales": {
@@ -41,8 +41,8 @@
41
  "support": 0.0
42
  },
43
  "deal_seeking": {
44
- "f1-score": 0.8181818181818182,
45
- "precision": 0.6923076923076923,
46
  "recall": 1.0,
47
  "support": 9.0
48
  },
@@ -53,8 +53,8 @@
53
  "support": 0.0
54
  },
55
  "education": {
56
- "f1-score": 0.9333333333333333,
57
- "precision": 0.875,
58
  "recall": 1.0,
59
  "support": 7.0
60
  },
@@ -65,9 +65,9 @@
65
  "support": 0.0
66
  },
67
  "evaluation": {
68
- "f1-score": 0.5,
69
- "precision": 1.0,
70
- "recall": 0.3333333333333333,
71
  "support": 3.0
72
  },
73
  "follow_up": {
@@ -77,9 +77,9 @@
77
  "support": 12.0
78
  },
79
  "macro avg": {
80
- "f1-score": 0.4978114478114478,
81
- "precision": 0.531517094017094,
82
- "recall": 0.5092592592592592,
83
  "support": 53.0
84
  },
85
  "onboarding_setup": {
@@ -89,8 +89,8 @@
89
  "support": 4.0
90
  },
91
  "product_discovery": {
92
- "f1-score": 1.0,
93
- "precision": 1.0,
94
  "recall": 1.0,
95
  "support": 6.0
96
  },
@@ -113,8 +113,8 @@
113
  "support": 0.0
114
  },
115
  "task_execution": {
116
- "f1-score": 1.0,
117
- "precision": 1.0,
118
  "recall": 1.0,
119
  "support": 1.0
120
  },
@@ -125,8 +125,8 @@
125
  "support": 1.0
126
  },
127
  "weighted avg": {
128
- "f1-score": 0.8404230989136648,
129
- "precision": 0.887215771649734,
130
  "recall": 0.8490566037735849,
131
  "support": 53.0
132
  }
 
2
  "accepted_accuracy": 0.8491,
3
  "accepted_coverage": 1.0,
4
  "accuracy": 0.8491,
5
+ "confusion_matrix_path": "/root/.cache/huggingface/hub/models--admesh--agentic-intent-classifier/snapshots/0584798f8efee6beccd778b0afa06782ab5add60/agentic-intent-classifier/agentic-intent-classifier/artifacts/evaluation/latest/intent_subtype_extended_cases_confusion_matrix.csv",
6
  "count": 53,
7
+ "dataset_path": "/root/.cache/huggingface/hub/models--admesh--agentic-intent-classifier/snapshots/0584798f8efee6beccd778b0afa06782ab5add60/agentic-intent-classifier/agentic-intent-classifier/data/subtype/extended_cases.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "intent_subtype",
10
+ "macro_f1": 0.7764,
11
  "per_class_metrics": {
12
  "account_help": {
13
+ "f1-score": 0.8,
14
+ "precision": 1.0,
15
  "recall": 0.6666666666666666,
16
  "support": 3.0
17
  },
 
29
  "support": 0.0
30
  },
31
  "comparison": {
32
+ "f1-score": 1.0,
33
  "precision": 1.0,
34
+ "recall": 1.0,
35
  "support": 2.0
36
  },
37
  "contact_sales": {
 
41
  "support": 0.0
42
  },
43
  "deal_seeking": {
44
+ "f1-score": 0.9,
45
+ "precision": 0.8181818181818182,
46
  "recall": 1.0,
47
  "support": 9.0
48
  },
 
53
  "support": 0.0
54
  },
55
  "education": {
56
+ "f1-score": 0.875,
57
+ "precision": 0.7777777777777778,
58
  "recall": 1.0,
59
  "support": 7.0
60
  },
 
65
  "support": 0.0
66
  },
67
  "evaluation": {
68
+ "f1-score": 0.0,
69
+ "precision": 0.0,
70
+ "recall": 0.0,
71
  "support": 3.0
72
  },
73
  "follow_up": {
 
77
  "support": 12.0
78
  },
79
  "macro avg": {
80
+ "f1-score": 0.474472286972287,
81
+ "precision": 0.46035754369087706,
82
+ "recall": 0.5185185185185186,
83
  "support": 53.0
84
  },
85
  "onboarding_setup": {
 
89
  "support": 4.0
90
  },
91
  "product_discovery": {
92
+ "f1-score": 0.9230769230769231,
93
+ "precision": 0.8571428571428571,
94
  "recall": 1.0,
95
  "support": 6.0
96
  },
 
113
  "support": 0.0
114
  },
115
  "task_execution": {
116
+ "f1-score": 0.6666666666666666,
117
+ "precision": 0.5,
118
  "recall": 1.0,
119
  "support": 1.0
120
  },
 
125
  "support": 1.0
126
  },
127
  "weighted avg": {
128
+ "f1-score": 0.823438668249989,
129
+ "precision": 0.8324076342944268,
130
  "recall": 0.8490566037735849,
131
  "support": 53.0
132
  }
artifacts/evaluation/latest/intent_subtype_hard_cases_confusion_matrix.csv CHANGED
@@ -2,18 +2,18 @@
2
  education,29,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3
  product_discovery,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4
  comparison,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5
- evaluation,0,2,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6
- deal_seeking,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0
7
  provider_selection,0,0,0,1,0,9,0,0,0,0,0,0,0,0,0,0,0,0
8
  signup,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0
9
- purchase,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0
10
  booking,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0
11
  download,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
12
  contact_sales,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
13
  task_execution,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
14
  onboarding_setup,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0
15
  troubleshooting,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,0
16
- account_help,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0
17
  billing_help,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
18
  follow_up,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,11,0
19
  emotional_reflection,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
 
2
  education,29,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3
  product_discovery,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4
  comparison,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5
+ evaluation,2,3,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6
+ deal_seeking,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0
7
  provider_selection,0,0,0,1,0,9,0,0,0,0,0,0,0,0,0,0,0,0
8
  signup,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0
9
+ purchase,0,0,0,0,0,0,1,2,0,0,0,0,0,0,0,0,0,0
10
  booking,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0
11
  download,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
12
  contact_sales,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
13
  task_execution,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
14
  onboarding_setup,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0
15
  troubleshooting,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,0
16
+ account_help,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0
17
  billing_help,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
18
  follow_up,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,11,0
19
  emotional_reflection,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
artifacts/evaluation/latest/intent_subtype_hard_cases_report.json CHANGED
@@ -1,21 +1,21 @@
1
  {
2
- "accepted_accuracy": 0.9468,
3
  "accepted_coverage": 1.0,
4
- "accuracy": 0.9468,
5
- "confusion_matrix_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/artifacts/evaluation/latest/intent_subtype_hard_cases_confusion_matrix.csv",
6
  "count": 94,
7
- "dataset_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/data/subtype/hard_cases.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "intent_subtype",
10
- "macro_f1": 0.9191,
11
  "per_class_metrics": {
12
  "account_help": {
13
- "f1-score": 0.8,
14
- "precision": 0.6666666666666666,
15
- "recall": 1.0,
16
  "support": 2.0
17
  },
18
- "accuracy": 0.9468085106382979,
19
  "billing_help": {
20
  "f1-score": 1.0,
21
  "precision": 1.0,
@@ -41,9 +41,9 @@
41
  "support": 0.0
42
  },
43
  "deal_seeking": {
44
- "f1-score": 1.0,
45
  "precision": 1.0,
46
- "recall": 1.0,
47
  "support": 3.0
48
  },
49
  "download": {
@@ -53,8 +53,8 @@
53
  "support": 0.0
54
  },
55
  "education": {
56
- "f1-score": 1.0,
57
- "precision": 1.0,
58
  "recall": 1.0,
59
  "support": 29.0
60
  },
@@ -65,9 +65,9 @@
65
  "support": 0.0
66
  },
67
  "evaluation": {
68
- "f1-score": 0.7272727272727273,
69
- "precision": 0.8,
70
- "recall": 0.6666666666666666,
71
  "support": 6.0
72
  },
73
  "follow_up": {
@@ -77,9 +77,9 @@
77
  "support": 12.0
78
  },
79
  "macro avg": {
80
- "f1-score": 0.7659288023895194,
81
- "precision": 0.7648148148148147,
82
- "recall": 0.786111111111111,
83
  "support": 94.0
84
  },
85
  "onboarding_setup": {
@@ -89,26 +89,26 @@
89
  "support": 6.0
90
  },
91
  "product_discovery": {
92
- "f1-score": 0.8888888888888888,
93
- "precision": 0.8,
94
  "recall": 1.0,
95
  "support": 8.0
96
  },
97
  "provider_selection": {
98
- "f1-score": 0.9473684210526315,
99
- "precision": 1.0,
100
  "recall": 0.9,
101
  "support": 10.0
102
  },
103
  "purchase": {
104
- "f1-score": 1.0,
105
  "precision": 1.0,
106
- "recall": 1.0,
107
  "support": 3.0
108
  },
109
  "signup": {
110
- "f1-score": 1.0,
111
- "precision": 1.0,
112
  "recall": 1.0,
113
  "support": 3.0
114
  },
@@ -119,15 +119,15 @@
119
  "support": 1.0
120
  },
121
  "troubleshooting": {
122
- "f1-score": 0.8,
123
- "precision": 1.0,
124
  "recall": 0.6666666666666666,
125
  "support": 3.0
126
  },
127
  "weighted avg": {
128
- "f1-score": 0.9478016938458051,
129
- "precision": 0.9578014184397163,
130
- "recall": 0.9468085106382979,
131
  "support": 94.0
132
  }
133
  },
 
1
  {
2
+ "accepted_accuracy": 0.883,
3
  "accepted_coverage": 1.0,
4
+ "accuracy": 0.883,
5
+ "confusion_matrix_path": "/root/.cache/huggingface/hub/models--admesh--agentic-intent-classifier/snapshots/0584798f8efee6beccd778b0afa06782ab5add60/agentic-intent-classifier/agentic-intent-classifier/artifacts/evaluation/latest/intent_subtype_hard_cases_confusion_matrix.csv",
6
  "count": 94,
7
+ "dataset_path": "/root/.cache/huggingface/hub/models--admesh--agentic-intent-classifier/snapshots/0584798f8efee6beccd778b0afa06782ab5add60/agentic-intent-classifier/agentic-intent-classifier/data/subtype/hard_cases.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "intent_subtype",
10
+ "macro_f1": 0.8137,
11
  "per_class_metrics": {
12
  "account_help": {
13
+ "f1-score": 0.5,
14
+ "precision": 0.5,
15
+ "recall": 0.5,
16
  "support": 2.0
17
  },
18
+ "accuracy": 0.8829787234042553,
19
  "billing_help": {
20
  "f1-score": 1.0,
21
  "precision": 1.0,
 
41
  "support": 0.0
42
  },
43
  "deal_seeking": {
44
+ "f1-score": 0.8,
45
  "precision": 1.0,
46
+ "recall": 0.6666666666666666,
47
  "support": 3.0
48
  },
49
  "download": {
 
53
  "support": 0.0
54
  },
55
  "education": {
56
+ "f1-score": 0.9666666666666667,
57
+ "precision": 0.9354838709677419,
58
  "recall": 1.0,
59
  "support": 29.0
60
  },
 
65
  "support": 0.0
66
  },
67
  "evaluation": {
68
+ "f1-score": 0.25,
69
+ "precision": 0.5,
70
+ "recall": 0.16666666666666666,
71
  "support": 6.0
72
  },
73
  "follow_up": {
 
77
  "support": 12.0
78
  },
79
  "macro avg": {
80
+ "f1-score": 0.6780983255239549,
81
+ "precision": 0.693301292494841,
82
+ "recall": 0.6935185185185184,
83
  "support": 94.0
84
  },
85
  "onboarding_setup": {
 
89
  "support": 6.0
90
  },
91
  "product_discovery": {
92
+ "f1-score": 0.8421052631578947,
93
+ "precision": 0.7272727272727273,
94
  "recall": 1.0,
95
  "support": 8.0
96
  },
97
  "provider_selection": {
98
+ "f1-score": 0.9,
99
+ "precision": 0.9,
100
  "recall": 0.9,
101
  "support": 10.0
102
  },
103
  "purchase": {
104
+ "f1-score": 0.8,
105
  "precision": 1.0,
106
+ "recall": 0.6666666666666666,
107
  "support": 3.0
108
  },
109
  "signup": {
110
+ "f1-score": 0.8571428571428571,
111
+ "precision": 0.75,
112
  "recall": 1.0,
113
  "support": 3.0
114
  },
 
119
  "support": 1.0
120
  },
121
  "troubleshooting": {
122
+ "f1-score": 0.6666666666666666,
123
+ "precision": 0.6666666666666666,
124
  "recall": 0.6666666666666666,
125
  "support": 3.0
126
  },
127
  "weighted avg": {
128
+ "f1-score": 0.8700694845346483,
129
+ "precision": 0.879757596555812,
130
+ "recall": 0.8829787234042553,
131
  "support": 94.0
132
  }
133
  },
artifacts/evaluation/latest/intent_subtype_test_confusion_matrix.csv CHANGED
@@ -1,8 +1,8 @@
1
  ,education,product_discovery,comparison,evaluation,deal_seeking,provider_selection,signup,purchase,booking,download,contact_sales,task_execution,onboarding_setup,troubleshooting,account_help,billing_help,follow_up,emotional_reflection
2
  education,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3
- product_discovery,0,7,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4
  comparison,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5
- evaluation,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6
  deal_seeking,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0
7
  provider_selection,0,0,0,1,0,5,0,0,0,0,0,0,0,0,0,0,0,0
8
  signup,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0
@@ -10,10 +10,10 @@ purchase,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
10
  booking,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0
11
  download,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
12
  contact_sales,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
13
- task_execution,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0
14
  onboarding_setup,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0
15
  troubleshooting,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0
16
- account_help,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0
17
  billing_help,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
18
- follow_up,0,0,0,0,2,0,0,0,0,0,0,0,0,1,0,0,8,0
19
  emotional_reflection,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5
 
1
  ,education,product_discovery,comparison,evaluation,deal_seeking,provider_selection,signup,purchase,booking,download,contact_sales,task_execution,onboarding_setup,troubleshooting,account_help,billing_help,follow_up,emotional_reflection
2
  education,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3
+ product_discovery,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4
  comparison,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5
+ evaluation,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
6
  deal_seeking,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0
7
  provider_selection,0,0,0,1,0,5,0,0,0,0,0,0,0,0,0,0,0,0
8
  signup,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0
 
10
  booking,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0
11
  download,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
12
  contact_sales,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
13
+ task_execution,0,0,0,0,0,0,1,0,0,0,0,5,0,0,0,0,0,0
14
  onboarding_setup,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0
15
  troubleshooting,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0
16
+ account_help,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0
17
  billing_help,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
18
+ follow_up,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,8,0
19
  emotional_reflection,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5
artifacts/evaluation/latest/intent_subtype_test_report.json CHANGED
@@ -1,21 +1,21 @@
1
  {
2
- "accepted_accuracy": 0.9,
3
  "accepted_coverage": 1.0,
4
- "accuracy": 0.9,
5
- "confusion_matrix_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/artifacts/evaluation/latest/intent_subtype_test_confusion_matrix.csv",
6
  "count": 70,
7
- "dataset_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/data/subtype/test.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "intent_subtype",
10
- "macro_f1": 0.863,
11
  "per_class_metrics": {
12
  "account_help": {
13
- "f1-score": 1.0,
14
  "precision": 1.0,
15
- "recall": 1.0,
16
  "support": 2.0
17
  },
18
- "accuracy": 0.9,
19
  "billing_help": {
20
  "f1-score": 0.0,
21
  "precision": 0.0,
@@ -29,8 +29,8 @@
29
  "support": 3.0
30
  },
31
  "comparison": {
32
- "f1-score": 1.0,
33
- "precision": 1.0,
34
  "recall": 1.0,
35
  "support": 3.0
36
  },
@@ -53,8 +53,8 @@
53
  "support": 0.0
54
  },
55
  "education": {
56
- "f1-score": 1.0,
57
- "precision": 1.0,
58
  "recall": 1.0,
59
  "support": 14.0
60
  },
@@ -65,9 +65,9 @@
65
  "support": 5.0
66
  },
67
  "evaluation": {
68
- "f1-score": 0.4,
69
- "precision": 0.3333333333333333,
70
- "recall": 0.5,
71
  "support": 2.0
72
  },
73
  "follow_up": {
@@ -77,9 +77,9 @@
77
  "support": 11.0
78
  },
79
  "macro avg": {
80
- "f1-score": 0.6712084293224644,
81
- "precision": 0.6671296296296296,
82
- "recall": 0.6908670033670034,
83
  "support": 70.0
84
  },
85
  "onboarding_setup": {
@@ -89,9 +89,9 @@
89
  "support": 4.0
90
  },
91
  "product_discovery": {
92
- "f1-score": 0.875,
93
- "precision": 0.875,
94
- "recall": 0.875,
95
  "support": 8.0
96
  },
97
  "provider_selection": {
@@ -107,15 +107,15 @@
107
  "support": 0.0
108
  },
109
  "signup": {
110
- "f1-score": 1.0,
111
- "precision": 1.0,
112
  "recall": 1.0,
113
  "support": 2.0
114
  },
115
  "task_execution": {
116
- "f1-score": 1.0,
117
- "precision": 1.0,
118
- "recall": 1.0,
119
  "support": 6.0
120
  },
121
  "troubleshooting": {
@@ -125,9 +125,9 @@
125
  "support": 2.0
126
  },
127
  "weighted avg": {
128
- "f1-score": 0.9058084605453025,
129
- "precision": 0.9266666666666667,
130
- "recall": 0.9,
131
  "support": 70.0
132
  }
133
  },
 
1
  {
2
+ "accepted_accuracy": 0.8714,
3
  "accepted_coverage": 1.0,
4
+ "accuracy": 0.8714,
5
+ "confusion_matrix_path": "/root/.cache/huggingface/hub/models--admesh--agentic-intent-classifier/snapshots/0584798f8efee6beccd778b0afa06782ab5add60/agentic-intent-classifier/agentic-intent-classifier/artifacts/evaluation/latest/intent_subtype_test_confusion_matrix.csv",
6
  "count": 70,
7
+ "dataset_path": "/root/.cache/huggingface/hub/models--admesh--agentic-intent-classifier/snapshots/0584798f8efee6beccd778b0afa06782ab5add60/agentic-intent-classifier/agentic-intent-classifier/data/subtype/test.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "intent_subtype",
10
+ "macro_f1": 0.7807,
11
  "per_class_metrics": {
12
  "account_help": {
13
+ "f1-score": 0.6666666666666666,
14
  "precision": 1.0,
15
+ "recall": 0.5,
16
  "support": 2.0
17
  },
18
+ "accuracy": 0.8714285714285714,
19
  "billing_help": {
20
  "f1-score": 0.0,
21
  "precision": 0.0,
 
29
  "support": 3.0
30
  },
31
  "comparison": {
32
+ "f1-score": 0.8571428571428571,
33
+ "precision": 0.75,
34
  "recall": 1.0,
35
  "support": 3.0
36
  },
 
53
  "support": 0.0
54
  },
55
  "education": {
56
+ "f1-score": 0.9655172413793104,
57
+ "precision": 0.9333333333333333,
58
  "recall": 1.0,
59
  "support": 14.0
60
  },
 
65
  "support": 5.0
66
  },
67
  "evaluation": {
68
+ "f1-score": 0.0,
69
+ "precision": 0.0,
70
+ "recall": 0.0,
71
  "support": 2.0
72
  },
73
  "follow_up": {
 
77
  "support": 11.0
78
  },
79
  "macro avg": {
80
+ "f1-score": 0.6071895459070292,
81
+ "precision": 0.6101851851851853,
82
+ "recall": 0.632996632996633,
83
  "support": 70.0
84
  },
85
  "onboarding_setup": {
 
89
  "support": 4.0
90
  },
91
  "product_discovery": {
92
+ "f1-score": 1.0,
93
+ "precision": 1.0,
94
+ "recall": 1.0,
95
  "support": 8.0
96
  },
97
  "provider_selection": {
 
107
  "support": 0.0
108
  },
109
  "signup": {
110
+ "f1-score": 0.8,
111
+ "precision": 0.6666666666666666,
112
  "recall": 1.0,
113
  "support": 2.0
114
  },
115
  "task_execution": {
116
+ "f1-score": 0.8333333333333334,
117
+ "precision": 0.8333333333333334,
118
+ "recall": 0.8333333333333334,
119
  "support": 6.0
120
  },
121
  "troubleshooting": {
 
125
  "support": 2.0
126
  },
127
  "weighted avg": {
128
+ "f1-score": 0.8661227931749063,
129
+ "precision": 0.8835714285714285,
130
+ "recall": 0.8714285714285714,
131
  "support": 70.0
132
  }
133
  },
artifacts/evaluation/latest/intent_subtype_train_confusion_matrix.csv CHANGED
@@ -1,19 +1,19 @@
1
  ,education,product_discovery,comparison,evaluation,deal_seeking,provider_selection,signup,purchase,booking,download,contact_sales,task_execution,onboarding_setup,troubleshooting,account_help,billing_help,follow_up,emotional_reflection
2
  education,52,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3
- product_discovery,0,31,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4
  comparison,0,0,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5
- evaluation,1,2,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6
- deal_seeking,0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,0,0
7
  provider_selection,0,0,0,0,0,24,0,0,0,0,0,0,0,0,0,0,1,0
8
  signup,0,0,0,0,0,0,16,0,0,0,0,0,0,0,0,0,0,0
9
- purchase,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0
10
- booking,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0
11
  download,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0
12
- contact_sales,0,0,0,0,0,0,2,0,0,0,7,0,0,0,0,0,0,0
13
  task_execution,0,0,0,0,0,0,0,0,0,0,0,19,0,0,0,0,0,0
14
- onboarding_setup,0,0,0,0,0,0,0,0,0,0,0,0,17,0,0,0,0,0
15
- troubleshooting,0,0,0,0,0,0,0,0,0,0,0,0,0,12,1,0,0,0
16
- account_help,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0
17
  billing_help,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0
18
- follow_up,0,0,0,0,0,0,0,0,0,0,0,3,0,1,0,0,32,0
19
  emotional_reflection,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,20
 
1
  ,education,product_discovery,comparison,evaluation,deal_seeking,provider_selection,signup,purchase,booking,download,contact_sales,task_execution,onboarding_setup,troubleshooting,account_help,billing_help,follow_up,emotional_reflection
2
  education,52,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3
+ product_discovery,0,29,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
4
  comparison,0,0,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5
+ evaluation,3,5,0,8,1,0,0,0,0,0,0,0,0,0,0,0,0,0
6
+ deal_seeking,0,0,0,0,10,1,0,0,0,0,0,0,0,0,0,0,0,0
7
  provider_selection,0,0,0,0,0,24,0,0,0,0,0,0,0,0,0,0,1,0
8
  signup,0,0,0,0,0,0,16,0,0,0,0,0,0,0,0,0,0,0
9
+ purchase,0,0,0,0,0,0,1,5,0,0,0,0,0,0,0,0,0,0
10
+ booking,0,0,0,0,0,0,2,0,3,0,0,0,0,0,0,0,0,0
11
  download,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0
12
+ contact_sales,0,0,0,0,0,0,2,0,1,0,6,0,0,0,0,0,0,0
13
  task_execution,0,0,0,0,0,0,0,0,0,0,0,19,0,0,0,0,0,0
14
+ onboarding_setup,0,0,0,0,0,0,0,0,1,0,0,0,16,0,0,0,0,0
15
+ troubleshooting,0,0,0,0,0,0,0,0,0,0,0,0,0,9,2,0,2,0
16
+ account_help,0,0,0,0,0,0,0,0,0,0,0,0,0,1,6,0,0,0
17
  billing_help,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0
18
+ follow_up,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,31,0
19
  emotional_reflection,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,20
artifacts/evaluation/latest/intent_subtype_train_report.json CHANGED
@@ -1,21 +1,21 @@
1
  {
2
- "accepted_accuracy": 0.9649,
3
  "accepted_coverage": 1.0,
4
- "accuracy": 0.9649,
5
- "confusion_matrix_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/artifacts/evaluation/latest/intent_subtype_train_confusion_matrix.csv",
6
  "count": 313,
7
- "dataset_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/data/subtype/train.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "intent_subtype",
10
- "macro_f1": 0.9649,
11
  "per_class_metrics": {
12
  "account_help": {
13
- "f1-score": 0.9333333333333333,
14
- "precision": 0.875,
15
- "recall": 1.0,
16
  "support": 7.0
17
  },
18
- "accuracy": 0.9648562300319489,
19
  "billing_help": {
20
  "f1-score": 1.0,
21
  "precision": 1.0,
@@ -23,27 +23,27 @@
23
  "support": 6.0
24
  },
25
  "booking": {
26
- "f1-score": 1.0,
27
- "precision": 1.0,
28
- "recall": 1.0,
29
  "support": 5.0
30
  },
31
  "comparison": {
32
- "f1-score": 1.0,
33
- "precision": 1.0,
34
  "recall": 1.0,
35
  "support": 15.0
36
  },
37
  "contact_sales": {
38
- "f1-score": 0.875,
39
  "precision": 1.0,
40
- "recall": 0.7777777777777778,
41
  "support": 9.0
42
  },
43
  "deal_seeking": {
44
- "f1-score": 1.0,
45
- "precision": 1.0,
46
- "recall": 1.0,
47
  "support": 11.0
48
  },
49
  "download": {
@@ -53,8 +53,8 @@
53
  "support": 8.0
54
  },
55
  "education": {
56
- "f1-score": 0.9904761904761905,
57
- "precision": 0.9811320754716981,
58
  "recall": 1.0,
59
  "support": 52.0
60
  },
@@ -65,69 +65,69 @@
65
  "support": 20.0
66
  },
67
  "evaluation": {
68
- "f1-score": 0.9032258064516129,
69
  "precision": 1.0,
70
- "recall": 0.8235294117647058,
71
  "support": 17.0
72
  },
73
  "follow_up": {
74
- "f1-score": 0.927536231884058,
75
- "precision": 0.9696969696969697,
76
- "recall": 0.8888888888888888,
77
  "support": 36.0
78
  },
79
  "macro avg": {
80
- "f1-score": 0.9649442256020961,
81
- "precision": 0.9689347311202658,
82
- "recall": 0.9651818334171275,
83
  "support": 313.0
84
  },
85
  "onboarding_setup": {
86
- "f1-score": 1.0,
87
  "precision": 1.0,
88
- "recall": 1.0,
89
  "support": 17.0
90
  },
91
  "product_discovery": {
92
- "f1-score": 0.96875,
93
- "precision": 0.9393939393939394,
94
- "recall": 1.0,
95
  "support": 31.0
96
  },
97
  "provider_selection": {
98
- "f1-score": 0.9795918367346939,
99
- "precision": 1.0,
100
  "recall": 0.96,
101
  "support": 25.0
102
  },
103
  "purchase": {
104
- "f1-score": 1.0,
105
  "precision": 1.0,
106
- "recall": 1.0,
107
  "support": 6.0
108
  },
109
  "signup": {
110
- "f1-score": 0.9411764705882353,
111
- "precision": 0.8888888888888888,
112
  "recall": 1.0,
113
  "support": 16.0
114
  },
115
  "task_execution": {
116
- "f1-score": 0.926829268292683,
117
- "precision": 0.8636363636363636,
118
  "recall": 1.0,
119
  "support": 19.0
120
  },
121
  "troubleshooting": {
122
- "f1-score": 0.9230769230769231,
123
- "precision": 0.9230769230769231,
124
- "recall": 0.9230769230769231,
125
  "support": 13.0
126
  },
127
  "weighted avg": {
128
- "f1-score": 0.9643733669039578,
129
- "precision": 0.967429661617075,
130
- "recall": 0.9648562300319489,
131
  "support": 313.0
132
  }
133
  },
 
1
  {
2
+ "accepted_accuracy": 0.9042,
3
  "accepted_coverage": 1.0,
4
+ "accuracy": 0.9042,
5
+ "confusion_matrix_path": "/root/.cache/huggingface/hub/models--admesh--agentic-intent-classifier/snapshots/0584798f8efee6beccd778b0afa06782ab5add60/agentic-intent-classifier/agentic-intent-classifier/artifacts/evaluation/latest/intent_subtype_train_confusion_matrix.csv",
6
  "count": 313,
7
+ "dataset_path": "/root/.cache/huggingface/hub/models--admesh--agentic-intent-classifier/snapshots/0584798f8efee6beccd778b0afa06782ab5add60/agentic-intent-classifier/agentic-intent-classifier/data/subtype/train.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "intent_subtype",
10
+ "macro_f1": 0.8791,
11
  "per_class_metrics": {
12
  "account_help": {
13
+ "f1-score": 0.8,
14
+ "precision": 0.75,
15
+ "recall": 0.8571428571428571,
16
  "support": 7.0
17
  },
18
+ "accuracy": 0.9041533546325878,
19
  "billing_help": {
20
  "f1-score": 1.0,
21
  "precision": 1.0,
 
23
  "support": 6.0
24
  },
25
  "booking": {
26
+ "f1-score": 0.6,
27
+ "precision": 0.6,
28
+ "recall": 0.6,
29
  "support": 5.0
30
  },
31
  "comparison": {
32
+ "f1-score": 0.967741935483871,
33
+ "precision": 0.9375,
34
  "recall": 1.0,
35
  "support": 15.0
36
  },
37
  "contact_sales": {
38
+ "f1-score": 0.8,
39
  "precision": 1.0,
40
+ "recall": 0.6666666666666666,
41
  "support": 9.0
42
  },
43
  "deal_seeking": {
44
+ "f1-score": 0.9090909090909091,
45
+ "precision": 0.9090909090909091,
46
+ "recall": 0.9090909090909091,
47
  "support": 11.0
48
  },
49
  "download": {
 
53
  "support": 8.0
54
  },
55
  "education": {
56
+ "f1-score": 0.9719626168224299,
57
+ "precision": 0.9454545454545454,
58
  "recall": 1.0,
59
  "support": 52.0
60
  },
 
65
  "support": 20.0
66
  },
67
  "evaluation": {
68
+ "f1-score": 0.64,
69
  "precision": 1.0,
70
+ "recall": 0.47058823529411764,
71
  "support": 17.0
72
  },
73
  "follow_up": {
74
+ "f1-score": 0.8732394366197183,
75
+ "precision": 0.8857142857142857,
76
+ "recall": 0.8611111111111112,
77
  "support": 36.0
78
  },
79
  "macro avg": {
80
+ "f1-score": 0.8791291644367831,
81
+ "precision": 0.9052373525167643,
82
+ "recall": 0.8737167303612591,
83
  "support": 313.0
84
  },
85
  "onboarding_setup": {
86
+ "f1-score": 0.9696969696969697,
87
  "precision": 1.0,
88
+ "recall": 0.9411764705882353,
89
  "support": 17.0
90
  },
91
  "product_discovery": {
92
+ "f1-score": 0.8923076923076924,
93
+ "precision": 0.8529411764705882,
94
+ "recall": 0.9354838709677419,
95
  "support": 31.0
96
  },
97
  "provider_selection": {
98
+ "f1-score": 0.96,
99
+ "precision": 0.96,
100
  "recall": 0.96,
101
  "support": 25.0
102
  },
103
  "purchase": {
104
+ "f1-score": 0.9090909090909091,
105
  "precision": 1.0,
106
+ "recall": 0.8333333333333334,
107
  "support": 6.0
108
  },
109
  "signup": {
110
+ "f1-score": 0.8648648648648649,
111
+ "precision": 0.7619047619047619,
112
  "recall": 1.0,
113
  "support": 16.0
114
  },
115
  "task_execution": {
116
+ "f1-score": 0.8837209302325582,
117
+ "precision": 0.7916666666666666,
118
  "recall": 1.0,
119
  "support": 19.0
120
  },
121
  "troubleshooting": {
122
+ "f1-score": 0.782608695652174,
123
+ "precision": 0.9,
124
+ "recall": 0.6923076923076923,
125
  "support": 13.0
126
  },
127
  "weighted avg": {
128
+ "f1-score": 0.8996108171948927,
129
+ "precision": 0.9128919168596861,
130
+ "recall": 0.9041533546325878,
131
  "support": 313.0
132
  }
133
  },
artifacts/evaluation/latest/intent_subtype_val_confusion_matrix.csv CHANGED
@@ -1,19 +1,19 @@
1
  ,education,product_discovery,comparison,evaluation,deal_seeking,provider_selection,signup,purchase,booking,download,contact_sales,task_execution,onboarding_setup,troubleshooting,account_help,billing_help,follow_up,emotional_reflection
2
  education,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3
  product_discovery,0,10,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
4
- comparison,0,0,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0
5
  evaluation,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6
  deal_seeking,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0
7
  provider_selection,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0
8
  signup,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0
9
- purchase,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0
10
  booking,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0
11
  download,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
12
  contact_sales,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
13
  task_execution,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0
14
- onboarding_setup,0,1,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0
15
  troubleshooting,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
16
  account_help,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0
17
- billing_help,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
18
- follow_up,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,10,0
19
  emotional_reflection,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5
 
1
  ,education,product_discovery,comparison,evaluation,deal_seeking,provider_selection,signup,purchase,booking,download,contact_sales,task_execution,onboarding_setup,troubleshooting,account_help,billing_help,follow_up,emotional_reflection
2
  education,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3
  product_discovery,0,10,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
4
+ comparison,0,0,3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5
  evaluation,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6
  deal_seeking,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0
7
  provider_selection,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0
8
  signup,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0
9
+ purchase,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0
10
  booking,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0
11
  download,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
12
  contact_sales,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
13
  task_execution,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0
14
+ onboarding_setup,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0
15
  troubleshooting,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
16
  account_help,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0
17
+ billing_help,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
18
+ follow_up,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,9,0
19
  emotional_reflection,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5
artifacts/evaluation/latest/intent_subtype_val_report.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "accepted_accuracy": 0.875,
3
  "accepted_coverage": 1.0,
4
- "accuracy": 0.875,
5
- "confusion_matrix_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/artifacts/evaluation/latest/intent_subtype_val_confusion_matrix.csv",
6
  "count": 80,
7
- "dataset_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/data/subtype/val.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "intent_subtype",
10
- "macro_f1": 0.725,
11
  "per_class_metrics": {
12
  "account_help": {
13
  "f1-score": 0.5,
@@ -15,11 +15,11 @@
15
  "recall": 0.5,
16
  "support": 2.0
17
  },
18
- "accuracy": 0.875,
19
  "billing_help": {
20
- "f1-score": 1.0,
21
- "precision": 1.0,
22
- "recall": 1.0,
23
  "support": 1.0
24
  },
25
  "booking": {
@@ -29,9 +29,9 @@
29
  "support": 3.0
30
  },
31
  "comparison": {
32
- "f1-score": 0.4,
33
  "precision": 1.0,
34
- "recall": 0.25,
35
  "support": 4.0
36
  },
37
  "contact_sales": {
@@ -41,8 +41,8 @@
41
  "support": 0.0
42
  },
43
  "deal_seeking": {
44
- "f1-score": 0.5714285714285714,
45
- "precision": 0.4,
46
  "recall": 1.0,
47
  "support": 2.0
48
  },
@@ -65,32 +65,32 @@
65
  "support": 5.0
66
  },
67
  "evaluation": {
68
- "f1-score": 0.6666666666666666,
69
- "precision": 0.5,
70
  "recall": 1.0,
71
  "support": 2.0
72
  },
73
  "follow_up": {
74
- "f1-score": 0.9523809523809523,
75
  "precision": 1.0,
76
- "recall": 0.9090909090909091,
77
  "support": 11.0
78
  },
79
  "macro avg": {
80
- "f1-score": 0.6444203944203944,
81
- "precision": 0.6542087542087542,
82
- "recall": 0.687121212121212,
83
  "support": 80.0
84
  },
85
  "onboarding_setup": {
86
- "f1-score": 0.8,
87
- "precision": 0.8,
88
- "recall": 0.8,
89
  "support": 5.0
90
  },
91
  "product_discovery": {
92
- "f1-score": 0.9090909090909091,
93
- "precision": 0.9090909090909091,
94
  "recall": 0.9090909090909091,
95
  "support": 11.0
96
  },
@@ -101,9 +101,9 @@
101
  "support": 7.0
102
  },
103
  "purchase": {
104
- "f1-score": 0.0,
105
- "precision": 0.0,
106
- "recall": 0.0,
107
  "support": 2.0
108
  },
109
  "signup": {
@@ -113,8 +113,8 @@
113
  "support": 2.0
114
  },
115
  "task_execution": {
116
- "f1-score": 1.0,
117
- "precision": 1.0,
118
  "recall": 1.0,
119
  "support": 8.0
120
  },
@@ -125,9 +125,9 @@
125
  "support": 1.0
126
  },
127
  "weighted avg": {
128
- "f1-score": 0.8644047619047619,
129
- "precision": 0.8891666666666665,
130
- "recall": 0.875,
131
  "support": 80.0
132
  }
133
  },
 
1
  {
2
+ "accepted_accuracy": 0.9,
3
  "accepted_coverage": 1.0,
4
+ "accuracy": 0.9,
5
+ "confusion_matrix_path": "/root/.cache/huggingface/hub/models--admesh--agentic-intent-classifier/snapshots/0584798f8efee6beccd778b0afa06782ab5add60/agentic-intent-classifier/agentic-intent-classifier/artifacts/evaluation/latest/intent_subtype_val_confusion_matrix.csv",
6
  "count": 80,
7
+ "dataset_path": "/root/.cache/huggingface/hub/models--admesh--agentic-intent-classifier/snapshots/0584798f8efee6beccd778b0afa06782ab5add60/agentic-intent-classifier/agentic-intent-classifier/data/subtype/val.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "intent_subtype",
10
+ "macro_f1": 0.7496,
11
  "per_class_metrics": {
12
  "account_help": {
13
  "f1-score": 0.5,
 
15
  "recall": 0.5,
16
  "support": 2.0
17
  },
18
+ "accuracy": 0.9,
19
  "billing_help": {
20
+ "f1-score": 0.0,
21
+ "precision": 0.0,
22
+ "recall": 0.0,
23
  "support": 1.0
24
  },
25
  "booking": {
 
29
  "support": 3.0
30
  },
31
  "comparison": {
32
+ "f1-score": 0.8571428571428571,
33
  "precision": 1.0,
34
+ "recall": 0.75,
35
  "support": 4.0
36
  },
37
  "contact_sales": {
 
41
  "support": 0.0
42
  },
43
  "deal_seeking": {
44
+ "f1-score": 0.6666666666666666,
45
+ "precision": 0.5,
46
  "recall": 1.0,
47
  "support": 2.0
48
  },
 
65
  "support": 5.0
66
  },
67
  "evaluation": {
68
+ "f1-score": 0.8,
69
+ "precision": 0.6666666666666666,
70
  "recall": 1.0,
71
  "support": 2.0
72
  },
73
  "follow_up": {
74
+ "f1-score": 0.9,
75
  "precision": 1.0,
76
+ "recall": 0.8181818181818182,
77
  "support": 11.0
78
  },
79
  "macro avg": {
80
+ "f1-score": 0.6662846956964604,
81
+ "precision": 0.6697530864197531,
82
+ "recall": 0.6931818181818182,
83
  "support": 80.0
84
  },
85
  "onboarding_setup": {
86
+ "f1-score": 0.9090909090909091,
87
+ "precision": 0.8333333333333334,
88
+ "recall": 1.0,
89
  "support": 5.0
90
  },
91
  "product_discovery": {
92
+ "f1-score": 0.9523809523809523,
93
+ "precision": 1.0,
94
  "recall": 0.9090909090909091,
95
  "support": 11.0
96
  },
 
101
  "support": 7.0
102
  },
103
  "purchase": {
104
+ "f1-score": 0.6666666666666666,
105
+ "precision": 1.0,
106
+ "recall": 0.5,
107
  "support": 2.0
108
  },
109
  "signup": {
 
113
  "support": 2.0
114
  },
115
  "task_execution": {
116
+ "f1-score": 0.9411764705882353,
117
+ "precision": 0.8888888888888888,
118
  "recall": 1.0,
119
  "support": 8.0
120
  },
 
125
  "support": 1.0
126
  },
127
  "weighted avg": {
128
+ "f1-score": 0.8968286860198624,
129
+ "precision": 0.9118055555555555,
130
+ "recall": 0.9,
131
  "support": 80.0
132
  }
133
  },
artifacts/evaluation/latest/intent_type_difficulty_benchmark_confusion_matrix.csv CHANGED
@@ -7,5 +7,5 @@ support,0,0,0,0,15,0,0,0,0,0
7
  personal_reflection,0,0,0,0,0,15,0,0,0,0
8
  creative_generation,0,0,0,0,0,0,15,0,0,0
9
  chit_chat,0,0,0,0,0,1,0,14,0,0
10
- ambiguous,0,0,0,0,0,0,0,0,15,0
11
  prohibited,0,0,0,0,1,0,0,0,0,14
 
7
  personal_reflection,0,0,0,0,0,15,0,0,0,0
8
  creative_generation,0,0,0,0,0,0,15,0,0,0
9
  chit_chat,0,0,0,0,0,1,0,14,0,0
10
+ ambiguous,1,0,0,0,0,0,0,0,14,0
11
  prohibited,0,0,0,0,1,0,0,0,0,14
artifacts/evaluation/latest/intent_type_difficulty_benchmark_report.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "accepted_accuracy": 0.9867,
3
  "accepted_coverage": 1.0,
4
- "accuracy": 0.9867,
5
- "confusion_matrix_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/artifacts/evaluation/latest/intent_type_difficulty_benchmark_confusion_matrix.csv",
6
  "count": 150,
7
- "dataset_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/data/intent_type_benchmark.jsonl",
8
  "difficulty_breakdown": {
9
  "easy": {
10
  "accepted_accuracy": 1.0,
@@ -23,23 +23,23 @@
23
  "macro_f1": 0.9596
24
  },
25
  "medium": {
26
- "accepted_accuracy": 1.0,
27
  "accepted_coverage": 1.0,
28
- "accuracy": 1.0,
29
  "count": 50,
30
  "fallback_rate": 0.0,
31
- "macro_f1": 1.0
32
  }
33
  },
34
  "fallback_rate": 0.0,
35
  "head": "intent_type",
36
- "macro_f1": 0.9867,
37
  "per_class_metrics": {
38
- "accuracy": 0.9866666666666667,
39
  "ambiguous": {
40
- "f1-score": 1.0,
41
  "precision": 1.0,
42
- "recall": 1.0,
43
  "support": 15.0
44
  },
45
  "chit_chat": {
@@ -67,15 +67,15 @@
67
  "support": 15.0
68
  },
69
  "informational": {
70
- "f1-score": 1.0,
71
- "precision": 1.0,
72
  "recall": 1.0,
73
  "support": 15.0
74
  },
75
  "macro avg": {
76
- "f1-score": 0.9866518353726363,
77
- "precision": 0.9875,
78
- "recall": 0.9866666666666667,
79
  "support": 150.0
80
  },
81
  "personal_reflection": {
@@ -103,9 +103,9 @@
103
  "support": 15.0
104
  },
105
  "weighted avg": {
106
- "f1-score": 0.9866518353726362,
107
- "precision": 0.9875,
108
- "recall": 0.9866666666666667,
109
  "support": 150.0
110
  }
111
  },
 
1
  {
2
+ "accepted_accuracy": 0.98,
3
  "accepted_coverage": 1.0,
4
+ "accuracy": 0.98,
5
+ "confusion_matrix_path": "/root/.cache/huggingface/hub/models--admesh--agentic-intent-classifier/snapshots/0584798f8efee6beccd778b0afa06782ab5add60/agentic-intent-classifier/agentic-intent-classifier/artifacts/evaluation/latest/intent_type_difficulty_benchmark_confusion_matrix.csv",
6
  "count": 150,
7
+ "dataset_path": "/root/.cache/huggingface/hub/models--admesh--agentic-intent-classifier/snapshots/0584798f8efee6beccd778b0afa06782ab5add60/agentic-intent-classifier/agentic-intent-classifier/data/intent_type_benchmark.jsonl",
8
  "difficulty_breakdown": {
9
  "easy": {
10
  "accepted_accuracy": 1.0,
 
23
  "macro_f1": 0.9596
24
  },
25
  "medium": {
26
+ "accepted_accuracy": 0.98,
27
  "accepted_coverage": 1.0,
28
+ "accuracy": 0.98,
29
  "count": 50,
30
  "fallback_rate": 0.0,
31
+ "macro_f1": 0.9798
32
  }
33
  },
34
  "fallback_rate": 0.0,
35
  "head": "intent_type",
36
+ "macro_f1": 0.98,
37
  "per_class_metrics": {
38
+ "accuracy": 0.98,
39
  "ambiguous": {
40
+ "f1-score": 0.9655172413793104,
41
  "precision": 1.0,
42
+ "recall": 0.9333333333333333,
43
  "support": 15.0
44
  },
45
  "chit_chat": {
 
67
  "support": 15.0
68
  },
69
  "informational": {
70
+ "f1-score": 0.967741935483871,
71
+ "precision": 0.9375,
72
  "recall": 1.0,
73
  "support": 15.0
74
  },
75
  "macro avg": {
76
+ "f1-score": 0.9799777530589543,
77
+ "precision": 0.98125,
78
+ "recall": 0.9800000000000001,
79
  "support": 150.0
80
  },
81
  "personal_reflection": {
 
103
  "support": 15.0
104
  },
105
  "weighted avg": {
106
+ "f1-score": 0.9799777530589544,
107
+ "precision": 0.98125,
108
+ "recall": 0.98,
109
  "support": 150.0
110
  }
111
  },
artifacts/evaluation/latest/intent_type_hard_cases_report.json CHANGED
@@ -2,9 +2,9 @@
2
  "accepted_accuracy": 1.0,
3
  "accepted_coverage": 1.0,
4
  "accuracy": 1.0,
5
- "confusion_matrix_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/artifacts/evaluation/latest/intent_type_hard_cases_confusion_matrix.csv",
6
  "count": 61,
7
- "dataset_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/data/hard_cases.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "intent_type",
10
  "macro_f1": 1.0,
 
2
  "accepted_accuracy": 1.0,
3
  "accepted_coverage": 1.0,
4
  "accuracy": 1.0,
5
+ "confusion_matrix_path": "/root/.cache/huggingface/hub/models--admesh--agentic-intent-classifier/snapshots/0584798f8efee6beccd778b0afa06782ab5add60/agentic-intent-classifier/agentic-intent-classifier/artifacts/evaluation/latest/intent_type_hard_cases_confusion_matrix.csv",
6
  "count": 61,
7
+ "dataset_path": "/root/.cache/huggingface/hub/models--admesh--agentic-intent-classifier/snapshots/0584798f8efee6beccd778b0afa06782ab5add60/agentic-intent-classifier/agentic-intent-classifier/data/hard_cases.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "intent_type",
10
  "macro_f1": 1.0,
artifacts/evaluation/latest/intent_type_test_confusion_matrix.csv CHANGED
@@ -1,11 +1,11 @@
1
  ,informational,exploratory,commercial,transactional,support,personal_reflection,creative_generation,chit_chat,ambiguous,prohibited
2
  informational,8,0,0,0,0,0,0,0,0,0
3
  exploratory,0,1,0,0,0,0,0,0,0,0
4
- commercial,1,0,9,0,0,0,0,0,0,0
5
- transactional,0,0,0,8,0,0,0,0,0,0
6
  support,0,0,0,0,2,0,0,0,0,1
7
  personal_reflection,0,0,0,0,0,5,0,0,0,0
8
- creative_generation,0,0,0,0,0,0,1,0,0,0
9
  chit_chat,0,0,0,0,0,0,0,1,0,0
10
  ambiguous,1,0,1,0,0,0,0,0,7,0
11
  prohibited,0,0,0,0,0,0,0,0,0,1
 
1
  ,informational,exploratory,commercial,transactional,support,personal_reflection,creative_generation,chit_chat,ambiguous,prohibited
2
  informational,8,0,0,0,0,0,0,0,0,0
3
  exploratory,0,1,0,0,0,0,0,0,0,0
4
+ commercial,0,0,10,0,0,0,0,0,0,0
5
+ transactional,0,0,0,7,0,0,1,0,0,0
6
  support,0,0,0,0,2,0,0,0,0,1
7
  personal_reflection,0,0,0,0,0,5,0,0,0,0
8
+ creative_generation,0,0,0,1,0,0,0,0,0,0
9
  chit_chat,0,0,0,0,0,0,0,1,0,0
10
  ambiguous,1,0,1,0,0,0,0,0,7,0
11
  prohibited,0,0,0,0,0,0,0,0,0,1
artifacts/evaluation/latest/intent_type_test_report.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "accepted_accuracy": 0.9149,
3
  "accepted_coverage": 1.0,
4
- "accuracy": 0.9149,
5
- "confusion_matrix_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/artifacts/evaluation/latest/intent_type_test_confusion_matrix.csv",
6
  "count": 47,
7
- "dataset_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/data/test.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "intent_type",
10
- "macro_f1": 0.9131,
11
  "per_class_metrics": {
12
- "accuracy": 0.9148936170212766,
13
  "ambiguous": {
14
  "f1-score": 0.875,
15
  "precision": 1.0,
@@ -23,15 +23,15 @@
23
  "support": 1.0
24
  },
25
  "commercial": {
26
- "f1-score": 0.9,
27
- "precision": 0.9,
28
- "recall": 0.9,
29
  "support": 10.0
30
  },
31
  "creative_generation": {
32
- "f1-score": 1.0,
33
- "precision": 1.0,
34
- "recall": 1.0,
35
  "support": 1.0
36
  },
37
  "exploratory": {
@@ -41,15 +41,15 @@
41
  "support": 1.0
42
  },
43
  "informational": {
44
- "f1-score": 0.8888888888888888,
45
- "precision": 0.8,
46
  "recall": 1.0,
47
  "support": 8.0
48
  },
49
  "macro avg": {
50
- "f1-score": 0.9130555555555555,
51
- "precision": 0.9199999999999999,
52
- "recall": 0.9344444444444445,
53
  "support": 47.0
54
  },
55
  "personal_reflection": {
@@ -71,15 +71,15 @@
71
  "support": 3.0
72
  },
73
  "transactional": {
74
- "f1-score": 1.0,
75
- "precision": 1.0,
76
- "recall": 1.0,
77
  "support": 8.0
78
  },
79
  "weighted avg": {
80
- "f1-score": 0.916016548463357,
81
- "precision": 0.9340425531914893,
82
- "recall": 0.9148936170212766,
83
  "support": 47.0
84
  }
85
  },
 
1
  {
2
+ "accepted_accuracy": 0.8936,
3
  "accepted_coverage": 1.0,
4
+ "accuracy": 0.8936,
5
+ "confusion_matrix_path": "/root/.cache/huggingface/hub/models--admesh--agentic-intent-classifier/snapshots/0584798f8efee6beccd778b0afa06782ab5add60/agentic-intent-classifier/agentic-intent-classifier/artifacts/evaluation/latest/intent_type_test_confusion_matrix.csv",
6
  "count": 47,
7
+ "dataset_path": "/root/.cache/huggingface/hub/models--admesh--agentic-intent-classifier/snapshots/0584798f8efee6beccd778b0afa06782ab5add60/agentic-intent-classifier/agentic-intent-classifier/data/test.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "intent_type",
10
+ "macro_f1": 0.811,
11
  "per_class_metrics": {
12
+ "accuracy": 0.8936170212765957,
13
  "ambiguous": {
14
  "f1-score": 0.875,
15
  "precision": 1.0,
 
23
  "support": 1.0
24
  },
25
  "commercial": {
26
+ "f1-score": 0.9523809523809523,
27
+ "precision": 0.9090909090909091,
28
+ "recall": 1.0,
29
  "support": 10.0
30
  },
31
  "creative_generation": {
32
+ "f1-score": 0.0,
33
+ "precision": 0.0,
34
+ "recall": 0.0,
35
  "support": 1.0
36
  },
37
  "exploratory": {
 
41
  "support": 1.0
42
  },
43
  "informational": {
44
+ "f1-score": 0.9411764705882353,
45
+ "precision": 0.8888888888888888,
46
  "recall": 1.0,
47
  "support": 8.0
48
  },
49
  "macro avg": {
50
+ "f1-score": 0.8110224089635854,
51
+ "precision": 0.8172979797979798,
52
+ "recall": 0.8319444444444443,
53
  "support": 47.0
54
  },
55
  "personal_reflection": {
 
71
  "support": 3.0
72
  },
73
  "transactional": {
74
+ "f1-score": 0.875,
75
+ "precision": 0.875,
76
+ "recall": 0.875,
77
  "support": 8.0
78
  },
79
  "weighted avg": {
80
+ "f1-score": 0.893508254365576,
81
+ "precision": 0.9085536213195787,
82
+ "recall": 0.8936170212765957,
83
  "support": 47.0
84
  }
85
  },
artifacts/evaluation/latest/intent_type_third_wave_cases_confusion_matrix.csv CHANGED
@@ -1,11 +1,11 @@
1
  ,informational,exploratory,commercial,transactional,support,personal_reflection,creative_generation,chit_chat,ambiguous,prohibited
2
  informational,0,0,0,0,0,0,0,0,0,0
3
  exploratory,0,1,0,0,0,0,0,0,0,0
4
- commercial,1,0,11,0,0,0,0,0,0,0
5
  transactional,0,0,0,0,0,0,0,0,0,0
6
  support,0,0,0,0,0,0,0,0,0,0
7
  personal_reflection,0,0,0,0,0,0,0,0,0,0
8
  creative_generation,0,0,0,0,0,0,1,0,0,0
9
  chit_chat,0,0,0,0,0,0,0,1,0,0
10
- ambiguous,1,0,1,0,0,0,0,0,8,0
11
  prohibited,0,0,0,0,0,0,0,0,0,1
 
1
  ,informational,exploratory,commercial,transactional,support,personal_reflection,creative_generation,chit_chat,ambiguous,prohibited
2
  informational,0,0,0,0,0,0,0,0,0,0
3
  exploratory,0,1,0,0,0,0,0,0,0,0
4
+ commercial,0,0,12,0,0,0,0,0,0,0
5
  transactional,0,0,0,0,0,0,0,0,0,0
6
  support,0,0,0,0,0,0,0,0,0,0
7
  personal_reflection,0,0,0,0,0,0,0,0,0,0
8
  creative_generation,0,0,0,0,0,0,1,0,0,0
9
  chit_chat,0,0,0,0,0,0,0,1,0,0
10
+ ambiguous,1,0,2,0,0,0,0,0,7,0
11
  prohibited,0,0,0,0,0,0,0,0,0,1
artifacts/evaluation/latest/intent_type_third_wave_cases_report.json CHANGED
@@ -2,18 +2,18 @@
2
  "accepted_accuracy": 0.8846,
3
  "accepted_coverage": 1.0,
4
  "accuracy": 0.8846,
5
- "confusion_matrix_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/artifacts/evaluation/latest/intent_type_third_wave_cases_confusion_matrix.csv",
6
  "count": 26,
7
- "dataset_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/data/third_wave_cases.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "intent_type",
10
- "macro_f1": 0.8294,
11
  "per_class_metrics": {
12
  "accuracy": 0.8846153846153846,
13
  "ambiguous": {
14
- "f1-score": 0.8888888888888888,
15
  "precision": 1.0,
16
- "recall": 0.8,
17
  "support": 10.0
18
  },
19
  "chit_chat": {
@@ -23,9 +23,9 @@
23
  "support": 1.0
24
  },
25
  "commercial": {
26
- "f1-score": 0.9166666666666666,
27
- "precision": 0.9166666666666666,
28
- "recall": 0.9166666666666666,
29
  "support": 12.0
30
  },
31
  "creative_generation": {
@@ -47,9 +47,9 @@
47
  "support": 0.0
48
  },
49
  "macro avg": {
50
- "f1-score": 0.5805555555555555,
51
- "precision": 0.5916666666666666,
52
- "recall": 0.5716666666666667,
53
  "support": 26.0
54
  },
55
  "personal_reflection": {
@@ -77,8 +77,8 @@
77
  "support": 0.0
78
  },
79
  "weighted avg": {
80
- "f1-score": 0.9188034188034189,
81
- "precision": 0.9615384615384616,
82
  "recall": 0.8846153846153846,
83
  "support": 26.0
84
  }
 
2
  "accepted_accuracy": 0.8846,
3
  "accepted_coverage": 1.0,
4
  "accuracy": 0.8846,
5
+ "confusion_matrix_path": "/root/.cache/huggingface/hub/models--admesh--agentic-intent-classifier/snapshots/0584798f8efee6beccd778b0afa06782ab5add60/agentic-intent-classifier/agentic-intent-classifier/artifacts/evaluation/latest/intent_type_third_wave_cases_confusion_matrix.csv",
6
  "count": 26,
7
+ "dataset_path": "/root/.cache/huggingface/hub/models--admesh--agentic-intent-classifier/snapshots/0584798f8efee6beccd778b0afa06782ab5add60/agentic-intent-classifier/agentic-intent-classifier/data/third_wave_cases.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "intent_type",
10
+ "macro_f1": 0.8209,
11
  "per_class_metrics": {
12
  "accuracy": 0.8846153846153846,
13
  "ambiguous": {
14
+ "f1-score": 0.8235294117647058,
15
  "precision": 1.0,
16
+ "recall": 0.7,
17
  "support": 10.0
18
  },
19
  "chit_chat": {
 
23
  "support": 1.0
24
  },
25
  "commercial": {
26
+ "f1-score": 0.9230769230769231,
27
+ "precision": 0.8571428571428571,
28
+ "recall": 1.0,
29
  "support": 12.0
30
  },
31
  "creative_generation": {
 
47
  "support": 0.0
48
  },
49
  "macro avg": {
50
+ "f1-score": 0.5746606334841629,
51
+ "precision": 0.5857142857142857,
52
+ "recall": 0.5700000000000001,
53
  "support": 26.0
54
  },
55
  "personal_reflection": {
 
77
  "support": 0.0
78
  },
79
  "weighted avg": {
80
+ "f1-score": 0.8966237382526975,
81
+ "precision": 0.9340659340659341,
82
  "recall": 0.8846153846153846,
83
  "support": 26.0
84
  }
artifacts/evaluation/latest/intent_type_train_confusion_matrix.csv CHANGED
@@ -2,7 +2,7 @@
2
  informational,38,0,0,0,0,0,0,0,0,0
3
  exploratory,0,5,0,0,0,0,0,0,0,0
4
  commercial,0,0,36,0,0,0,0,0,0,0
5
- transactional,0,0,0,28,0,0,0,0,0,0
6
  support,0,0,0,0,10,0,0,0,0,0
7
  personal_reflection,0,0,0,0,0,20,0,0,0,0
8
  creative_generation,0,0,0,0,0,0,5,0,0,0
 
2
  informational,38,0,0,0,0,0,0,0,0,0
3
  exploratory,0,5,0,0,0,0,0,0,0,0
4
  commercial,0,0,36,0,0,0,0,0,0,0
5
+ transactional,0,0,0,27,0,0,1,0,0,0
6
  support,0,0,0,0,10,0,0,0,0,0
7
  personal_reflection,0,0,0,0,0,20,0,0,0,0
8
  creative_generation,0,0,0,0,0,0,5,0,0,0
artifacts/evaluation/latest/intent_type_train_report.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "accepted_accuracy": 1.0,
3
  "accepted_coverage": 1.0,
4
- "accuracy": 1.0,
5
- "confusion_matrix_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/artifacts/evaluation/latest/intent_type_train_confusion_matrix.csv",
6
  "count": 183,
7
- "dataset_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/data/train.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "intent_type",
10
- "macro_f1": 1.0,
11
  "per_class_metrics": {
12
- "accuracy": 1.0,
13
  "ambiguous": {
14
  "f1-score": 1.0,
15
  "precision": 1.0,
@@ -29,8 +29,8 @@
29
  "support": 36.0
30
  },
31
  "creative_generation": {
32
- "f1-score": 1.0,
33
- "precision": 1.0,
34
  "recall": 1.0,
35
  "support": 5.0
36
  },
@@ -47,9 +47,9 @@
47
  "support": 38.0
48
  },
49
  "macro avg": {
50
- "f1-score": 1.0,
51
- "precision": 1.0,
52
- "recall": 1.0,
53
  "support": 183.0
54
  },
55
  "personal_reflection": {
@@ -71,15 +71,15 @@
71
  "support": 10.0
72
  },
73
  "transactional": {
74
- "f1-score": 1.0,
75
  "precision": 1.0,
76
- "recall": 1.0,
77
  "support": 28.0
78
  },
79
  "weighted avg": {
80
- "f1-score": 1.0,
81
- "precision": 1.0,
82
- "recall": 1.0,
83
  "support": 183.0
84
  }
85
  },
 
1
  {
2
+ "accepted_accuracy": 0.9945,
3
  "accepted_coverage": 1.0,
4
+ "accuracy": 0.9945,
5
+ "confusion_matrix_path": "/root/.cache/huggingface/hub/models--admesh--agentic-intent-classifier/snapshots/0584798f8efee6beccd778b0afa06782ab5add60/agentic-intent-classifier/agentic-intent-classifier/artifacts/evaluation/latest/intent_type_train_confusion_matrix.csv",
6
  "count": 183,
7
+ "dataset_path": "/root/.cache/huggingface/hub/models--admesh--agentic-intent-classifier/snapshots/0584798f8efee6beccd778b0afa06782ab5add60/agentic-intent-classifier/agentic-intent-classifier/data/train.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "intent_type",
10
+ "macro_f1": 0.9891,
11
  "per_class_metrics": {
12
+ "accuracy": 0.994535519125683,
13
  "ambiguous": {
14
  "f1-score": 1.0,
15
  "precision": 1.0,
 
29
  "support": 36.0
30
  },
31
  "creative_generation": {
32
+ "f1-score": 0.9090909090909091,
33
+ "precision": 0.8333333333333334,
34
  "recall": 1.0,
35
  "support": 5.0
36
  },
 
47
  "support": 38.0
48
  },
49
  "macro avg": {
50
+ "f1-score": 0.989090909090909,
51
+ "precision": 0.9833333333333334,
52
+ "recall": 0.9964285714285716,
53
  "support": 183.0
54
  },
55
  "personal_reflection": {
 
71
  "support": 10.0
72
  },
73
  "transactional": {
74
+ "f1-score": 0.9818181818181818,
75
  "precision": 1.0,
76
+ "recall": 0.9642857142857143,
77
  "support": 28.0
78
  },
79
  "weighted avg": {
80
+ "f1-score": 0.9947342275211128,
81
+ "precision": 0.9954462659380693,
82
+ "recall": 0.994535519125683,
83
  "support": 183.0
84
  }
85
  },