manikumargouni committed on
Commit
1519226
·
verified ·
1 Parent(s): f0d902a

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitignore +1 -0
  2. README.md +440 -156
  3. artifacts/calibration/decision_phase.json +16 -16
  4. artifacts/calibration/iab_content.json +14 -14
  5. artifacts/calibration/intent_subtype.json +17 -17
  6. artifacts/calibration/intent_type.json +14 -14
  7. artifacts/evaluation/latest/combined_demo_benchmark.json +165 -222
  8. artifacts/evaluation/latest/decision_phase_difficulty_benchmark_confusion_matrix.csv +2 -2
  9. artifacts/evaluation/latest/decision_phase_difficulty_benchmark_report.json +24 -24
  10. artifacts/evaluation/latest/decision_phase_final_wave_cases_confusion_matrix.csv +2 -2
  11. artifacts/evaluation/latest/decision_phase_final_wave_cases_report.json +19 -19
  12. artifacts/evaluation/latest/decision_phase_hard_cases_confusion_matrix.csv +1 -1
  13. artifacts/evaluation/latest/decision_phase_hard_cases_report.json +16 -16
  14. artifacts/evaluation/latest/decision_phase_test_confusion_matrix.csv +2 -2
  15. artifacts/evaluation/latest/decision_phase_test_report.json +20 -20
  16. artifacts/evaluation/latest/decision_phase_train_confusion_matrix.csv +4 -4
  17. artifacts/evaluation/latest/decision_phase_train_report.json +22 -22
  18. artifacts/evaluation/latest/decision_phase_val_confusion_matrix.csv +1 -1
  19. artifacts/evaluation/latest/decision_phase_val_report.json +17 -17
  20. artifacts/evaluation/latest/iab_behavior_lock_regression.json +173 -51
  21. artifacts/evaluation/latest/iab_content_cross_vertical_benchmark_report.json +69 -64
  22. artifacts/evaluation/latest/iab_content_difficulty_benchmark_report.json +69 -64
  23. artifacts/evaluation/latest/iab_content_extended_cases_report.json +35 -30
  24. artifacts/evaluation/latest/iab_content_hard_cases_report.json +42 -42
  25. artifacts/evaluation/latest/iab_content_test_report.json +39 -34
  26. artifacts/evaluation/latest/iab_content_train_report.json +46 -50
  27. artifacts/evaluation/latest/iab_content_val_report.json +46 -50
  28. artifacts/evaluation/latest/iab_cross_vertical_behavior_lock_regression.json +0 -0
  29. artifacts/evaluation/latest/iab_cross_vertical_quality_target_eval.json +433 -616
  30. artifacts/evaluation/latest/iab_quality_target_eval.json +60 -68
  31. artifacts/evaluation/latest/intent_subtype_difficulty_benchmark_confusion_matrix.csv +15 -15
  32. artifacts/evaluation/latest/intent_subtype_difficulty_benchmark_report.json +72 -72
  33. artifacts/evaluation/latest/intent_subtype_extended_cases_confusion_matrix.csv +3 -3
  34. artifacts/evaluation/latest/intent_subtype_extended_cases_report.json +23 -23
  35. artifacts/evaluation/latest/intent_subtype_hard_cases_confusion_matrix.csv +2 -2
  36. artifacts/evaluation/latest/intent_subtype_hard_cases_report.json +23 -23
  37. artifacts/evaluation/latest/intent_subtype_test_confusion_matrix.csv +4 -4
  38. artifacts/evaluation/latest/intent_subtype_test_report.json +29 -29
  39. artifacts/evaluation/latest/intent_subtype_train_confusion_matrix.csv +10 -10
  40. artifacts/evaluation/latest/intent_subtype_train_report.json +47 -47
  41. artifacts/evaluation/latest/intent_subtype_val_confusion_matrix.csv +3 -3
  42. artifacts/evaluation/latest/intent_subtype_val_report.json +21 -21
  43. artifacts/evaluation/latest/intent_type_difficulty_benchmark_report.json +2 -2
  44. artifacts/evaluation/latest/intent_type_hard_cases_report.json +2 -2
  45. artifacts/evaluation/latest/intent_type_test_confusion_matrix.csv +1 -1
  46. artifacts/evaluation/latest/intent_type_test_report.json +17 -17
  47. artifacts/evaluation/latest/intent_type_third_wave_cases_confusion_matrix.csv +2 -2
  48. artifacts/evaluation/latest/intent_type_third_wave_cases_report.json +13 -13
  49. artifacts/evaluation/latest/intent_type_train_report.json +4 -4
  50. artifacts/evaluation/latest/intent_type_val_confusion_matrix.csv +3 -3
.gitignore CHANGED
@@ -7,3 +7,4 @@ subtype_model_output/
7
  iab_model_output/
8
  */model.safetensors
9
  iab_hierarchy_model_output/
 
 
7
  iab_model_output/
8
  */model.safetensors
9
  iab_hierarchy_model_output/
10
+ model.safetensors
README.md CHANGED
@@ -1,208 +1,492 @@
1
- ---
2
- language:
3
- - en
4
- library_name: transformers
5
- pipeline_tag: text-classification
6
- base_model: distilbert-base-uncased
7
- metrics:
8
- - accuracy
9
- - f1
10
- tags:
11
- - intent-classification
12
- - multitask
13
- - iab
14
- - conversational-ai
15
- - adtech
16
- - calibrated-confidence
17
- license: apache-2.0
18
- ---
19
-
20
- # admesh/agentic-intent-classifier
21
-
22
- Production-ready intent + IAB classifier bundle for conversational traffic.
23
-
24
- Combines multitask intent modeling, supervised IAB content classification, and per-head confidence calibration to support safe monetization decisions in real time.
25
-
26
- ## Links
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
- - Hugging Face: https://huggingface.co/admesh/agentic-intent-classifier
29
- - GitHub: https://github.com/GouniManikumar12/agentic-intent-classifier
30
-
31
- ## What It Predicts
32
 
33
- | Field | Description |
34
- |---|---|
35
- | `intent.type` | `commercial`, `informational`, `navigational`, `transactional`, … |
36
- | `intent.subtype` | `product_discovery`, `comparison`, `how_to`, … |
37
- | `intent.decision_phase` | `awareness`, `consideration`, `decision`, … |
38
- | `iab_content` | IAB Content Taxonomy 3.0 tier1 / tier2 / tier3 labels |
39
- | `component_confidence` | Per-head calibrated confidence with threshold flags |
40
- | `system_decision` | Monetization eligibility, opportunity type, policy |
41
 
42
- ---
 
 
43
 
44
- ## Deployment Options
 
 
 
 
45
 
46
- ### 1. `transformers.pipeline()` one line anywhere
47
 
48
  ```python
49
- from transformers import pipeline
50
 
51
- clf = pipeline(
52
- "admesh-intent",
53
- model="admesh/agentic-intent-classifier",
54
- trust_remote_code=True,
55
- )
56
-
57
- result = clf("Which laptop should I buy for college?")
58
  ```
59
 
60
- Batch and custom thresholds:
61
 
62
  ```python
63
- # batch
64
  results = clf([
65
  "Best running shoes under $100",
66
- "How does TCP work?",
67
  "Buy noise-cancelling headphones",
68
  ])
69
 
70
- # custom confidence thresholds
71
  result = clf(
72
- "Buy headphones",
73
  threshold_overrides={"intent_type": 0.6, "intent_subtype": 0.35},
74
  )
75
  ```
76
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  ---
78
 
79
- ### 2. HF Inference Endpoints (managed, deploy to AWS / Azure / GCP)
80
 
81
- 1. Go to https://ui.endpoints.huggingface.co
82
- 2. **New Endpoint** → select `admesh/agentic-intent-classifier`
83
- 3. Framework: **PyTorch** — Task: **Text Classification**
84
- 4. Enable **"Load with trust_remote_code"**
85
- 5. Deploy
 
 
86
 
87
- The endpoint serves the same `pipeline()` interface above via REST:
88
 
89
  ```bash
90
- curl https://<your-endpoint>.endpoints.huggingface.cloud \
91
- -H "Authorization: Bearer $HF_TOKEN" \
92
- -H "Content-Type: application/json" \
93
- -d '{"inputs": "Which laptop should I buy for college?"}'
94
  ```
95
 
96
- ---
 
 
 
 
 
97
 
98
- ### 3. HF Spaces (Gradio / Streamlit demo)
99
 
100
- ```python
101
- # app.py for a Gradio Space
102
- import gradio as gr
103
- from transformers import pipeline
104
-
105
- clf = pipeline(
106
- "admesh-intent",
107
- model="admesh/agentic-intent-classifier",
108
- trust_remote_code=True,
109
- )
110
 
111
- def classify(text):
112
- return clf(text)
113
 
114
- gr.Interface(fn=classify, inputs="text", outputs="json").launch()
 
 
 
115
  ```
116
 
117
- ---
118
 
119
- ### 4. Local / notebook via `snapshot_download`
 
 
 
120
 
121
- ```python
122
- import sys
123
- from huggingface_hub import snapshot_download
124
 
125
- local_dir = snapshot_download(
126
- repo_id="admesh/agentic-intent-classifier",
127
- repo_type="model",
128
- )
129
- sys.path.insert(0, local_dir)
130
 
131
- from pipeline import AdmeshIntentPipeline
132
- clf = AdmeshIntentPipeline()
133
- result = clf("I need a CRM for a 5-person startup")
 
 
 
 
134
  ```
135
 
136
- Or the one-liner factory:
137
 
138
- ```python
139
- from pipeline import AdmeshIntentPipeline
140
- clf = AdmeshIntentPipeline.from_pretrained("admesh/agentic-intent-classifier")
 
 
 
 
 
 
 
 
 
 
141
  ```
142
 
143
- ---
144
 
145
- ## Example Output
146
-
147
- ```json
148
- {
149
- "model_output": {
150
- "classification": {
151
- "iab_content": {
152
- "taxonomy": "IAB Content Taxonomy",
153
- "taxonomy_version": "3.0",
154
- "tier1": {"id": "552", "label": "Style & Fashion"},
155
- "tier2": {"id": "579", "label": "Men's Fashion"},
156
- "mapping_mode": "exact",
157
- "mapping_confidence": 0.73
158
- },
159
- "intent": {
160
- "type": "commercial",
161
- "subtype": "product_discovery",
162
- "decision_phase": "consideration",
163
- "confidence": 0.9549,
164
- "commercial_score": 0.656
165
- }
166
- }
167
- },
168
- "system_decision": {
169
- "policy": {
170
- "monetization_eligibility": "allowed_with_caution",
171
- "eligibility_reason": "commercial_discovery_signal_present"
172
- },
173
- "opportunity": {"type": "soft_recommendation", "strength": "medium"}
174
- },
175
- "meta": {
176
- "system_version": "0.6.0-phase4",
177
- "calibration_enabled": true,
178
- "iab_mapping_is_placeholder": false
179
- }
180
- }
181
- ```
182
-
183
- ## Reproducible Revision
184
 
185
- ```python
186
- from huggingface_hub import snapshot_download
187
- local_dir = snapshot_download(
188
- repo_id="admesh/agentic-intent-classifier",
189
- repo_type="model",
190
- revision="0584798f8efee6beccd778b0afa06782ab5add60",
191
- )
192
  ```
193
 
194
- ## Included Artifacts
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
195
 
196
- | Path | Contents |
197
- |---|---|
198
- | `multitask_intent_model_output/` | DistilBERT multitask weights + tokenizer |
199
- | `iab_classifier_model_output/` | IAB content classifier weights + tokenizer |
200
- | `artifacts/calibration/` | Per-head temperature + threshold JSONs |
201
- | `pipeline.py` | `AdmeshIntentPipeline` (transformers.Pipeline subclass) |
202
- | `combined_inference.py` | Core inference logic |
203
 
204
- ## Notes
205
 
206
- - `trust_remote_code=True` is required because this model uses a custom multi-head architecture that does not map to a single standard `AutoModel` checkpoint.
207
- - `meta.iab_mapping_is_placeholder: true` means IAB artifacts were missing or skipped; train and calibrate IAB for full production accuracy.
208
- - For long-running servers, instantiate once and reuse — models are cached in memory after the first call.
 
 
1
+ # Agentic Intent Classifier
2
+
3
+ `agentic-intent-classifier` is a multi-head query classification stack for conversational traffic.
4
+
5
+ It currently produces:
6
+
7
+ - `intent.type`
8
+ - `intent.subtype`
9
+ - `intent.decision_phase`
10
+ - `iab_content`
11
+ - calibrated confidence per head
12
+ - combined fallback / policy / opportunity decisions
13
+
14
+ The repo is beyond the original v0.1 baseline. It now includes:
15
+
16
+ - shared config and label ownership
17
+ - reusable model runtime
18
+ - calibrated confidence and threshold gating
19
+ - combined inference with fallback/policy logic
20
+ - request/response validation in the demo API
21
+ - repeatable evaluation and regression suites
22
+ - full-TSV IAB taxonomy retrieval support through tier4
23
+ - a local embedding index for taxonomy-node retrieval over IAB content paths
24
+ - a separate synthetic full-intent-taxonomy augmentation dataset for non-IAB heads
25
+ - a dedicated intent-type difficulty dataset and held-out benchmark with `easy`, `medium`, and `hard` cases
26
+ - a dedicated decision-phase difficulty dataset and held-out benchmark with `easy`, `medium`, and `hard` cases
27
+
28
+ Generated model weights are intentionally not committed.
29
+
30
+ ## Current Taxonomy
31
+
32
+ ### `intent.type`
33
+
34
+ - `informational`
35
+ - `exploratory`
36
+ - `commercial`
37
+ - `transactional`
38
+ - `support`
39
+ - `personal_reflection`
40
+ - `creative_generation`
41
+ - `chit_chat`
42
+ - `ambiguous`
43
+ - `prohibited`
44
+
45
+ ### `intent.decision_phase`
46
+
47
+ - `awareness`
48
+ - `research`
49
+ - `consideration`
50
+ - `decision`
51
+ - `action`
52
+ - `post_purchase`
53
+ - `support`
54
+
55
+ ### `intent.subtype`
56
+
57
+ - `education`
58
+ - `product_discovery`
59
+ - `comparison`
60
+ - `evaluation`
61
+ - `deal_seeking`
62
+ - `provider_selection`
63
+ - `signup`
64
+ - `purchase`
65
+ - `booking`
66
+ - `download`
67
+ - `contact_sales`
68
+ - `task_execution`
69
+ - `onboarding_setup`
70
+ - `troubleshooting`
71
+ - `account_help`
72
+ - `billing_help`
73
+ - `follow_up`
74
+ - `emotional_reflection`
75
+
76
+ ### `iab_content`
77
+
78
+ - candidates are derived from every row in [data/iab-content/Content Taxonomy 3.0.tsv](data/iab-content/Content%20Taxonomy%203.0.tsv)
79
+ - retrieval output supports `tier1`, `tier2`, `tier3`, and optional `tier4`
80
+
81
+ ## What The System Does
82
+
83
+ - runs three classifier heads:
84
+ - `intent_type`
85
+ - `intent_subtype`
86
+ - `decision_phase`
87
+ - resolves `iab_content` with the supervised IAB classifier (with taxonomy-aware parent fallback); the local embedding index over taxonomy nodes remains available as an optional shadow retrieval baseline
88
+ - applies calibration artifacts when present
89
+ - computes `commercial_score`
90
+ - applies fallback when confidence is too weak or policy-safe blocking is required
91
+ - emits a schema-validated combined envelope
92
+
93
+ ## What The System Does Not Do
94
+
95
+ - it is not a multi-turn memory system
96
+ - it is not a production-optimized low-latency serving path
97
+ - it is not yet trained on large real-traffic human-labeled intent data
98
+ - combined decision logic is still heuristic, even though it is materially stronger than the original baseline
99
+
100
+ ## Project Layout
101
+
102
+ - [config.py](config.py): labels, thresholds, artifact paths, model paths
103
+ - [model_runtime.py](model_runtime.py): shared calibrated inference runtime
104
+ - [combined_inference.py](combined_inference.py): composed system response
105
+ - [inference_intent_type.py](inference_intent_type.py): direct `intent_type` inference entrypoint
106
+ - [inference_iab_classifier.py](inference_iab_classifier.py): direct supervised `iab_content` inference entrypoint
107
+ - [schemas.py](schemas.py): request/response validation
108
+ - [demo_api.py](demo_api.py): local validated API
109
+ - [iab_taxonomy.py](iab_taxonomy.py): full taxonomy parser/index
110
+ - [iab_classifier.py](iab_classifier.py): supervised IAB runtime with taxonomy-aware parent fallback
111
+ - [iab_retrieval.py](iab_retrieval.py): optional shadow retrieval baseline
112
+ - [training/build_full_intent_taxonomy_dataset.py](training/build_full_intent_taxonomy_dataset.py): separate synthetic intent augmentation dataset
113
+ - [training/build_intent_type_difficulty_dataset.py](training/build_intent_type_difficulty_dataset.py): extra `intent_type` augmentation plus held-out difficulty benchmark
114
+ - [training/build_decision_phase_difficulty_dataset.py](training/build_decision_phase_difficulty_dataset.py): extra `decision_phase` augmentation plus held-out difficulty benchmark
115
+ - [training/build_subtype_difficulty_dataset.py](training/build_subtype_difficulty_dataset.py): extra `intent_subtype` augmentation plus held-out difficulty benchmark
116
+ - [training/build_subtype_dataset.py](training/build_subtype_dataset.py): subtype dataset generation from existing corpora
117
+ - [training/train_iab.py](training/train_iab.py): train the supervised IAB classifier head
118
+ - [training/build_iab_taxonomy_embeddings.py](training/build_iab_taxonomy_embeddings.py): build local IAB node embedding artifacts
119
+ - [training/run_full_training_pipeline.py](training/run_full_training_pipeline.py): full multi-head training/calibration/eval pipeline
120
+ - [evaluation/run_evaluation.py](evaluation/run_evaluation.py): repeatable benchmark runner
121
+ - [evaluation/run_regression_suite.py](evaluation/run_regression_suite.py): known-failure regression runner
122
+ - [evaluation/run_iab_mapping_suite.py](evaluation/run_iab_mapping_suite.py): IAB behavior-lock regression runner
123
+ - [evaluation/run_iab_quality_suite.py](evaluation/run_iab_quality_suite.py): curated IAB quality-target runner
124
+ - [known_limitations.md](known_limitations.md): current gaps and caveats
125
+
126
+ ## Quickstart: Run From Hugging Face
127
+
128
+ Download the trained bundle and run inference in a few lines of Python — no local training required.
129
 
130
+ ```python
131
+ import sys
132
+ from huggingface_hub import snapshot_download
 
133
 
134
+ # Download the full bundle (models + calibration + code)
135
+ local_dir = snapshot_download(
136
+ repo_id="admesh/agentic-intent-classifier",
137
+ repo_type="model",
138
+ )
139
+ sys.path.insert(0, local_dir)
 
 
140
 
141
+ # Import and instantiate
142
+ from pipeline import AdmeshIntentPipeline
143
+ clf = AdmeshIntentPipeline()
144
 
145
+ # Classify
146
+ import json
147
+ result = clf("Which laptop should I buy for college?")
148
+ print(json.dumps(result, indent=2))
149
+ ```
150
 
151
+ Or use the one-liner factory method:
152
 
153
  ```python
154
+ from pipeline import AdmeshIntentPipeline # after sys.path.insert above
155
 
156
+ clf = AdmeshIntentPipeline.from_pretrained("admesh/agentic-intent-classifier")
157
+ result = clf("I need a CRM for a 5-person startup")
 
 
 
 
 
158
  ```
159
 
160
+ Batch mode and custom thresholds are also supported:
161
 
162
  ```python
163
+ # Batch
164
  results = clf([
165
  "Best running shoes under $100",
166
+ "How does gradient descent work?",
167
  "Buy noise-cancelling headphones",
168
  ])
169
 
170
+ # Custom confidence thresholds
171
  result = clf(
172
+ "Buy noise-cancelling headphones",
173
  threshold_overrides={"intent_type": 0.6, "intent_subtype": 0.35},
174
  )
175
  ```
176
 
177
+ Verify artifacts and run a smoke test from the CLI:
178
+
179
+ ```bash
180
+ cd "<local_dir>"
181
+ python3 training/pipeline_verify.py
182
+ python3 combined_inference.py "Which CRM should I buy for a 3-person startup?"
183
+ ```
184
+
185
+ Pin a specific revision for reproducibility:
186
+
187
+ ```python
188
+ local_dir = snapshot_download(
189
+ repo_id="admesh/agentic-intent-classifier",
190
+ repo_type="model",
191
+ revision="0584798f8efee6beccd778b0afa06782ab5add60",
192
+ )
193
+ ```
194
+
195
  ---
196
 
197
+ ## Setup (for local training)
198
 
199
+ ```bash
200
+ python3 -m venv .venv
201
+ source .venv/bin/activate
202
+ pip install -r agentic-intent-classifier/requirements.txt
203
+ ```
204
+
205
+ ## Inference (local training path)
206
 
207
+ Run one query locally:
208
 
209
  ```bash
210
+ cd agentic-intent-classifier
211
+ python3 training/train_iab.py
212
+ python3 training/calibrate_confidence.py --head iab_content
213
+ python3 combined_inference.py "Which CRM should I buy for a 3-person startup?"
214
  ```
215
 
216
+ Run only the `intent_type` head:
217
+
218
+ ```bash
219
+ cd agentic-intent-classifier
220
+ python3 inference_intent_type.py "best shoes under 100"
221
+ ```
222
 
223
+ Run the demo API:
224
 
225
+ ```bash
226
+ cd agentic-intent-classifier
227
+ python3 demo_api.py
228
+ ```
 
 
 
 
 
 
229
 
230
+ Example request:
 
231
 
232
+ ```bash
233
+ curl -sS -X POST http://127.0.0.1:8008/classify \
234
+ -H 'Content-Type: application/json' \
235
+ -d '{"text":"I cannot log into my account"}'
236
  ```
237
 
238
+ Infra endpoints:
239
 
240
+ ```bash
241
+ curl -sS http://127.0.0.1:8008/health
242
+ curl -sS http://127.0.0.1:8008/version
243
+ ```
244
 
245
+ Train only the IAB classifier head:
 
 
246
 
247
+ ```bash
248
+ cd agentic-intent-classifier
249
+ python3 training/train_iab.py
250
+ python3 training/calibrate_confidence.py --head iab_content
251
+ ```
252
 
253
+ The online `iab_content` path now uses the compact supervised classifier. Retrieval is still available as an optional shadow baseline.
254
+
255
+ Build the optional retrieval shadow index:
256
+
257
+ ```bash
258
+ cd agentic-intent-classifier
259
+ python3 training/build_iab_taxonomy_embeddings.py
260
  ```
261
 
262
+ By default the shadow retrieval path uses `Alibaba-NLP/gte-Qwen2-1.5B-instruct`. The retrieval runtime applies the model's query-side instruction format and last-token pooling, matching the Hugging Face usage guidance. If you want to point retrieval at a different embedding model, set `IAB_RETRIEVAL_MODEL_NAME_OVERRIDE` before building the index.
263
 
264
+ Open-source users can swap in their own embedding model, but the contract is:
265
+
266
+ - query embeddings and taxonomy-node embeddings must be produced by the same model and model revision
267
+ - after changing models, you must rebuild `artifacts/iab/taxonomy_embeddings.pt`
268
+ - the repository only tests and supports the default model path out of the box
269
+ - not every Hugging Face embedding model is drop-in compatible with this runtime; some require custom pooling, query instructions, or `trust_remote_code`
270
+
271
+ Example override:
272
+
273
+ ```bash
274
+ cd agentic-intent-classifier
275
+ export IAB_RETRIEVAL_MODEL_NAME_OVERRIDE=mixedbread-ai/mxbai-embed-large-v1
276
+ python3 training/build_iab_taxonomy_embeddings.py
277
  ```
278
 
279
+ This writes:
280
 
281
+ - `artifacts/iab/taxonomy_nodes.json`
282
+ - `artifacts/iab/taxonomy_embeddings.pt`
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
283
 
284
+ ## Training
285
+
286
+ ### Full local pipeline
287
+
288
+ ```bash
289
+ cd agentic-intent-classifier
290
+ python3 training/run_full_training_pipeline.py
291
  ```
292
 
293
+ This pipeline now does:
294
+
295
+ 1. build separate full-intent-taxonomy augmentation data
296
+ 2. build separate `intent_type` difficulty augmentation + benchmark
297
+ 3. train `intent_type`
298
+ 4. build subtype corpus
299
+ 5. build separate `intent_subtype` difficulty augmentation + benchmark
300
+ 6. train `intent_subtype`
301
+ 7. build separate `decision_phase` difficulty augmentation + benchmark
302
+ 8. train `decision_phase`
303
+ 9. train `iab_content`
304
+ 10. calibrate all classifier heads, including `iab_content`
305
+ 11. run regression/evaluation unless `--skip-full-eval` is used
306
+
307
+ ### Build datasets individually
308
+
309
+ Separate full-intent augmentation:
310
+
311
+ ```bash
312
+ cd agentic-intent-classifier
313
+ python3 training/build_full_intent_taxonomy_dataset.py
314
+ ```
315
+
316
+ Intent-type difficulty augmentation and benchmark:
317
+
318
+ ```bash
319
+ cd agentic-intent-classifier
320
+ python3 training/build_intent_type_difficulty_dataset.py
321
+ ```
322
+
323
+ Decision-phase difficulty augmentation and benchmark:
324
+
325
+ ```bash
326
+ cd agentic-intent-classifier
327
+ python3 training/build_decision_phase_difficulty_dataset.py
328
+ ```
329
+
330
+ Subtype difficulty augmentation and benchmark:
331
+
332
+ ```bash
333
+ cd agentic-intent-classifier
334
+ python3 training/build_subtype_difficulty_dataset.py
335
+ ```
336
+
337
+ Subtype dataset:
338
+
339
+ ```bash
340
+ cd agentic-intent-classifier
341
+ python3 training/build_subtype_dataset.py
342
+ ```
343
+
344
+ IAB embedding index:
345
+
346
+ ```bash
347
+ cd agentic-intent-classifier
348
+ python3 training/build_iab_taxonomy_embeddings.py
349
+ ```
350
+
351
+ ### Train heads individually
352
+
353
+ ```bash
354
+ cd agentic-intent-classifier
355
+ python3 training/train.py
356
+ python3 training/train_subtype.py
357
+ python3 training/train_decision_phase.py
358
+ ```
359
+
360
+ ### Calibration
361
+
362
+ ```bash
363
+ cd agentic-intent-classifier
364
+ python3 training/calibrate_confidence.py --head intent_type
365
+ python3 training/calibrate_confidence.py --head intent_subtype
366
+ python3 training/calibrate_confidence.py --head decision_phase
367
+ ```
368
+
369
+ ## Evaluation
370
+
371
+ Full evaluation:
372
+
373
+ ```bash
374
+ cd agentic-intent-classifier
375
+ python3 evaluation/run_evaluation.py
376
+ ```
377
+
378
+ Known-failure regression:
379
+
380
+ ```bash
381
+ cd agentic-intent-classifier
382
+ python3 evaluation/run_regression_suite.py
383
+ ```
384
+
385
+ IAB behavior-lock regression:
386
+
387
+ ```bash
388
+ cd agentic-intent-classifier
389
+ python3 evaluation/run_iab_mapping_suite.py
390
+ ```
391
+
392
+ IAB quality-target evaluation:
393
+
394
+ ```bash
395
+ cd agentic-intent-classifier
396
+ python3 evaluation/run_iab_quality_suite.py
397
+ ```
398
+
399
+ Threshold sweeps:
400
+
401
+ ```bash
402
+ cd agentic-intent-classifier
403
+ python3 evaluation/sweep_intent_threshold.py
404
+ ```
405
+
406
+ Artifacts are written to:
407
+
408
+ - `artifacts/calibration/`
409
+ - `artifacts/evaluation/latest/`
410
+
411
+ ## Google Colab
412
+
413
+ Use Colab for the full retraining pass if local memory is limited.
414
+
415
+ Clone once:
416
+
417
+ ```bash
418
+ %cd /content
419
+ !git clone https://github.com/GouniManikumar12/agentic-intent-classifier.git
420
+ %cd /content/agentic-intent-classifier
421
+ ```
422
+
423
+ If the repo is already cloned and you want the latest code, pull manually:
424
+
425
+ ```bash
426
+ !git pull origin main
427
+ ```
428
+
429
+ Full pipeline:
430
+
431
+ ```bash
432
+ !python training/run_full_training_pipeline.py
433
+ ```
434
+
435
+ If full evaluation is too heavy for the current Colab runtime:
436
+
437
+ ```bash
438
+ !python training/run_full_training_pipeline.py \
439
+ --iab-embedding-batch-size 32 \
440
+ --skip-full-eval
441
+ ```
442
+
443
+ Then run eval separately after training:
444
+
445
+ ```bash
446
+ !python evaluation/run_regression_suite.py
447
+ !python evaluation/run_iab_mapping_suite.py
448
+ !python evaluation/run_iab_quality_suite.py
449
+ !python evaluation/run_evaluation.py
450
+ ```
451
+
452
+ ## Current Saved Metrics
453
+
454
+ Generate fresh metrics with:
455
+
456
+ ```bash
457
+ cd agentic-intent-classifier
458
+ python3 evaluation/run_evaluation.py
459
+ ```
460
+
461
+ Do not treat any checked-in summary as canonical unless it was regenerated after the current code and artifacts were built. The online IAB path now uses the supervised classifier (with retrieval kept only as an optional shadow baseline), so older saved reports from the deleted hierarchy stack are not meaningful.
462
+
463
+ ## Latency Note
464
+
465
+ `combined_inference.py` is a debugging/offline path, not a production latency path.
466
+
467
+ Current production truth:
468
+
469
+ - per-request CLI execution is not a sub-50ms architecture
470
+ - production serving should use a long-lived API process with preloaded models
471
+ - if sub-50ms becomes a hard requirement, the serving path will need:
472
+ - persistent loaded models
473
+ - runtime optimization
474
+ - likely fewer model passes or a shared multi-head model
475
+
476
+ ## Current Status
477
+
478
+ Current repo status:
479
 
480
+ - full 10-class `intent.type` taxonomy is wired
481
+ - subtype and phase heads are present
482
+ - difficulty benchmarks are wired for `intent_type`, `intent_subtype`, and `decision_phase`
483
+ - full-TSV IAB taxonomy retrieval is wired through tier4
484
+ - separate full-intent augmentation dataset is in place
485
+ - evaluation/runtime memory handling is improved for large IAB splits
 
486
 
487
+ The main remaining gap is not basic infrastructure anymore. It is improving real-world robustness, especially for:
488
 
489
+ - `decision_phase`
490
+ - `intent_subtype`
491
+ - confidence quality on borderline commercial queries
492
+ - real-traffic supervision beyond synthetic data
artifacts/calibration/decision_phase.json CHANGED
@@ -1,32 +1,32 @@
1
  {
2
  "calibrated": true,
3
  "confidence_threshold": 0.22,
4
- "generated_at": "2026-03-22T17:27:04.263877+00:00",
5
  "head": "decision_phase",
6
  "metrics": {
7
- "calibrated_accuracy": 0.6552,
8
- "calibrated_expected_calibration_error": 0.3883,
9
- "calibrated_negative_log_likelihood": 1.4596,
10
- "mean_calibrated_confidence": 0.2668,
11
- "mean_raw_confidence": 0.2383,
12
- "raw_accuracy": 0.6552,
13
- "raw_expected_calibration_error": 0.4169,
14
- "raw_negative_log_likelihood": 1.5421
15
  },
16
  "minimum_threshold_floor": 0.22,
17
- "optimized_temperature_candidate": 0.798354,
18
  "selected_threshold_before_floor": {
19
- "accepted_accuracy": 0.75,
20
- "coverage": 0.6897,
21
- "threshold": 0.22
22
  },
23
  "selection_split": "val",
24
  "selection_target_precision": 0.75,
25
- "temperature": 0.798354,
26
  "temperature_scaling_applied": true,
27
  "threshold_summary": {
28
- "accepted_accuracy": 0.75,
29
- "coverage": 0.6897,
30
  "threshold": 0.22
31
  }
32
  }
 
1
  {
2
  "calibrated": true,
3
  "confidence_threshold": 0.22,
4
+ "generated_at": "2026-03-25T16:15:10.949430+00:00",
5
  "head": "decision_phase",
6
  "metrics": {
7
+ "calibrated_accuracy": 0.8621,
8
+ "calibrated_expected_calibration_error": 0.0672,
9
+ "calibrated_negative_log_likelihood": 0.4798,
10
+ "mean_calibrated_confidence": 0.868,
11
+ "mean_raw_confidence": 0.868,
12
+ "raw_accuracy": 0.8621,
13
+ "raw_expected_calibration_error": 0.0672,
14
+ "raw_negative_log_likelihood": 0.4798
15
  },
16
  "minimum_threshold_floor": 0.22,
17
+ "optimized_temperature_candidate": 1.000144,
18
  "selected_threshold_before_floor": {
19
+ "accepted_accuracy": 0.8621,
20
+ "coverage": 1.0,
21
+ "threshold": 0.0
22
  },
23
  "selection_split": "val",
24
  "selection_target_precision": 0.75,
25
+ "temperature": 1.000144,
26
  "temperature_scaling_applied": true,
27
  "threshold_summary": {
28
+ "accepted_accuracy": 0.8621,
29
+ "coverage": 1.0,
30
  "threshold": 0.22
31
  }
32
  }
artifacts/calibration/iab_content.json CHANGED
@@ -1,32 +1,32 @@
1
  {
2
  "calibrated": true,
3
  "confidence_threshold": 0.12,
4
- "generated_at": "2026-03-25T07:04:35.676097+00:00",
5
  "head": "iab_content",
6
  "metrics": {
7
- "calibrated_accuracy": 0.9159,
8
- "calibrated_expected_calibration_error": 0.2475,
9
- "calibrated_negative_log_likelihood": 0.5736,
10
- "mean_calibrated_confidence": 0.6684,
11
- "mean_raw_confidence": 0.1932,
12
- "raw_accuracy": 0.9159,
13
- "raw_expected_calibration_error": 0.7227,
14
- "raw_negative_log_likelihood": 1.8448
15
  },
16
  "minimum_threshold_floor": 0.12,
17
- "optimized_temperature_candidate": 0.562804,
18
  "selected_threshold_before_floor": {
19
- "accepted_accuracy": 0.9159,
20
  "coverage": 1.0,
21
  "threshold": 0.0
22
  },
23
  "selection_split": "val",
24
  "selection_target_precision": 0.7,
25
- "temperature": 0.562804,
26
  "temperature_scaling_applied": true,
27
  "threshold_summary": {
28
- "accepted_accuracy": 0.921,
29
- "coverage": 0.9878,
30
  "threshold": 0.12
31
  }
32
  }
 
1
  {
2
  "calibrated": true,
3
  "confidence_threshold": 0.12,
4
+ "generated_at": "2026-03-25T16:17:01.813766+00:00",
5
  "head": "iab_content",
6
  "metrics": {
7
+ "calibrated_accuracy": 0.9308,
8
+ "calibrated_expected_calibration_error": 0.2692,
9
+ "calibrated_negative_log_likelihood": 0.5893,
10
+ "mean_calibrated_confidence": 0.6617,
11
+ "mean_raw_confidence": 0.1809,
12
+ "raw_accuracy": 0.9308,
13
+ "raw_expected_calibration_error": 0.75,
14
+ "raw_negative_log_likelihood": 1.9134
15
  },
16
  "minimum_threshold_floor": 0.12,
17
+ "optimized_temperature_candidate": 0.55869,
18
  "selected_threshold_before_floor": {
19
+ "accepted_accuracy": 0.9308,
20
  "coverage": 1.0,
21
  "threshold": 0.0
22
  },
23
  "selection_split": "val",
24
  "selection_target_precision": 0.7,
25
+ "temperature": 0.55869,
26
  "temperature_scaling_applied": true,
27
  "threshold_summary": {
28
+ "accepted_accuracy": 0.9436,
29
+ "coverage": 0.9775,
30
  "threshold": 0.12
31
  }
32
  }
artifacts/calibration/intent_subtype.json CHANGED
@@ -1,32 +1,32 @@
1
  {
2
  "calibrated": true,
3
  "confidence_threshold": 0.25,
4
- "generated_at": "2026-03-22T17:27:03.399767+00:00",
5
  "head": "intent_subtype",
6
  "metrics": {
7
- "calibrated_accuracy": 0.6667,
8
- "calibrated_expected_calibration_error": 0.2996,
9
- "calibrated_negative_log_likelihood": 1.5163,
10
- "mean_calibrated_confidence": 0.3894,
11
- "mean_raw_confidence": 0.3894,
12
- "raw_accuracy": 0.6667,
13
- "raw_expected_calibration_error": 0.2996,
14
- "raw_negative_log_likelihood": 1.5163
15
  },
16
  "minimum_threshold_floor": 0.25,
17
- "optimized_temperature_candidate": 0.001,
18
  "selected_threshold_before_floor": {
19
- "accepted_accuracy": 0.75,
20
- "coverage": 0.8485,
21
- "threshold": 0.13
22
  },
23
  "selection_split": "val",
24
  "selection_target_precision": 0.75,
25
- "temperature": 1.0,
26
- "temperature_scaling_applied": false,
27
  "threshold_summary": {
28
- "accepted_accuracy": 0.878,
29
- "coverage": 0.6212,
30
  "threshold": 0.25
31
  }
32
  }
 
1
  {
2
  "calibrated": true,
3
  "confidence_threshold": 0.25,
4
+ "generated_at": "2026-03-25T16:15:00.986765+00:00",
5
  "head": "intent_subtype",
6
  "metrics": {
7
+ "calibrated_accuracy": 0.875,
8
+ "calibrated_expected_calibration_error": 0.0667,
9
+ "calibrated_negative_log_likelihood": 0.3811,
10
+ "mean_calibrated_confidence": 0.8307,
11
+ "mean_raw_confidence": 0.7584,
12
+ "raw_accuracy": 0.875,
13
+ "raw_expected_calibration_error": 0.1314,
14
+ "raw_negative_log_likelihood": 0.4541
15
  },
16
  "minimum_threshold_floor": 0.25,
17
+ "optimized_temperature_candidate": 0.824082,
18
  "selected_threshold_before_floor": {
19
+ "accepted_accuracy": 0.875,
20
+ "coverage": 1.0,
21
+ "threshold": 0.0
22
  },
23
  "selection_split": "val",
24
  "selection_target_precision": 0.75,
25
+ "temperature": 0.824082,
26
+ "temperature_scaling_applied": true,
27
  "threshold_summary": {
28
+ "accepted_accuracy": 0.8734,
29
+ "coverage": 0.9875,
30
  "threshold": 0.25
31
  }
32
  }
artifacts/calibration/intent_type.json CHANGED
@@ -1,32 +1,32 @@
1
  {
2
  "calibrated": true,
3
  "confidence_threshold": 0.4,
4
- "generated_at": "2026-03-22T17:27:02.517983+00:00",
5
  "head": "intent_type",
6
  "metrics": {
7
- "calibrated_accuracy": 0.975,
8
- "calibrated_expected_calibration_error": 0.3585,
9
- "calibrated_negative_log_likelihood": 0.5728,
10
- "mean_calibrated_confidence": 0.6165,
11
- "mean_raw_confidence": 0.4936,
12
- "raw_accuracy": 0.975,
13
- "raw_expected_calibration_error": 0.4814,
14
- "raw_negative_log_likelihood": 0.7776
15
  },
16
  "minimum_threshold_floor": 0.4,
17
- "optimized_temperature_candidate": 0.68171,
18
  "selected_threshold_before_floor": {
19
- "accepted_accuracy": 0.975,
20
  "coverage": 1.0,
21
  "threshold": 0.0
22
  },
23
  "selection_split": "val",
24
  "selection_target_precision": 0.8,
25
- "temperature": 0.68171,
26
  "temperature_scaling_applied": true,
27
  "threshold_summary": {
28
- "accepted_accuracy": 1.0,
29
- "coverage": 0.775,
30
  "threshold": 0.4
31
  }
32
  }
 
1
  {
2
  "calibrated": true,
3
  "confidence_threshold": 0.4,
4
+ "generated_at": "2026-03-25T16:14:49.053223+00:00",
5
  "head": "intent_type",
6
  "metrics": {
7
+ "calibrated_accuracy": 0.9362,
8
+ "calibrated_expected_calibration_error": 0.0715,
9
+ "calibrated_negative_log_likelihood": 0.2384,
10
+ "mean_calibrated_confidence": 0.917,
11
+ "mean_raw_confidence": 0.8891,
12
+ "raw_accuracy": 0.9362,
13
+ "raw_expected_calibration_error": 0.0807,
14
+ "raw_negative_log_likelihood": 0.257
15
  },
16
  "minimum_threshold_floor": 0.4,
17
+ "optimized_temperature_candidate": 0.901567,
18
  "selected_threshold_before_floor": {
19
+ "accepted_accuracy": 0.9362,
20
  "coverage": 1.0,
21
  "threshold": 0.0
22
  },
23
  "selection_split": "val",
24
  "selection_target_precision": 0.8,
25
+ "temperature": 0.901567,
26
  "temperature_scaling_applied": true,
27
  "threshold_summary": {
28
+ "accepted_accuracy": 0.9362,
29
+ "coverage": 1.0,
30
  "threshold": 0.4
31
  }
32
  }
artifacts/evaluation/latest/combined_demo_benchmark.json CHANGED
@@ -5,12 +5,13 @@
5
  "response": {
6
  "meta": {
7
  "calibration_enabled": true,
 
8
  "system_version": "0.6.0-phase4"
9
  },
10
  "model_output": {
11
  "classification": {
12
  "iab_content": {
13
- "mapping_confidence": 0.9035,
14
  "mapping_mode": "nearest_equivalent",
15
  "taxonomy": "IAB Content Taxonomy",
16
  "taxonomy_version": "3.0",
@@ -21,10 +22,6 @@
21
  "tier2": {
22
  "id": "599",
23
  "label": "Computing"
24
- },
25
- "tier3": {
26
- "id": "602",
27
- "label": "Software and Applications"
28
  }
29
  },
30
  "intent": {
@@ -32,31 +29,31 @@
32
  "component_confidence": {
33
  "decision_phase": {
34
  "calibrated": true,
35
- "confidence": 0.9947,
36
  "confidence_threshold": 0.22,
37
  "label": "awareness",
38
  "meets_threshold": true,
39
- "raw_confidence": 0.9788
40
  },
41
  "intent_subtype": {
42
  "calibrated": true,
43
- "confidence": 0.9547,
44
  "confidence_threshold": 0.25,
45
  "label": "education",
46
  "meets_threshold": true,
47
- "raw_confidence": 0.9547
48
  },
49
  "intent_type": {
50
  "calibrated": true,
51
- "confidence": 0.9972,
52
  "confidence_threshold": 0.4,
53
  "label": "informational",
54
  "meets_threshold": true,
55
- "raw_confidence": 0.9662
56
  },
57
  "overall_strategy": "min_required_component_confidence"
58
  },
59
- "confidence": 0.9947,
60
  "decision_phase": "awareness",
61
  "subtype": "education",
62
  "summary": "Classified as informational intent with subtype education in the awareness phase.",
@@ -95,18 +92,19 @@
95
  "response": {
96
  "meta": {
97
  "calibration_enabled": true,
 
98
  "system_version": "0.6.0-phase4"
99
  },
100
  "model_output": {
101
  "classification": {
102
  "iab_content": {
103
- "mapping_confidence": 0.8427,
104
  "mapping_mode": "nearest_equivalent",
105
  "taxonomy": "IAB Content Taxonomy",
106
  "taxonomy_version": "3.0",
107
  "tier1": {
108
- "id": "1",
109
- "label": "Automotive"
110
  }
111
  },
112
  "intent": {
@@ -114,31 +112,31 @@
114
  "component_confidence": {
115
  "decision_phase": {
116
  "calibrated": true,
117
- "confidence": 0.9944,
118
  "confidence_threshold": 0.22,
119
  "label": "awareness",
120
  "meets_threshold": true,
121
- "raw_confidence": 0.9779
122
  },
123
  "intent_subtype": {
124
  "calibrated": true,
125
- "confidence": 0.955,
126
  "confidence_threshold": 0.25,
127
  "label": "education",
128
  "meets_threshold": true,
129
- "raw_confidence": 0.955
130
  },
131
  "intent_type": {
132
  "calibrated": true,
133
- "confidence": 0.9969,
134
  "confidence_threshold": 0.4,
135
  "label": "informational",
136
  "meets_threshold": true,
137
- "raw_confidence": 0.9637
138
  },
139
  "overall_strategy": "min_required_component_confidence"
140
  },
141
- "confidence": 0.9944,
142
  "decision_phase": "awareness",
143
  "subtype": "education",
144
  "summary": "Classified as informational intent with subtype education in the awareness phase.",
@@ -177,12 +175,13 @@
177
  "response": {
178
  "meta": {
179
  "calibration_enabled": true,
 
180
  "system_version": "0.6.0-phase4"
181
  },
182
  "model_output": {
183
  "classification": {
184
  "iab_content": {
185
- "mapping_confidence": 0.7798,
186
  "mapping_mode": "nearest_equivalent",
187
  "taxonomy": "IAB Content Taxonomy",
188
  "taxonomy_version": "3.0",
@@ -192,38 +191,38 @@
192
  }
193
  },
194
  "intent": {
195
- "commercial_score": 0.656,
196
  "component_confidence": {
197
  "decision_phase": {
198
  "calibrated": true,
199
- "confidence": 0.9965,
200
  "confidence_threshold": 0.22,
201
  "label": "consideration",
202
  "meets_threshold": true,
203
- "raw_confidence": 0.9846
204
  },
205
  "intent_subtype": {
206
  "calibrated": true,
207
- "confidence": 0.4682,
208
  "confidence_threshold": 0.25,
209
- "label": "product_discovery",
210
  "meets_threshold": true,
211
- "raw_confidence": 0.4682
212
  },
213
  "intent_type": {
214
  "calibrated": true,
215
- "confidence": 0.9995,
216
  "confidence_threshold": 0.4,
217
  "label": "commercial",
218
  "meets_threshold": true,
219
- "raw_confidence": 0.9895
220
  },
221
  "overall_strategy": "min_required_component_confidence"
222
  },
223
- "confidence": 0.4682,
224
  "decision_phase": "consideration",
225
- "subtype": "product_discovery",
226
- "summary": "Classified as commercial intent with subtype product_discovery in the consideration phase.",
227
  "type": "commercial"
228
  }
229
  },
@@ -234,8 +233,8 @@
234
  "consideration"
235
  ],
236
  "opportunity": {
237
- "strength": "medium",
238
- "type": "soft_recommendation"
239
  },
240
  "policy": {
241
  "applied_thresholds": {
@@ -245,7 +244,7 @@
245
  "intent_type_confidence_min": 0.4
246
  },
247
  "decision_basis": "score_threshold",
248
- "eligibility_reason": "commercial_discovery_signal_present",
249
  "monetization_eligibility": "allowed_with_caution",
250
  "regulated_vertical": false,
251
  "sensitivity": "low"
@@ -259,26 +258,19 @@
259
  "response": {
260
  "meta": {
261
  "calibration_enabled": true,
 
262
  "system_version": "0.6.0-phase4"
263
  },
264
  "model_output": {
265
  "classification": {
266
  "iab_content": {
267
- "mapping_confidence": 0.8606,
268
  "mapping_mode": "nearest_equivalent",
269
  "taxonomy": "IAB Content Taxonomy",
270
  "taxonomy_version": "3.0",
271
  "tier1": {
272
  "id": "596",
273
  "label": "Technology & Computing"
274
- },
275
- "tier2": {
276
- "id": "599",
277
- "label": "Computing"
278
- },
279
- "tier3": {
280
- "id": "619",
281
- "label": "Internet"
282
  }
283
  },
284
  "intent": {
@@ -286,31 +278,31 @@
286
  "component_confidence": {
287
  "decision_phase": {
288
  "calibrated": true,
289
- "confidence": 0.9964,
290
  "confidence_threshold": 0.22,
291
  "label": "consideration",
292
  "meets_threshold": true,
293
- "raw_confidence": 0.9842
294
  },
295
  "intent_subtype": {
296
  "calibrated": true,
297
- "confidence": 0.9449,
298
  "confidence_threshold": 0.25,
299
  "label": "comparison",
300
  "meets_threshold": true,
301
- "raw_confidence": 0.9449
302
  },
303
  "intent_type": {
304
  "calibrated": true,
305
- "confidence": 0.9995,
306
  "confidence_threshold": 0.4,
307
  "label": "commercial",
308
  "meets_threshold": true,
309
- "raw_confidence": 0.9892
310
  },
311
  "overall_strategy": "min_required_component_confidence"
312
  },
313
- "confidence": 0.9449,
314
  "decision_phase": "consideration",
315
  "subtype": "comparison",
316
  "summary": "Classified as commercial intent with subtype comparison in the consideration phase.",
@@ -349,12 +341,13 @@
349
  "response": {
350
  "meta": {
351
  "calibration_enabled": true,
 
352
  "system_version": "0.6.0-phase4"
353
  },
354
  "model_output": {
355
  "classification": {
356
  "iab_content": {
357
- "mapping_confidence": 0.8737,
358
  "mapping_mode": "nearest_equivalent",
359
  "taxonomy": "IAB Content Taxonomy",
360
  "taxonomy_version": "3.0",
@@ -372,31 +365,31 @@
372
  "component_confidence": {
373
  "decision_phase": {
374
  "calibrated": true,
375
- "confidence": 0.963,
376
  "confidence_threshold": 0.22,
377
  "label": "decision",
378
  "meets_threshold": true,
379
- "raw_confidence": 0.9122
380
  },
381
  "intent_subtype": {
382
  "calibrated": true,
383
- "confidence": 0.9119,
384
  "confidence_threshold": 0.25,
385
  "label": "provider_selection",
386
  "meets_threshold": true,
387
- "raw_confidence": 0.9119
388
  },
389
  "intent_type": {
390
  "calibrated": true,
391
- "confidence": 0.9994,
392
  "confidence_threshold": 0.4,
393
  "label": "commercial",
394
  "meets_threshold": true,
395
- "raw_confidence": 0.9874
396
  },
397
  "overall_strategy": "min_required_component_confidence"
398
  },
399
- "confidence": 0.9119,
400
  "decision_phase": "decision",
401
  "subtype": "provider_selection",
402
  "summary": "Classified as commercial intent with subtype provider_selection in the decision phase.",
@@ -435,26 +428,19 @@
435
  "response": {
436
  "meta": {
437
  "calibration_enabled": true,
 
438
  "system_version": "0.6.0-phase4"
439
  },
440
  "model_output": {
441
  "classification": {
442
  "iab_content": {
443
- "mapping_confidence": 0.7133,
444
  "mapping_mode": "nearest_equivalent",
445
  "taxonomy": "IAB Content Taxonomy",
446
  "taxonomy_version": "3.0",
447
  "tier1": {
448
- "id": "239",
449
- "label": "Hobbies & Interests"
450
- },
451
- "tier2": {
452
- "id": "264",
453
- "label": "Content Production"
454
- },
455
- "tier3": {
456
- "id": "266",
457
- "label": "Freelance Writing"
458
  }
459
  },
460
  "intent": {
@@ -462,31 +448,31 @@
462
  "component_confidence": {
463
  "decision_phase": {
464
  "calibrated": true,
465
- "confidence": 0.9991,
466
  "confidence_threshold": 0.22,
467
  "label": "action",
468
  "meets_threshold": true,
469
- "raw_confidence": 0.9947
470
  },
471
  "intent_subtype": {
472
  "calibrated": true,
473
- "confidence": 0.9382,
474
  "confidence_threshold": 0.25,
475
  "label": "signup",
476
  "meets_threshold": true,
477
- "raw_confidence": 0.9382
478
  },
479
  "intent_type": {
480
  "calibrated": true,
481
- "confidence": 0.9996,
482
  "confidence_threshold": 0.4,
483
  "label": "transactional",
484
  "meets_threshold": true,
485
- "raw_confidence": 0.9902
486
  },
487
  "overall_strategy": "min_required_component_confidence"
488
  },
489
- "confidence": 0.9382,
490
  "decision_phase": "action",
491
  "subtype": "signup",
492
  "summary": "Classified as transactional intent with subtype signup in the action phase.",
@@ -525,13 +511,14 @@
525
  "response": {
526
  "meta": {
527
  "calibration_enabled": true,
 
528
  "system_version": "0.6.0-phase4"
529
  },
530
  "model_output": {
531
  "classification": {
532
  "iab_content": {
533
- "mapping_confidence": 0.7997,
534
- "mapping_mode": "nearest_equivalent",
535
  "taxonomy": "IAB Content Taxonomy",
536
  "taxonomy_version": "3.0",
537
  "tier1": {
@@ -548,31 +535,31 @@
548
  "component_confidence": {
549
  "decision_phase": {
550
  "calibrated": true,
551
- "confidence": 0.999,
552
  "confidence_threshold": 0.22,
553
  "label": "action",
554
  "meets_threshold": true,
555
- "raw_confidence": 0.9945
556
  },
557
  "intent_subtype": {
558
  "calibrated": true,
559
- "confidence": 0.8724,
560
  "confidence_threshold": 0.25,
561
  "label": "booking",
562
  "meets_threshold": true,
563
- "raw_confidence": 0.8724
564
  },
565
  "intent_type": {
566
  "calibrated": true,
567
- "confidence": 0.9996,
568
  "confidence_threshold": 0.4,
569
  "label": "transactional",
570
  "meets_threshold": true,
571
- "raw_confidence": 0.9901
572
  },
573
  "overall_strategy": "min_required_component_confidence"
574
  },
575
- "confidence": 0.8724,
576
  "decision_phase": "action",
577
  "subtype": "booking",
578
  "summary": "Classified as transactional intent with subtype booking in the action phase.",
@@ -611,30 +598,19 @@
611
  "response": {
612
  "meta": {
613
  "calibration_enabled": true,
 
614
  "system_version": "0.6.0-phase4"
615
  },
616
  "model_output": {
617
  "classification": {
618
  "iab_content": {
619
- "mapping_confidence": 0.8423,
620
  "mapping_mode": "nearest_equivalent",
621
  "taxonomy": "IAB Content Taxonomy",
622
  "taxonomy_version": "3.0",
623
  "tier1": {
624
  "id": "596",
625
  "label": "Technology & Computing"
626
- },
627
- "tier2": {
628
- "id": "599",
629
- "label": "Computing"
630
- },
631
- "tier3": {
632
- "id": "619",
633
- "label": "Internet"
634
- },
635
- "tier4": {
636
- "id": "620",
637
- "label": "Cloud Computing"
638
  }
639
  },
640
  "intent": {
@@ -642,31 +618,31 @@
642
  "component_confidence": {
643
  "decision_phase": {
644
  "calibrated": true,
645
- "confidence": 0.9736,
646
  "confidence_threshold": 0.22,
647
  "label": "post_purchase",
648
  "meets_threshold": true,
649
- "raw_confidence": 0.9264
650
  },
651
  "intent_subtype": {
652
  "calibrated": true,
653
- "confidence": 0.921,
654
  "confidence_threshold": 0.25,
655
  "label": "onboarding_setup",
656
  "meets_threshold": true,
657
- "raw_confidence": 0.921
658
  },
659
  "intent_type": {
660
  "calibrated": true,
661
- "confidence": 0.9935,
662
  "confidence_threshold": 0.4,
663
  "label": "transactional",
664
  "meets_threshold": true,
665
- "raw_confidence": 0.9448
666
  },
667
  "overall_strategy": "min_required_component_confidence"
668
  },
669
- "confidence": 0.9736,
670
  "decision_phase": "post_purchase",
671
  "subtype": "onboarding_setup",
672
  "summary": "Classified as transactional intent with subtype onboarding_setup in the post_purchase phase.",
@@ -705,26 +681,19 @@
705
  "response": {
706
  "meta": {
707
  "calibration_enabled": true,
 
708
  "system_version": "0.6.0-phase4"
709
  },
710
  "model_output": {
711
  "classification": {
712
  "iab_content": {
713
- "mapping_confidence": 0.8039,
714
  "mapping_mode": "nearest_equivalent",
715
  "taxonomy": "IAB Content Taxonomy",
716
  "taxonomy_version": "3.0",
717
  "tier1": {
718
- "id": "596",
719
- "label": "Technology & Computing"
720
- },
721
- "tier2": {
722
- "id": "599",
723
- "label": "Computing"
724
- },
725
- "tier3": {
726
- "id": "619",
727
- "label": "Internet"
728
  }
729
  },
730
  "intent": {
@@ -732,31 +701,31 @@
732
  "component_confidence": {
733
  "decision_phase": {
734
  "calibrated": true,
735
- "confidence": 0.9969,
736
  "confidence_threshold": 0.22,
737
  "label": "support",
738
  "meets_threshold": true,
739
- "raw_confidence": 0.9863
740
  },
741
  "intent_subtype": {
742
  "calibrated": true,
743
- "confidence": 0.923,
744
  "confidence_threshold": 0.25,
745
  "label": "account_help",
746
  "meets_threshold": true,
747
- "raw_confidence": 0.923
748
  },
749
  "intent_type": {
750
  "calibrated": true,
751
- "confidence": 0.9988,
752
  "confidence_threshold": 0.4,
753
  "label": "support",
754
  "meets_threshold": true,
755
- "raw_confidence": 0.9811
756
  },
757
  "overall_strategy": "min_required_component_confidence"
758
  },
759
- "confidence": 0.923,
760
  "decision_phase": "support",
761
  "subtype": "account_help",
762
  "summary": "Classified as support intent with subtype account_help in the support phase.",
@@ -801,22 +770,19 @@
801
  "response": {
802
  "meta": {
803
  "calibration_enabled": true,
 
804
  "system_version": "0.6.0-phase4"
805
  },
806
  "model_output": {
807
  "classification": {
808
  "iab_content": {
809
- "mapping_confidence": 0.7854,
810
  "mapping_mode": "nearest_equivalent",
811
  "taxonomy": "IAB Content Taxonomy",
812
  "taxonomy_version": "3.0",
813
  "tier1": {
814
- "id": "286",
815
- "label": "Medical Health"
816
- },
817
- "tier2": {
818
- "id": "287",
819
- "label": "Diseases and Conditions"
820
  }
821
  },
822
  "intent": {
@@ -824,31 +790,31 @@
824
  "component_confidence": {
825
  "decision_phase": {
826
  "calibrated": true,
827
- "confidence": 0.9699,
828
  "confidence_threshold": 0.22,
829
  "label": "awareness",
830
  "meets_threshold": true,
831
- "raw_confidence": 0.9258
832
  },
833
  "intent_subtype": {
834
  "calibrated": true,
835
- "confidence": 0.9435,
836
  "confidence_threshold": 0.25,
837
  "label": "emotional_reflection",
838
  "meets_threshold": true,
839
- "raw_confidence": 0.9435
840
  },
841
  "intent_type": {
842
  "calibrated": true,
843
- "confidence": 0.9916,
844
  "confidence_threshold": 0.4,
845
  "label": "personal_reflection",
846
  "meets_threshold": true,
847
- "raw_confidence": 0.9406
848
  },
849
  "overall_strategy": "min_required_component_confidence"
850
  },
851
- "confidence": 0.9435,
852
  "decision_phase": "awareness",
853
  "subtype": "emotional_reflection",
854
  "summary": "Classified as personal_reflection intent with subtype emotional_reflection in the awareness phase.",
@@ -893,18 +859,19 @@
893
  "response": {
894
  "meta": {
895
  "calibration_enabled": true,
 
896
  "system_version": "0.6.0-phase4"
897
  },
898
  "model_output": {
899
  "classification": {
900
  "iab_content": {
901
- "mapping_confidence": 0.7304,
902
  "mapping_mode": "nearest_equivalent",
903
  "taxonomy": "IAB Content Taxonomy",
904
  "taxonomy_version": "3.0",
905
  "tier1": {
906
- "id": "SPSHQ5",
907
- "label": "Genres"
908
  }
909
  },
910
  "intent": {
@@ -912,31 +879,31 @@
912
  "component_confidence": {
913
  "decision_phase": {
914
  "calibrated": true,
915
- "confidence": 0.9934,
916
  "confidence_threshold": 0.22,
917
  "label": "research",
918
  "meets_threshold": true,
919
- "raw_confidence": 0.9746
920
  },
921
  "intent_subtype": {
922
  "calibrated": true,
923
- "confidence": 0.9631,
924
  "confidence_threshold": 0.25,
925
  "label": "follow_up",
926
  "meets_threshold": true,
927
- "raw_confidence": 0.9631
928
  },
929
  "intent_type": {
930
  "calibrated": true,
931
- "confidence": 0.9934,
932
  "confidence_threshold": 0.4,
933
  "label": "ambiguous",
934
  "meets_threshold": true,
935
- "raw_confidence": 0.9405
936
  },
937
  "overall_strategy": "min_required_component_confidence"
938
  },
939
- "confidence": 0.9934,
940
  "decision_phase": "research",
941
  "subtype": "follow_up",
942
  "summary": "Classified as ambiguous intent with subtype follow_up in the research phase.",
@@ -981,22 +948,19 @@
981
  "response": {
982
  "meta": {
983
  "calibration_enabled": true,
 
984
  "system_version": "0.6.0-phase4"
985
  },
986
  "model_output": {
987
  "classification": {
988
  "iab_content": {
989
- "mapping_confidence": 0.7779,
990
  "mapping_mode": "nearest_equivalent",
991
  "taxonomy": "IAB Content Taxonomy",
992
  "taxonomy_version": "3.0",
993
  "tier1": {
994
- "id": "52",
995
- "label": "Business and Finance"
996
- },
997
- "tier2": {
998
- "id": "53",
999
- "label": "Business"
1000
  }
1001
  },
1002
  "intent": {
@@ -1004,31 +968,31 @@
1004
  "component_confidence": {
1005
  "decision_phase": {
1006
  "calibrated": true,
1007
- "confidence": 0.9888,
1008
  "confidence_threshold": 0.22,
1009
  "label": "research",
1010
  "meets_threshold": true,
1011
- "raw_confidence": 0.9639
1012
  },
1013
  "intent_subtype": {
1014
  "calibrated": true,
1015
- "confidence": 0.9487,
1016
  "confidence_threshold": 0.25,
1017
  "label": "follow_up",
1018
  "meets_threshold": true,
1019
- "raw_confidence": 0.9487
1020
  },
1021
  "intent_type": {
1022
  "calibrated": true,
1023
- "confidence": 0.9916,
1024
  "confidence_threshold": 0.4,
1025
  "label": "ambiguous",
1026
  "meets_threshold": true,
1027
- "raw_confidence": 0.9321
1028
  },
1029
  "overall_strategy": "min_required_component_confidence"
1030
  },
1031
- "confidence": 0.9888,
1032
  "decision_phase": "research",
1033
  "subtype": "follow_up",
1034
  "summary": "Classified as ambiguous intent with subtype follow_up in the research phase.",
@@ -1073,30 +1037,23 @@
1073
  "response": {
1074
  "meta": {
1075
  "calibration_enabled": true,
 
1076
  "system_version": "0.6.0-phase4"
1077
  },
1078
  "model_output": {
1079
  "classification": {
1080
  "iab_content": {
1081
- "mapping_confidence": 0.7753,
1082
- "mapping_mode": "nearest_equivalent",
1083
  "taxonomy": "IAB Content Taxonomy",
1084
  "taxonomy_version": "3.0",
1085
  "tier1": {
1086
- "id": "596",
1087
- "label": "Technology & Computing"
1088
  },
1089
  "tier2": {
1090
- "id": "599",
1091
- "label": "Computing"
1092
- },
1093
- "tier3": {
1094
- "id": "619",
1095
- "label": "Internet"
1096
- },
1097
- "tier4": {
1098
- "id": "623",
1099
- "label": "Email"
1100
  }
1101
  },
1102
  "intent": {
@@ -1104,31 +1061,31 @@
1104
  "component_confidence": {
1105
  "decision_phase": {
1106
  "calibrated": true,
1107
- "confidence": 0.9991,
1108
  "confidence_threshold": 0.22,
1109
  "label": "action",
1110
  "meets_threshold": true,
1111
- "raw_confidence": 0.9948
1112
  },
1113
  "intent_subtype": {
1114
  "calibrated": true,
1115
- "confidence": 0.8874,
1116
  "confidence_threshold": 0.25,
1117
  "label": "signup",
1118
  "meets_threshold": true,
1119
- "raw_confidence": 0.8874
1120
  },
1121
  "intent_type": {
1122
  "calibrated": true,
1123
- "confidence": 0.9996,
1124
  "confidence_threshold": 0.4,
1125
  "label": "transactional",
1126
  "meets_threshold": true,
1127
- "raw_confidence": 0.9908
1128
  },
1129
  "overall_strategy": "min_required_component_confidence"
1130
  },
1131
- "confidence": 0.8874,
1132
  "decision_phase": "action",
1133
  "subtype": "signup",
1134
  "summary": "Classified as transactional intent with subtype signup in the action phase.",
@@ -1167,30 +1124,19 @@
1167
  "response": {
1168
  "meta": {
1169
  "calibration_enabled": true,
 
1170
  "system_version": "0.6.0-phase4"
1171
  },
1172
  "model_output": {
1173
  "classification": {
1174
  "iab_content": {
1175
- "mapping_confidence": 0.8626,
1176
  "mapping_mode": "nearest_equivalent",
1177
  "taxonomy": "IAB Content Taxonomy",
1178
  "taxonomy_version": "3.0",
1179
  "tier1": {
1180
- "id": "596",
1181
- "label": "Technology & Computing"
1182
- },
1183
- "tier2": {
1184
- "id": "599",
1185
- "label": "Computing"
1186
- },
1187
- "tier3": {
1188
- "id": "619",
1189
- "label": "Internet"
1190
- },
1191
- "tier4": {
1192
- "id": "627",
1193
- "label": "Search"
1194
  }
1195
  },
1196
  "intent": {
@@ -1198,31 +1144,31 @@
1198
  "component_confidence": {
1199
  "decision_phase": {
1200
  "calibrated": true,
1201
- "confidence": 0.9966,
1202
  "confidence_threshold": 0.22,
1203
  "label": "consideration",
1204
  "meets_threshold": true,
1205
- "raw_confidence": 0.9852
1206
  },
1207
  "intent_subtype": {
1208
  "calibrated": true,
1209
- "confidence": 0.9415,
1210
  "confidence_threshold": 0.25,
1211
  "label": "comparison",
1212
  "meets_threshold": true,
1213
- "raw_confidence": 0.9415
1214
  },
1215
  "intent_type": {
1216
  "calibrated": true,
1217
- "confidence": 0.9994,
1218
  "confidence_threshold": 0.4,
1219
  "label": "commercial",
1220
  "meets_threshold": true,
1221
- "raw_confidence": 0.9884
1222
  },
1223
  "overall_strategy": "min_required_component_confidence"
1224
  },
1225
- "confidence": 0.9415,
1226
  "decision_phase": "consideration",
1227
  "subtype": "comparison",
1228
  "summary": "Classified as commercial intent with subtype comparison in the consideration phase.",
@@ -1261,13 +1207,14 @@
1261
  "response": {
1262
  "meta": {
1263
  "calibration_enabled": true,
 
1264
  "system_version": "0.6.0-phase4"
1265
  },
1266
  "model_output": {
1267
  "classification": {
1268
  "iab_content": {
1269
- "mapping_confidence": 0.8741,
1270
- "mapping_mode": "nearest_equivalent",
1271
  "taxonomy": "IAB Content Taxonomy",
1272
  "taxonomy_version": "3.0",
1273
  "tier1": {
@@ -1279,12 +1226,8 @@
1279
  "label": "Computing"
1280
  },
1281
  "tier3": {
1282
- "id": "619",
1283
- "label": "Internet"
1284
- },
1285
- "tier4": {
1286
- "id": "620",
1287
- "label": "Cloud Computing"
1288
  }
1289
  },
1290
  "intent": {
@@ -1292,31 +1235,31 @@
1292
  "component_confidence": {
1293
  "decision_phase": {
1294
  "calibrated": true,
1295
- "confidence": 0.9939,
1296
  "confidence_threshold": 0.22,
1297
  "label": "awareness",
1298
  "meets_threshold": true,
1299
- "raw_confidence": 0.9764
1300
  },
1301
  "intent_subtype": {
1302
  "calibrated": true,
1303
- "confidence": 0.9545,
1304
  "confidence_threshold": 0.25,
1305
  "label": "education",
1306
  "meets_threshold": true,
1307
- "raw_confidence": 0.9545
1308
  },
1309
  "intent_type": {
1310
  "calibrated": true,
1311
- "confidence": 0.9964,
1312
  "confidence_threshold": 0.4,
1313
  "label": "informational",
1314
  "meets_threshold": true,
1315
- "raw_confidence": 0.961
1316
  },
1317
  "overall_strategy": "min_required_component_confidence"
1318
  },
1319
- "confidence": 0.9939,
1320
  "decision_phase": "awareness",
1321
  "subtype": "education",
1322
  "summary": "Classified as informational intent with subtype education in the awareness phase.",
 
5
  "response": {
6
  "meta": {
7
  "calibration_enabled": true,
8
+ "iab_mapping_is_placeholder": false,
9
  "system_version": "0.6.0-phase4"
10
  },
11
  "model_output": {
12
  "classification": {
13
  "iab_content": {
14
+ "mapping_confidence": 0.676,
15
  "mapping_mode": "nearest_equivalent",
16
  "taxonomy": "IAB Content Taxonomy",
17
  "taxonomy_version": "3.0",
 
22
  "tier2": {
23
  "id": "599",
24
  "label": "Computing"
 
 
 
 
25
  }
26
  },
27
  "intent": {
 
29
  "component_confidence": {
30
  "decision_phase": {
31
  "calibrated": true,
32
+ "confidence": 0.961,
33
  "confidence_threshold": 0.22,
34
  "label": "awareness",
35
  "meets_threshold": true,
36
+ "raw_confidence": 0.9611
37
  },
38
  "intent_subtype": {
39
  "calibrated": true,
40
+ "confidence": 0.9853,
41
  "confidence_threshold": 0.25,
42
  "label": "education",
43
  "meets_threshold": true,
44
+ "raw_confidence": 0.9516
45
  },
46
  "intent_type": {
47
  "calibrated": true,
48
+ "confidence": 0.9807,
49
  "confidence_threshold": 0.4,
50
  "label": "informational",
51
  "meets_threshold": true,
52
+ "raw_confidence": 0.9655
53
  },
54
  "overall_strategy": "min_required_component_confidence"
55
  },
56
+ "confidence": 0.961,
57
  "decision_phase": "awareness",
58
  "subtype": "education",
59
  "summary": "Classified as informational intent with subtype education in the awareness phase.",
 
92
  "response": {
93
  "meta": {
94
  "calibration_enabled": true,
95
+ "iab_mapping_is_placeholder": false,
96
  "system_version": "0.6.0-phase4"
97
  },
98
  "model_output": {
99
  "classification": {
100
  "iab_content": {
101
+ "mapping_confidence": 0.4448,
102
  "mapping_mode": "nearest_equivalent",
103
  "taxonomy": "IAB Content Taxonomy",
104
  "taxonomy_version": "3.0",
105
  "tier1": {
106
+ "id": "596",
107
+ "label": "Technology & Computing"
108
  }
109
  },
110
  "intent": {
 
112
  "component_confidence": {
113
  "decision_phase": {
114
  "calibrated": true,
115
+ "confidence": 0.9381,
116
  "confidence_threshold": 0.22,
117
  "label": "awareness",
118
  "meets_threshold": true,
119
+ "raw_confidence": 0.9381
120
  },
121
  "intent_subtype": {
122
  "calibrated": true,
123
+ "confidence": 0.9753,
124
  "confidence_threshold": 0.25,
125
  "label": "education",
126
  "meets_threshold": true,
127
+ "raw_confidence": 0.9275
128
  },
129
  "intent_type": {
130
  "calibrated": true,
131
+ "confidence": 0.9768,
132
  "confidence_threshold": 0.4,
133
  "label": "informational",
134
  "meets_threshold": true,
135
+ "raw_confidence": 0.9597
136
  },
137
  "overall_strategy": "min_required_component_confidence"
138
  },
139
+ "confidence": 0.9381,
140
  "decision_phase": "awareness",
141
  "subtype": "education",
142
  "summary": "Classified as informational intent with subtype education in the awareness phase.",
 
175
  "response": {
176
  "meta": {
177
  "calibration_enabled": true,
178
+ "iab_mapping_is_placeholder": false,
179
  "system_version": "0.6.0-phase4"
180
  },
181
  "model_output": {
182
  "classification": {
183
  "iab_content": {
184
+ "mapping_confidence": 0.7819,
185
  "mapping_mode": "nearest_equivalent",
186
  "taxonomy": "IAB Content Taxonomy",
187
  "taxonomy_version": "3.0",
 
191
  }
192
  },
193
  "intent": {
194
+ "commercial_score": 0.728,
195
  "component_confidence": {
196
  "decision_phase": {
197
  "calibrated": true,
198
+ "confidence": 0.9691,
199
  "confidence_threshold": 0.22,
200
  "label": "consideration",
201
  "meets_threshold": true,
202
+ "raw_confidence": 0.9691
203
  },
204
  "intent_subtype": {
205
  "calibrated": true,
206
+ "confidence": 0.563,
207
  "confidence_threshold": 0.25,
208
+ "label": "comparison",
209
  "meets_threshold": true,
210
+ "raw_confidence": 0.4806
211
  },
212
  "intent_type": {
213
  "calibrated": true,
214
+ "confidence": 0.9869,
215
  "confidence_threshold": 0.4,
216
  "label": "commercial",
217
  "meets_threshold": true,
218
+ "raw_confidence": 0.9756
219
  },
220
  "overall_strategy": "min_required_component_confidence"
221
  },
222
+ "confidence": 0.563,
223
  "decision_phase": "consideration",
224
+ "subtype": "comparison",
225
+ "summary": "Classified as commercial intent with subtype comparison in the consideration phase.",
226
  "type": "commercial"
227
  }
228
  },
 
233
  "consideration"
234
  ],
235
  "opportunity": {
236
+ "strength": "high",
237
+ "type": "comparison_slot"
238
  },
239
  "policy": {
240
  "applied_thresholds": {
 
244
  "intent_type_confidence_min": 0.4
245
  },
246
  "decision_basis": "score_threshold",
247
+ "eligibility_reason": "commercial_comparison_signal_present",
248
  "monetization_eligibility": "allowed_with_caution",
249
  "regulated_vertical": false,
250
  "sensitivity": "low"
 
258
  "response": {
259
  "meta": {
260
  "calibration_enabled": true,
261
+ "iab_mapping_is_placeholder": false,
262
  "system_version": "0.6.0-phase4"
263
  },
264
  "model_output": {
265
  "classification": {
266
  "iab_content": {
267
+ "mapping_confidence": 0.3576,
268
  "mapping_mode": "nearest_equivalent",
269
  "taxonomy": "IAB Content Taxonomy",
270
  "taxonomy_version": "3.0",
271
  "tier1": {
272
  "id": "596",
273
  "label": "Technology & Computing"
 
 
 
 
 
 
 
 
274
  }
275
  },
276
  "intent": {
 
278
  "component_confidence": {
279
  "decision_phase": {
280
  "calibrated": true,
281
+ "confidence": 0.9283,
282
  "confidence_threshold": 0.22,
283
  "label": "consideration",
284
  "meets_threshold": true,
285
+ "raw_confidence": 0.9284
286
  },
287
  "intent_subtype": {
288
  "calibrated": true,
289
+ "confidence": 0.9727,
290
  "confidence_threshold": 0.25,
291
  "label": "comparison",
292
  "meets_threshold": true,
293
+ "raw_confidence": 0.9236
294
  },
295
  "intent_type": {
296
  "calibrated": true,
297
+ "confidence": 0.9734,
298
  "confidence_threshold": 0.4,
299
  "label": "commercial",
300
  "meets_threshold": true,
301
+ "raw_confidence": 0.954
302
  },
303
  "overall_strategy": "min_required_component_confidence"
304
  },
305
+ "confidence": 0.9283,
306
  "decision_phase": "consideration",
307
  "subtype": "comparison",
308
  "summary": "Classified as commercial intent with subtype comparison in the consideration phase.",
 
341
  "response": {
342
  "meta": {
343
  "calibration_enabled": true,
344
+ "iab_mapping_is_placeholder": false,
345
  "system_version": "0.6.0-phase4"
346
  },
347
  "model_output": {
348
  "classification": {
349
  "iab_content": {
350
+ "mapping_confidence": 0.3542,
351
  "mapping_mode": "nearest_equivalent",
352
  "taxonomy": "IAB Content Taxonomy",
353
  "taxonomy_version": "3.0",
 
365
  "component_confidence": {
366
  "decision_phase": {
367
  "calibrated": true,
368
+ "confidence": 0.8012,
369
  "confidence_threshold": 0.22,
370
  "label": "decision",
371
  "meets_threshold": true,
372
+ "raw_confidence": 0.8012
373
  },
374
  "intent_subtype": {
375
  "calibrated": true,
376
+ "confidence": 0.9028,
377
  "confidence_threshold": 0.25,
378
  "label": "provider_selection",
379
  "meets_threshold": true,
380
+ "raw_confidence": 0.8041
381
  },
382
  "intent_type": {
383
  "calibrated": true,
384
+ "confidence": 0.9759,
385
  "confidence_threshold": 0.4,
386
  "label": "commercial",
387
  "meets_threshold": true,
388
+ "raw_confidence": 0.9582
389
  },
390
  "overall_strategy": "min_required_component_confidence"
391
  },
392
+ "confidence": 0.8012,
393
  "decision_phase": "decision",
394
  "subtype": "provider_selection",
395
  "summary": "Classified as commercial intent with subtype provider_selection in the decision phase.",
 
428
  "response": {
429
  "meta": {
430
  "calibration_enabled": true,
431
+ "iab_mapping_is_placeholder": false,
432
  "system_version": "0.6.0-phase4"
433
  },
434
  "model_output": {
435
  "classification": {
436
  "iab_content": {
437
+ "mapping_confidence": 0.2259,
438
  "mapping_mode": "nearest_equivalent",
439
  "taxonomy": "IAB Content Taxonomy",
440
  "taxonomy_version": "3.0",
441
  "tier1": {
442
+ "id": "483",
443
+ "label": "Sports"
 
 
 
 
 
 
 
 
444
  }
445
  },
446
  "intent": {
 
448
  "component_confidence": {
449
  "decision_phase": {
450
  "calibrated": true,
451
+ "confidence": 0.9176,
452
  "confidence_threshold": 0.22,
453
  "label": "action",
454
  "meets_threshold": true,
455
+ "raw_confidence": 0.9176
456
  },
457
  "intent_subtype": {
458
  "calibrated": true,
459
+ "confidence": 0.9675,
460
  "confidence_threshold": 0.25,
461
  "label": "signup",
462
  "meets_threshold": true,
463
+ "raw_confidence": 0.9135
464
  },
465
  "intent_type": {
466
  "calibrated": true,
467
+ "confidence": 0.9416,
468
  "confidence_threshold": 0.4,
469
  "label": "transactional",
470
  "meets_threshold": true,
471
+ "raw_confidence": 0.909
472
  },
473
  "overall_strategy": "min_required_component_confidence"
474
  },
475
+ "confidence": 0.9176,
476
  "decision_phase": "action",
477
  "subtype": "signup",
478
  "summary": "Classified as transactional intent with subtype signup in the action phase.",
 
511
  "response": {
512
  "meta": {
513
  "calibration_enabled": true,
514
+ "iab_mapping_is_placeholder": false,
515
  "system_version": "0.6.0-phase4"
516
  },
517
  "model_output": {
518
  "classification": {
519
  "iab_content": {
520
+ "mapping_confidence": 0.5455,
521
+ "mapping_mode": "exact",
522
  "taxonomy": "IAB Content Taxonomy",
523
  "taxonomy_version": "3.0",
524
  "tier1": {
 
535
  "component_confidence": {
536
  "decision_phase": {
537
  "calibrated": true,
538
+ "confidence": 0.9628,
539
  "confidence_threshold": 0.22,
540
  "label": "action",
541
  "meets_threshold": true,
542
+ "raw_confidence": 0.9628
543
  },
544
  "intent_subtype": {
545
  "calibrated": true,
546
+ "confidence": 0.7841,
547
  "confidence_threshold": 0.25,
548
  "label": "booking",
549
  "meets_threshold": true,
550
+ "raw_confidence": 0.6676
551
  },
552
  "intent_type": {
553
  "calibrated": true,
554
+ "confidence": 0.9761,
555
  "confidence_threshold": 0.4,
556
  "label": "transactional",
557
  "meets_threshold": true,
558
+ "raw_confidence": 0.9583
559
  },
560
  "overall_strategy": "min_required_component_confidence"
561
  },
562
+ "confidence": 0.7841,
563
  "decision_phase": "action",
564
  "subtype": "booking",
565
  "summary": "Classified as transactional intent with subtype booking in the action phase.",
 
598
  "response": {
599
  "meta": {
600
  "calibration_enabled": true,
601
+ "iab_mapping_is_placeholder": false,
602
  "system_version": "0.6.0-phase4"
603
  },
604
  "model_output": {
605
  "classification": {
606
  "iab_content": {
607
+ "mapping_confidence": 0.3927,
608
  "mapping_mode": "nearest_equivalent",
609
  "taxonomy": "IAB Content Taxonomy",
610
  "taxonomy_version": "3.0",
611
  "tier1": {
612
  "id": "596",
613
  "label": "Technology & Computing"
 
 
 
 
 
 
 
 
 
 
 
 
614
  }
615
  },
616
  "intent": {
 
618
  "component_confidence": {
619
  "decision_phase": {
620
  "calibrated": true,
621
+ "confidence": 0.9539,
622
  "confidence_threshold": 0.22,
623
  "label": "post_purchase",
624
  "meets_threshold": true,
625
+ "raw_confidence": 0.9539
626
  },
627
  "intent_subtype": {
628
  "calibrated": true,
629
+ "confidence": 0.9652,
630
  "confidence_threshold": 0.25,
631
  "label": "onboarding_setup",
632
  "meets_threshold": true,
633
+ "raw_confidence": 0.9053
634
  },
635
  "intent_type": {
636
  "calibrated": true,
637
+ "confidence": 0.7786,
638
  "confidence_threshold": 0.4,
639
  "label": "transactional",
640
  "meets_threshold": true,
641
+ "raw_confidence": 0.7173
642
  },
643
  "overall_strategy": "min_required_component_confidence"
644
  },
645
+ "confidence": 0.7786,
646
  "decision_phase": "post_purchase",
647
  "subtype": "onboarding_setup",
648
  "summary": "Classified as transactional intent with subtype onboarding_setup in the post_purchase phase.",
 
681
  "response": {
682
  "meta": {
683
  "calibration_enabled": true,
684
+ "iab_mapping_is_placeholder": false,
685
  "system_version": "0.6.0-phase4"
686
  },
687
  "model_output": {
688
  "classification": {
689
  "iab_content": {
690
+ "mapping_confidence": 0.2935,
691
  "mapping_mode": "nearest_equivalent",
692
  "taxonomy": "IAB Content Taxonomy",
693
  "taxonomy_version": "3.0",
694
  "tier1": {
695
+ "id": "52",
696
+ "label": "Business and Finance"
 
 
 
 
 
 
 
 
697
  }
698
  },
699
  "intent": {
 
701
  "component_confidence": {
702
  "decision_phase": {
703
  "calibrated": true,
704
+ "confidence": 0.9528,
705
  "confidence_threshold": 0.22,
706
  "label": "support",
707
  "meets_threshold": true,
708
+ "raw_confidence": 0.9528
709
  },
710
  "intent_subtype": {
711
  "calibrated": true,
712
+ "confidence": 0.894,
713
  "confidence_threshold": 0.25,
714
  "label": "account_help",
715
  "meets_threshold": true,
716
+ "raw_confidence": 0.8027
717
  },
718
  "intent_type": {
719
  "calibrated": true,
720
+ "confidence": 0.9636,
721
  "confidence_threshold": 0.4,
722
  "label": "support",
723
  "meets_threshold": true,
724
+ "raw_confidence": 0.9403
725
  },
726
  "overall_strategy": "min_required_component_confidence"
727
  },
728
+ "confidence": 0.894,
729
  "decision_phase": "support",
730
  "subtype": "account_help",
731
  "summary": "Classified as support intent with subtype account_help in the support phase.",
 
770
  "response": {
771
  "meta": {
772
  "calibration_enabled": true,
773
+ "iab_mapping_is_placeholder": false,
774
  "system_version": "0.6.0-phase4"
775
  },
776
  "model_output": {
777
  "classification": {
778
  "iab_content": {
779
+ "mapping_confidence": 0.1521,
780
  "mapping_mode": "nearest_equivalent",
781
  "taxonomy": "IAB Content Taxonomy",
782
  "taxonomy_version": "3.0",
783
  "tier1": {
784
+ "id": "v9i3On",
785
+ "label": "Sensitive Topics"
 
 
 
 
786
  }
787
  },
788
  "intent": {
 
790
  "component_confidence": {
791
  "decision_phase": {
792
  "calibrated": true,
793
+ "confidence": 0.843,
794
  "confidence_threshold": 0.22,
795
  "label": "awareness",
796
  "meets_threshold": true,
797
+ "raw_confidence": 0.843
798
  },
799
  "intent_subtype": {
800
  "calibrated": true,
801
+ "confidence": 0.9678,
802
  "confidence_threshold": 0.25,
803
  "label": "emotional_reflection",
804
  "meets_threshold": true,
805
+ "raw_confidence": 0.9123
806
  },
807
  "intent_type": {
808
  "calibrated": true,
809
+ "confidence": 0.929,
810
  "confidence_threshold": 0.4,
811
  "label": "personal_reflection",
812
  "meets_threshold": true,
813
+ "raw_confidence": 0.8937
814
  },
815
  "overall_strategy": "min_required_component_confidence"
816
  },
817
+ "confidence": 0.843,
818
  "decision_phase": "awareness",
819
  "subtype": "emotional_reflection",
820
  "summary": "Classified as personal_reflection intent with subtype emotional_reflection in the awareness phase.",
 
859
  "response": {
860
  "meta": {
861
  "calibration_enabled": true,
862
+ "iab_mapping_is_placeholder": false,
863
  "system_version": "0.6.0-phase4"
864
  },
865
  "model_output": {
866
  "classification": {
867
  "iab_content": {
868
+ "mapping_confidence": 0.0846,
869
  "mapping_mode": "nearest_equivalent",
870
  "taxonomy": "IAB Content Taxonomy",
871
  "taxonomy_version": "3.0",
872
  "tier1": {
873
+ "id": "42",
874
+ "label": "Books and Literature"
875
  }
876
  },
877
  "intent": {
 
879
  "component_confidence": {
880
  "decision_phase": {
881
  "calibrated": true,
882
+ "confidence": 0.8336,
883
  "confidence_threshold": 0.22,
884
  "label": "research",
885
  "meets_threshold": true,
886
+ "raw_confidence": 0.8336
887
  },
888
  "intent_subtype": {
889
  "calibrated": true,
890
+ "confidence": 0.9685,
891
  "confidence_threshold": 0.25,
892
  "label": "follow_up",
893
  "meets_threshold": true,
894
+ "raw_confidence": 0.9121
895
  },
896
  "intent_type": {
897
  "calibrated": true,
898
+ "confidence": 0.9583,
899
  "confidence_threshold": 0.4,
900
  "label": "ambiguous",
901
  "meets_threshold": true,
902
+ "raw_confidence": 0.9339
903
  },
904
  "overall_strategy": "min_required_component_confidence"
905
  },
906
+ "confidence": 0.8336,
907
  "decision_phase": "research",
908
  "subtype": "follow_up",
909
  "summary": "Classified as ambiguous intent with subtype follow_up in the research phase.",
 
948
  "response": {
949
  "meta": {
950
  "calibration_enabled": true,
951
+ "iab_mapping_is_placeholder": false,
952
  "system_version": "0.6.0-phase4"
953
  },
954
  "model_output": {
955
  "classification": {
956
  "iab_content": {
957
+ "mapping_confidence": 0.2223,
958
  "mapping_mode": "nearest_equivalent",
959
  "taxonomy": "IAB Content Taxonomy",
960
  "taxonomy_version": "3.0",
961
  "tier1": {
962
+ "id": "391",
963
+ "label": "Personal Finance"
 
 
 
 
964
  }
965
  },
966
  "intent": {
 
968
  "component_confidence": {
969
  "decision_phase": {
970
  "calibrated": true,
971
+ "confidence": 0.9337,
972
  "confidence_threshold": 0.22,
973
  "label": "research",
974
  "meets_threshold": true,
975
+ "raw_confidence": 0.9337
976
  },
977
  "intent_subtype": {
978
  "calibrated": true,
979
+ "confidence": 0.9493,
980
  "confidence_threshold": 0.25,
981
  "label": "follow_up",
982
  "meets_threshold": true,
983
+ "raw_confidence": 0.875
984
  },
985
  "intent_type": {
986
  "calibrated": true,
987
+ "confidence": 0.9454,
988
  "confidence_threshold": 0.4,
989
  "label": "ambiguous",
990
  "meets_threshold": true,
991
+ "raw_confidence": 0.9149
992
  },
993
  "overall_strategy": "min_required_component_confidence"
994
  },
995
+ "confidence": 0.9337,
996
  "decision_phase": "research",
997
  "subtype": "follow_up",
998
  "summary": "Classified as ambiguous intent with subtype follow_up in the research phase.",
 
1037
  "response": {
1038
  "meta": {
1039
  "calibration_enabled": true,
1040
+ "iab_mapping_is_placeholder": false,
1041
  "system_version": "0.6.0-phase4"
1042
  },
1043
  "model_output": {
1044
  "classification": {
1045
  "iab_content": {
1046
+ "mapping_confidence": 0.1233,
1047
+ "mapping_mode": "exact",
1048
  "taxonomy": "IAB Content Taxonomy",
1049
  "taxonomy_version": "3.0",
1050
  "tier1": {
1051
+ "id": "239",
1052
+ "label": "Hobbies & Interests"
1053
  },
1054
  "tier2": {
1055
+ "id": "264",
1056
+ "label": "Content Production"
 
 
 
 
 
 
 
 
1057
  }
1058
  },
1059
  "intent": {
 
1061
  "component_confidence": {
1062
  "decision_phase": {
1063
  "calibrated": true,
1064
+ "confidence": 0.9632,
1065
  "confidence_threshold": 0.22,
1066
  "label": "action",
1067
  "meets_threshold": true,
1068
+ "raw_confidence": 0.9632
1069
  },
1070
  "intent_subtype": {
1071
  "calibrated": true,
1072
+ "confidence": 0.9349,
1073
  "confidence_threshold": 0.25,
1074
  "label": "signup",
1075
  "meets_threshold": true,
1076
+ "raw_confidence": 0.8563
1077
  },
1078
  "intent_type": {
1079
  "calibrated": true,
1080
+ "confidence": 0.9378,
1081
  "confidence_threshold": 0.4,
1082
  "label": "transactional",
1083
  "meets_threshold": true,
1084
+ "raw_confidence": 0.9047
1085
  },
1086
  "overall_strategy": "min_required_component_confidence"
1087
  },
1088
+ "confidence": 0.9349,
1089
  "decision_phase": "action",
1090
  "subtype": "signup",
1091
  "summary": "Classified as transactional intent with subtype signup in the action phase.",
 
1124
  "response": {
1125
  "meta": {
1126
  "calibration_enabled": true,
1127
+ "iab_mapping_is_placeholder": false,
1128
  "system_version": "0.6.0-phase4"
1129
  },
1130
  "model_output": {
1131
  "classification": {
1132
  "iab_content": {
1133
+ "mapping_confidence": 0.2185,
1134
  "mapping_mode": "nearest_equivalent",
1135
  "taxonomy": "IAB Content Taxonomy",
1136
  "taxonomy_version": "3.0",
1137
  "tier1": {
1138
+ "id": "52",
1139
+ "label": "Business and Finance"
 
 
 
 
 
 
 
 
 
 
 
 
1140
  }
1141
  },
1142
  "intent": {
 
1144
  "component_confidence": {
1145
  "decision_phase": {
1146
  "calibrated": true,
1147
+ "confidence": 0.9572,
1148
  "confidence_threshold": 0.22,
1149
  "label": "consideration",
1150
  "meets_threshold": true,
1151
+ "raw_confidence": 0.9572
1152
  },
1153
  "intent_subtype": {
1154
  "calibrated": true,
1155
+ "confidence": 0.9432,
1156
  "confidence_threshold": 0.25,
1157
  "label": "comparison",
1158
  "meets_threshold": true,
1159
+ "raw_confidence": 0.8708
1160
  },
1161
  "intent_type": {
1162
  "calibrated": true,
1163
+ "confidence": 0.9622,
1164
  "confidence_threshold": 0.4,
1165
  "label": "commercial",
1166
  "meets_threshold": true,
1167
+ "raw_confidence": 0.9374
1168
  },
1169
  "overall_strategy": "min_required_component_confidence"
1170
  },
1171
+ "confidence": 0.9432,
1172
  "decision_phase": "consideration",
1173
  "subtype": "comparison",
1174
  "summary": "Classified as commercial intent with subtype comparison in the consideration phase.",
 
1207
  "response": {
1208
  "meta": {
1209
  "calibration_enabled": true,
1210
+ "iab_mapping_is_placeholder": false,
1211
  "system_version": "0.6.0-phase4"
1212
  },
1213
  "model_output": {
1214
  "classification": {
1215
  "iab_content": {
1216
+ "mapping_confidence": 0.1513,
1217
+ "mapping_mode": "exact",
1218
  "taxonomy": "IAB Content Taxonomy",
1219
  "taxonomy_version": "3.0",
1220
  "tier1": {
 
1226
  "label": "Computing"
1227
  },
1228
  "tier3": {
1229
+ "id": "618",
1230
+ "label": "Information and Network Security"
 
 
 
 
1231
  }
1232
  },
1233
  "intent": {
 
1235
  "component_confidence": {
1236
  "decision_phase": {
1237
  "calibrated": true,
1238
+ "confidence": 0.9609,
1239
  "confidence_threshold": 0.22,
1240
  "label": "awareness",
1241
  "meets_threshold": true,
1242
+ "raw_confidence": 0.961
1243
  },
1244
  "intent_subtype": {
1245
  "calibrated": true,
1246
+ "confidence": 0.9867,
1247
  "confidence_threshold": 0.25,
1248
  "label": "education",
1249
  "meets_threshold": true,
1250
+ "raw_confidence": 0.9556
1251
  },
1252
  "intent_type": {
1253
  "calibrated": true,
1254
+ "confidence": 0.975,
1255
  "confidence_threshold": 0.4,
1256
  "label": "informational",
1257
  "meets_threshold": true,
1258
+ "raw_confidence": 0.9567
1259
  },
1260
  "overall_strategy": "min_required_component_confidence"
1261
  },
1262
+ "confidence": 0.9609,
1263
  "decision_phase": "awareness",
1264
  "subtype": "education",
1265
  "summary": "Classified as informational intent with subtype education in the awareness phase.",
artifacts/evaluation/latest/decision_phase_difficulty_benchmark_confusion_matrix.csv CHANGED
@@ -1,8 +1,8 @@
1
  ,awareness,research,consideration,decision,action,post_purchase,support
2
  awareness,14,1,0,0,0,0,0
3
- research,0,14,0,0,0,1,0
4
  consideration,0,1,14,0,0,0,0
5
  decision,0,0,0,15,0,0,0
6
- action,0,0,0,1,13,1,0
7
  post_purchase,0,0,0,0,0,15,0
8
  support,0,0,0,0,0,0,15
 
1
  ,awareness,research,consideration,decision,action,post_purchase,support
2
  awareness,14,1,0,0,0,0,0
3
+ research,0,15,0,0,0,0,0
4
  consideration,0,1,14,0,0,0,0
5
  decision,0,0,0,15,0,0,0
6
+ action,0,0,0,0,15,0,0
7
  post_purchase,0,0,0,0,0,15,0
8
  support,0,0,0,0,0,0,15
artifacts/evaluation/latest/decision_phase_difficulty_benchmark_report.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "accepted_accuracy": 0.9524,
3
  "accepted_coverage": 1.0,
4
- "accuracy": 0.9524,
5
- "confusion_matrix_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/artifacts/evaluation/latest/decision_phase_difficulty_benchmark_confusion_matrix.csv",
6
  "count": 105,
7
- "dataset_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/data/decision_phase_benchmark.jsonl",
8
  "difficulty_breakdown": {
9
  "easy": {
10
  "accepted_accuracy": 0.9714,
@@ -15,12 +15,12 @@
15
  "macro_f1": 0.9711
16
  },
17
  "hard": {
18
- "accepted_accuracy": 0.8857,
19
  "accepted_coverage": 1.0,
20
- "accuracy": 0.8857,
21
  "count": 35,
22
  "fallback_rate": 0.0,
23
- "macro_f1": 0.883
24
  },
25
  "medium": {
26
  "accepted_accuracy": 1.0,
@@ -33,13 +33,13 @@
33
  },
34
  "fallback_rate": 0.0,
35
  "head": "decision_phase",
36
- "macro_f1": 0.9526,
37
  "per_class_metrics": {
38
- "accuracy": 0.9523809523809523,
39
  "action": {
40
- "f1-score": 0.9285714285714286,
41
  "precision": 1.0,
42
- "recall": 0.8666666666666667,
43
  "support": 15.0
44
  },
45
  "awareness": {
@@ -55,27 +55,27 @@
55
  "support": 15.0
56
  },
57
  "decision": {
58
- "f1-score": 0.967741935483871,
59
- "precision": 0.9375,
60
  "recall": 1.0,
61
  "support": 15.0
62
  },
63
  "macro avg": {
64
- "f1-score": 0.9525819504665047,
65
- "precision": 0.9564075630252101,
66
- "recall": 0.9523809523809523,
67
  "support": 105.0
68
  },
69
  "post_purchase": {
70
- "f1-score": 0.9375,
71
- "precision": 0.8823529411764706,
72
  "recall": 1.0,
73
  "support": 15.0
74
  },
75
  "research": {
76
- "f1-score": 0.9032258064516129,
77
- "precision": 0.875,
78
- "recall": 0.9333333333333333,
79
  "support": 15.0
80
  },
81
  "support": {
@@ -85,9 +85,9 @@
85
  "support": 15.0
86
  },
87
  "weighted avg": {
88
- "f1-score": 0.9525819504665048,
89
- "precision": 0.9564075630252101,
90
- "recall": 0.9523809523809523,
91
  "support": 105.0
92
  }
93
  },
 
1
  {
2
+ "accepted_accuracy": 0.981,
3
  "accepted_coverage": 1.0,
4
+ "accuracy": 0.981,
5
+ "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/decision_phase_difficulty_benchmark_confusion_matrix.csv",
6
  "count": 105,
7
+ "dataset_path": "/content/agentic-intent-classifier/data/decision_phase_benchmark.jsonl",
8
  "difficulty_breakdown": {
9
  "easy": {
10
  "accepted_accuracy": 0.9714,
 
15
  "macro_f1": 0.9711
16
  },
17
  "hard": {
18
+ "accepted_accuracy": 0.9714,
19
  "accepted_coverage": 1.0,
20
+ "accuracy": 0.9714,
21
  "count": 35,
22
  "fallback_rate": 0.0,
23
+ "macro_f1": 0.9711
24
  },
25
  "medium": {
26
  "accepted_accuracy": 1.0,
 
33
  },
34
  "fallback_rate": 0.0,
35
  "head": "decision_phase",
36
+ "macro_f1": 0.9812,
37
  "per_class_metrics": {
38
+ "accuracy": 0.9809523809523809,
39
  "action": {
40
+ "f1-score": 1.0,
41
  "precision": 1.0,
42
+ "recall": 1.0,
43
  "support": 15.0
44
  },
45
  "awareness": {
 
55
  "support": 15.0
56
  },
57
  "decision": {
58
+ "f1-score": 1.0,
59
+ "precision": 1.0,
60
  "recall": 1.0,
61
  "support": 15.0
62
  },
63
  "macro avg": {
64
+ "f1-score": 0.9812192118226601,
65
+ "precision": 0.9831932773109244,
66
+ "recall": 0.980952380952381,
67
  "support": 105.0
68
  },
69
  "post_purchase": {
70
+ "f1-score": 1.0,
71
+ "precision": 1.0,
72
  "recall": 1.0,
73
  "support": 15.0
74
  },
75
  "research": {
76
+ "f1-score": 0.9375,
77
+ "precision": 0.8823529411764706,
78
+ "recall": 1.0,
79
  "support": 15.0
80
  },
81
  "support": {
 
85
  "support": 15.0
86
  },
87
  "weighted avg": {
88
+ "f1-score": 0.9812192118226601,
89
+ "precision": 0.9831932773109243,
90
+ "recall": 0.9809523809523809,
91
  "support": 105.0
92
  }
93
  },
artifacts/evaluation/latest/decision_phase_final_wave_cases_confusion_matrix.csv CHANGED
@@ -1,7 +1,7 @@
1
  ,awareness,research,consideration,decision,action,post_purchase,support
2
  awareness,5,0,0,0,0,0,0
3
- research,1,3,0,0,0,0,0
4
- consideration,0,0,5,0,0,0,0
5
  decision,0,0,0,5,0,0,0
6
  action,0,0,0,0,0,0,0
7
  post_purchase,0,0,0,0,0,4,0
 
1
  ,awareness,research,consideration,decision,action,post_purchase,support
2
  awareness,5,0,0,0,0,0,0
3
+ research,2,2,0,0,0,0,0
4
+ consideration,0,1,4,0,0,0,0
5
  decision,0,0,0,5,0,0,0
6
  action,0,0,0,0,0,0,0
7
  post_purchase,0,0,0,0,0,4,0
artifacts/evaluation/latest/decision_phase_final_wave_cases_report.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "accepted_accuracy": 0.963,
3
  "accepted_coverage": 1.0,
4
- "accuracy": 0.963,
5
- "confusion_matrix_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/artifacts/evaluation/latest/decision_phase_final_wave_cases_confusion_matrix.csv",
6
  "count": 27,
7
- "dataset_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/data/decision_phase/final_wave_cases.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "decision_phase",
10
- "macro_f1": 0.961,
11
  "per_class_metrics": {
12
- "accuracy": 0.9629629629629629,
13
  "action": {
14
  "f1-score": 0.0,
15
  "precision": 0.0,
@@ -17,15 +17,15 @@
17
  "support": 0.0
18
  },
19
  "awareness": {
20
- "f1-score": 0.9090909090909091,
21
- "precision": 0.8333333333333334,
22
  "recall": 1.0,
23
  "support": 5.0
24
  },
25
  "consideration": {
26
- "f1-score": 1.0,
27
  "precision": 1.0,
28
- "recall": 1.0,
29
  "support": 5.0
30
  },
31
  "decision": {
@@ -35,9 +35,9 @@
35
  "support": 5.0
36
  },
37
  "macro avg": {
38
- "f1-score": 0.823747680890538,
39
- "precision": 0.8333333333333334,
40
- "recall": 0.8214285714285714,
41
  "support": 27.0
42
  },
43
  "post_purchase": {
@@ -47,9 +47,9 @@
47
  "support": 4.0
48
  },
49
  "research": {
50
- "f1-score": 0.8571428571428571,
51
- "precision": 1.0,
52
- "recall": 0.75,
53
  "support": 4.0
54
  },
55
  "support": {
@@ -59,9 +59,9 @@
59
  "support": 4.0
60
  },
61
  "weighted avg": {
62
- "f1-score": 0.962000962000962,
63
- "precision": 0.9691358024691359,
64
- "recall": 0.9629629629629629,
65
  "support": 27.0
66
  }
67
  },
 
1
  {
2
+ "accepted_accuracy": 0.8889,
3
  "accepted_coverage": 1.0,
4
+ "accuracy": 0.8889,
5
+ "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/decision_phase_final_wave_cases_confusion_matrix.csv",
6
  "count": 27,
7
+ "dataset_path": "/content/agentic-intent-classifier/data/decision_phase/final_wave_cases.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "decision_phase",
10
+ "macro_f1": 0.8823,
11
  "per_class_metrics": {
12
+ "accuracy": 0.8888888888888888,
13
  "action": {
14
  "f1-score": 0.0,
15
  "precision": 0.0,
 
17
  "support": 0.0
18
  },
19
  "awareness": {
20
+ "f1-score": 0.8333333333333334,
21
+ "precision": 0.7142857142857143,
22
  "recall": 1.0,
23
  "support": 5.0
24
  },
25
  "consideration": {
26
+ "f1-score": 0.8888888888888888,
27
  "precision": 1.0,
28
+ "recall": 0.8,
29
  "support": 5.0
30
  },
31
  "decision": {
 
35
  "support": 5.0
36
  },
37
  "macro avg": {
38
+ "f1-score": 0.7562358276643991,
39
+ "precision": 0.7687074829931974,
40
+ "recall": 0.7571428571428571,
41
  "support": 27.0
42
  },
43
  "post_purchase": {
 
47
  "support": 4.0
48
  },
49
  "research": {
50
+ "f1-score": 0.5714285714285714,
51
+ "precision": 0.6666666666666666,
52
+ "recall": 0.5,
53
  "support": 4.0
54
  },
55
  "support": {
 
59
  "support": 4.0
60
  },
61
  "weighted avg": {
62
+ "f1-score": 0.8850676072898296,
63
+ "precision": 0.8977072310405644,
64
+ "recall": 0.8888888888888888,
65
  "support": 27.0
66
  }
67
  },
artifacts/evaluation/latest/decision_phase_hard_cases_confusion_matrix.csv CHANGED
@@ -1,7 +1,7 @@
1
  ,awareness,research,consideration,decision,action,post_purchase,support
2
  awareness,6,0,0,0,0,0,0
3
  research,2,5,0,0,0,0,0
4
- consideration,0,1,6,0,0,0,0
5
  decision,0,0,0,7,0,0,0
6
  action,0,0,0,0,0,0,0
7
  post_purchase,0,0,0,0,0,6,0
 
1
  ,awareness,research,consideration,decision,action,post_purchase,support
2
  awareness,6,0,0,0,0,0,0
3
  research,2,5,0,0,0,0,0
4
+ consideration,0,2,5,0,0,0,0
5
  decision,0,0,0,7,0,0,0
6
  action,0,0,0,0,0,0,0
7
  post_purchase,0,0,0,0,0,6,0
artifacts/evaluation/latest/decision_phase_hard_cases_report.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "accepted_accuracy": 0.9231,
3
  "accepted_coverage": 1.0,
4
- "accuracy": 0.9231,
5
- "confusion_matrix_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/artifacts/evaluation/latest/decision_phase_hard_cases_confusion_matrix.csv",
6
  "count": 39,
7
- "dataset_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/data/decision_phase/hard_cases.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "decision_phase",
10
- "macro_f1": 0.9249,
11
  "per_class_metrics": {
12
- "accuracy": 0.9230769230769231,
13
  "action": {
14
  "f1-score": 0.0,
15
  "precision": 0.0,
@@ -23,9 +23,9 @@
23
  "support": 6.0
24
  },
25
  "consideration": {
26
- "f1-score": 0.9230769230769231,
27
  "precision": 1.0,
28
- "recall": 0.8571428571428571,
29
  "support": 7.0
30
  },
31
  "decision": {
@@ -35,9 +35,9 @@
35
  "support": 7.0
36
  },
37
  "macro avg": {
38
- "f1-score": 0.792778649921507,
39
- "precision": 0.7976190476190477,
40
- "recall": 0.7959183673469388,
41
  "support": 39.0
42
  },
43
  "post_purchase": {
@@ -47,8 +47,8 @@
47
  "support": 6.0
48
  },
49
  "research": {
50
- "f1-score": 0.7692307692307693,
51
- "precision": 0.8333333333333334,
52
  "recall": 0.7142857142857143,
53
  "support": 7.0
54
  },
@@ -59,9 +59,9 @@
59
  "support": 6.0
60
  },
61
  "weighted avg": {
62
- "f1-score": 0.9227951535643845,
63
- "precision": 0.9316239316239316,
64
- "recall": 0.9230769230769231,
65
  "support": 39.0
66
  }
67
  },
 
1
  {
2
+ "accepted_accuracy": 0.8974,
3
  "accepted_coverage": 1.0,
4
+ "accuracy": 0.8974,
5
+ "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/decision_phase_hard_cases_confusion_matrix.csv",
6
  "count": 39,
7
+ "dataset_path": "/content/agentic-intent-classifier/data/decision_phase/hard_cases.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "decision_phase",
10
+ "macro_f1": 0.9008,
11
  "per_class_metrics": {
12
+ "accuracy": 0.8974358974358975,
13
  "action": {
14
  "f1-score": 0.0,
15
  "precision": 0.0,
 
23
  "support": 6.0
24
  },
25
  "consideration": {
26
+ "f1-score": 0.8333333333333334,
27
  "precision": 1.0,
28
+ "recall": 0.7142857142857143,
29
  "support": 7.0
30
  },
31
  "decision": {
 
35
  "support": 7.0
36
  },
37
  "macro avg": {
38
+ "f1-score": 0.772108843537415,
39
+ "precision": 0.7806122448979592,
40
+ "recall": 0.7755102040816327,
41
  "support": 39.0
42
  },
43
  "post_purchase": {
 
47
  "support": 6.0
48
  },
49
  "research": {
50
+ "f1-score": 0.7142857142857143,
51
+ "precision": 0.7142857142857143,
52
  "recall": 0.7142857142857143,
53
  "support": 7.0
54
  },
 
59
  "support": 6.0
60
  },
61
  "weighted avg": {
62
+ "f1-score": 0.8968253968253967,
63
+ "precision": 0.9102564102564102,
64
+ "recall": 0.8974358974358975,
65
  "support": 39.0
66
  }
67
  },
artifacts/evaluation/latest/decision_phase_test_confusion_matrix.csv CHANGED
@@ -1,8 +1,8 @@
1
  ,awareness,research,consideration,decision,action,post_purchase,support
2
  awareness,3,0,0,0,0,0,0
3
  research,3,2,0,0,0,0,0
4
- consideration,0,1,4,0,0,0,0
5
  decision,0,0,0,5,0,0,0
6
  action,0,0,0,0,3,0,0
7
  post_purchase,0,0,0,0,0,4,0
8
- support,0,0,0,0,0,0,4
 
1
  ,awareness,research,consideration,decision,action,post_purchase,support
2
  awareness,3,0,0,0,0,0,0
3
  research,3,2,0,0,0,0,0
4
+ consideration,0,2,3,0,0,0,0
5
  decision,0,0,0,5,0,0,0
6
  action,0,0,0,0,3,0,0
7
  post_purchase,0,0,0,0,0,4,0
8
+ support,0,0,0,0,0,1,3
artifacts/evaluation/latest/decision_phase_test_report.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "accepted_accuracy": 0.8621,
3
  "accepted_coverage": 1.0,
4
- "accuracy": 0.8621,
5
- "confusion_matrix_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/artifacts/evaluation/latest/decision_phase_test_confusion_matrix.csv",
6
  "count": 29,
7
- "dataset_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/data/decision_phase/test.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "decision_phase",
10
- "macro_f1": 0.8651,
11
  "per_class_metrics": {
12
- "accuracy": 0.8620689655172413,
13
  "action": {
14
  "f1-score": 1.0,
15
  "precision": 1.0,
@@ -23,9 +23,9 @@
23
  "support": 3.0
24
  },
25
  "consideration": {
26
- "f1-score": 0.8888888888888888,
27
  "precision": 1.0,
28
- "recall": 0.8,
29
  "support": 5.0
30
  },
31
  "decision": {
@@ -35,33 +35,33 @@
35
  "support": 5.0
36
  },
37
  "macro avg": {
38
- "f1-score": 0.865079365079365,
39
- "precision": 0.8809523809523808,
40
- "recall": 0.8857142857142858,
41
  "support": 29.0
42
  },
43
  "post_purchase": {
44
- "f1-score": 1.0,
45
- "precision": 1.0,
46
  "recall": 1.0,
47
  "support": 4.0
48
  },
49
  "research": {
50
- "f1-score": 0.5,
51
- "precision": 0.6666666666666666,
52
  "recall": 0.4,
53
  "support": 5.0
54
  },
55
  "support": {
56
- "f1-score": 1.0,
57
  "precision": 1.0,
58
- "recall": 1.0,
59
  "support": 4.0
60
  },
61
  "weighted avg": {
62
- "f1-score": 0.8601532567049808,
63
- "precision": 0.8908045977011494,
64
- "recall": 0.8620689655172413,
65
  "support": 29.0
66
  }
67
  },
 
1
  {
2
+ "accepted_accuracy": 0.7931,
3
  "accepted_coverage": 1.0,
4
+ "accuracy": 0.7931,
5
+ "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/decision_phase_test_confusion_matrix.csv",
6
  "count": 29,
7
+ "dataset_path": "/content/agentic-intent-classifier/data/decision_phase/test.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "decision_phase",
10
+ "macro_f1": 0.801,
11
  "per_class_metrics": {
12
+ "accuracy": 0.7931034482758621,
13
  "action": {
14
  "f1-score": 1.0,
15
  "precision": 1.0,
 
23
  "support": 3.0
24
  },
25
  "consideration": {
26
+ "f1-score": 0.75,
27
  "precision": 1.0,
28
+ "recall": 0.6,
29
  "support": 5.0
30
  },
31
  "decision": {
 
35
  "support": 5.0
36
  },
37
  "macro avg": {
38
+ "f1-score": 0.8010204081632653,
39
+ "precision": 0.8285714285714285,
40
+ "recall": 0.8214285714285714,
41
  "support": 29.0
42
  },
43
  "post_purchase": {
44
+ "f1-score": 0.8888888888888888,
45
+ "precision": 0.8,
46
  "recall": 1.0,
47
  "support": 4.0
48
  },
49
  "research": {
50
+ "f1-score": 0.4444444444444444,
51
+ "precision": 0.5,
52
  "recall": 0.4,
53
  "support": 5.0
54
  },
55
  "support": {
56
+ "f1-score": 0.8571428571428571,
57
  "precision": 1.0,
58
+ "recall": 0.75,
59
  "support": 4.0
60
  },
61
  "weighted avg": {
62
+ "f1-score": 0.7915982484948002,
63
+ "precision": 0.8344827586206897,
64
+ "recall": 0.7931034482758621,
65
  "support": 29.0
66
  }
67
  },
artifacts/evaluation/latest/decision_phase_train_confusion_matrix.csv CHANGED
@@ -1,8 +1,8 @@
1
  ,awareness,research,consideration,decision,action,post_purchase,support
2
  awareness,16,0,0,0,0,0,0
3
- research,1,14,0,0,0,0,0
4
- consideration,0,0,17,0,0,0,0
5
- decision,0,0,0,16,0,0,0
6
  action,0,0,0,0,10,0,0
7
- post_purchase,0,0,0,0,0,14,0
8
  support,0,0,0,0,0,0,14
 
1
  ,awareness,research,consideration,decision,action,post_purchase,support
2
  awareness,16,0,0,0,0,0,0
3
+ research,1,13,1,0,0,0,0
4
+ consideration,0,3,14,0,0,0,0
5
+ decision,0,0,1,15,0,0,0
6
  action,0,0,0,0,10,0,0
7
+ post_purchase,0,1,0,0,0,13,0
8
  support,0,0,0,0,0,0,14
artifacts/evaluation/latest/decision_phase_train_report.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "accepted_accuracy": 0.9902,
3
  "accepted_coverage": 1.0,
4
- "accuracy": 0.9902,
5
- "confusion_matrix_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/artifacts/evaluation/latest/decision_phase_train_confusion_matrix.csv",
6
  "count": 102,
7
- "dataset_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/data/decision_phase/train.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "decision_phase",
10
- "macro_f1": 0.9907,
11
  "per_class_metrics": {
12
- "accuracy": 0.9901960784313726,
13
  "action": {
14
  "f1-score": 1.0,
15
  "precision": 1.0,
@@ -23,33 +23,33 @@
23
  "support": 16.0
24
  },
25
  "consideration": {
26
- "f1-score": 1.0,
27
- "precision": 1.0,
28
- "recall": 1.0,
29
  "support": 17.0
30
  },
31
  "decision": {
32
- "f1-score": 1.0,
33
  "precision": 1.0,
34
- "recall": 1.0,
35
  "support": 16.0
36
  },
37
  "macro avg": {
38
- "f1-score": 0.9907448872966115,
39
- "precision": 0.9915966386554622,
40
- "recall": 0.9904761904761905,
41
  "support": 102.0
42
  },
43
  "post_purchase": {
44
- "f1-score": 1.0,
45
  "precision": 1.0,
46
- "recall": 1.0,
47
  "support": 14.0
48
  },
49
  "research": {
50
- "f1-score": 0.9655172413793104,
51
- "precision": 1.0,
52
- "recall": 0.9333333333333333,
53
  "support": 15.0
54
  },
55
  "support": {
@@ -59,9 +59,9 @@
59
  "support": 14.0
60
  },
61
  "weighted avg": {
62
- "f1-score": 0.9901755895670704,
63
- "precision": 0.9907727797001153,
64
- "recall": 0.9901960784313726,
65
  "support": 102.0
66
  }
67
  },
 
1
  {
2
+ "accepted_accuracy": 0.9314,
3
  "accepted_coverage": 1.0,
4
+ "accuracy": 0.9314,
5
+ "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/decision_phase_train_confusion_matrix.csv",
6
  "count": 102,
7
+ "dataset_path": "/content/agentic-intent-classifier/data/decision_phase/train.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "decision_phase",
10
+ "macro_f1": 0.9373,
11
  "per_class_metrics": {
12
+ "accuracy": 0.9313725490196079,
13
  "action": {
14
  "f1-score": 1.0,
15
  "precision": 1.0,
 
23
  "support": 16.0
24
  },
25
  "consideration": {
26
+ "f1-score": 0.8484848484848485,
27
+ "precision": 0.875,
28
+ "recall": 0.8235294117647058,
29
  "support": 17.0
30
  },
31
  "decision": {
32
+ "f1-score": 0.967741935483871,
33
  "precision": 1.0,
34
+ "recall": 0.9375,
35
  "support": 16.0
36
  },
37
  "macro avg": {
38
+ "f1-score": 0.9373409595183789,
39
+ "precision": 0.9401260504201681,
40
+ "recall": 0.9366096438575431,
41
  "support": 102.0
42
  },
43
  "post_purchase": {
44
+ "f1-score": 0.9629629629629629,
45
  "precision": 1.0,
46
+ "recall": 0.9285714285714286,
47
  "support": 14.0
48
  },
49
  "research": {
50
+ "f1-score": 0.8125,
51
+ "precision": 0.7647058823529411,
52
+ "recall": 0.8666666666666667,
53
  "support": 15.0
54
  },
55
  "support": {
 
59
  "support": 14.0
60
  },
61
  "weighted avg": {
62
+ "f1-score": 0.9322769253786015,
63
+ "precision": 0.9353373702422145,
64
+ "recall": 0.9313725490196079,
65
  "support": 102.0
66
  }
67
  },
artifacts/evaluation/latest/decision_phase_val_confusion_matrix.csv CHANGED
@@ -1,6 +1,6 @@
1
  ,awareness,research,consideration,decision,action,post_purchase,support
2
  awareness,5,0,0,0,0,0,0
3
- research,1,3,0,0,0,0,0
4
  consideration,0,0,5,0,0,0,0
5
  decision,0,0,1,3,0,0,0
6
  action,0,0,0,0,3,0,0
 
1
  ,awareness,research,consideration,decision,action,post_purchase,support
2
  awareness,5,0,0,0,0,0,0
3
+ research,2,2,0,0,0,0,0
4
  consideration,0,0,5,0,0,0,0
5
  decision,0,0,1,3,0,0,0
6
  action,0,0,0,0,3,0,0
artifacts/evaluation/latest/decision_phase_val_report.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "accepted_accuracy": 0.8966,
3
  "accepted_coverage": 1.0,
4
- "accuracy": 0.8966,
5
- "confusion_matrix_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/artifacts/evaluation/latest/decision_phase_val_confusion_matrix.csv",
6
  "count": 29,
7
- "dataset_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/data/decision_phase/val.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "decision_phase",
10
- "macro_f1": 0.8975,
11
  "per_class_metrics": {
12
- "accuracy": 0.896551724137931,
13
  "action": {
14
  "f1-score": 1.0,
15
  "precision": 1.0,
@@ -17,8 +17,8 @@
17
  "support": 3.0
18
  },
19
  "awareness": {
20
- "f1-score": 0.9090909090909091,
21
- "precision": 0.8333333333333334,
22
  "recall": 1.0,
23
  "support": 5.0
24
  },
@@ -35,9 +35,9 @@
35
  "support": 4.0
36
  },
37
  "macro avg": {
38
- "f1-score": 0.8974953617810761,
39
- "precision": 0.9166666666666667,
40
- "recall": 0.8928571428571429,
41
  "support": 29.0
42
  },
43
  "post_purchase": {
@@ -47,9 +47,9 @@
47
  "support": 4.0
48
  },
49
  "research": {
50
- "f1-score": 0.75,
51
- "precision": 0.75,
52
- "recall": 0.75,
53
  "support": 4.0
54
  },
55
  "support": {
@@ -59,9 +59,9 @@
59
  "support": 4.0
60
  },
61
  "weighted avg": {
62
- "f1-score": 0.8947604120017911,
63
- "precision": 0.9080459770114944,
64
- "recall": 0.896551724137931,
65
  "support": 29.0
66
  }
67
  },
 
1
  {
2
+ "accepted_accuracy": 0.8621,
3
  "accepted_coverage": 1.0,
4
+ "accuracy": 0.8621,
5
+ "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/decision_phase_val_confusion_matrix.csv",
6
  "count": 29,
7
+ "dataset_path": "/content/agentic-intent-classifier/data/decision_phase/val.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "decision_phase",
10
+ "macro_f1": 0.8612,
11
  "per_class_metrics": {
12
+ "accuracy": 0.8620689655172413,
13
  "action": {
14
  "f1-score": 1.0,
15
  "precision": 1.0,
 
17
  "support": 3.0
18
  },
19
  "awareness": {
20
+ "f1-score": 0.8333333333333334,
21
+ "precision": 0.7142857142857143,
22
  "recall": 1.0,
23
  "support": 5.0
24
  },
 
35
  "support": 4.0
36
  },
37
  "macro avg": {
38
+ "f1-score": 0.8611626468769326,
39
+ "precision": 0.8877551020408163,
40
+ "recall": 0.8571428571428571,
41
  "support": 29.0
42
  },
43
  "post_purchase": {
 
47
  "support": 4.0
48
  },
49
  "research": {
50
+ "f1-score": 0.5714285714285714,
51
+ "precision": 0.6666666666666666,
52
+ "recall": 0.5,
53
  "support": 4.0
54
  },
55
  "support": {
 
59
  "support": 4.0
60
  },
61
  "weighted avg": {
62
+ "f1-score": 0.8570682191371846,
63
+ "precision": 0.8760262725779967,
64
+ "recall": 0.8620689655172413,
65
  "support": 29.0
66
  }
67
  },
artifacts/evaluation/latest/iab_behavior_lock_regression.json CHANGED
@@ -1,21 +1,21 @@
1
  {
2
  "by_status": {
3
  "must_fix": {
4
- "failed": 0,
5
- "passed": 12,
6
  "total": 12
7
  }
8
  },
9
- "cases_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/examples/iab_behavior_lock_cases.json",
10
  "count": 12,
11
- "failed": 0,
12
- "passed": 12,
13
  "results": [
14
  {
15
  "actual": {
16
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
17
  "model_output.classification.iab_content.tier1.label": "Automotive",
18
- "model_output.classification.iab_content.tier2.label": "Auto Type"
19
  },
20
  "expected": {
21
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -23,15 +23,21 @@
23
  "model_output.classification.iab_content.tier2.label": "Auto Type"
24
  },
25
  "id": "car-buying-maps-to-automotive-buying",
26
- "mismatches": [],
 
 
 
 
 
 
27
  "notes": "Vehicle shopping queries should map into the automotive buying branch, not business sales.",
28
- "pass": true,
29
  "status": "must_fix",
30
  "text": "Which car to buy in 2026"
31
  },
32
  {
33
  "actual": {
34
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
35
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
36
  "model_output.classification.iab_content.tier2.label": "Computing",
37
  "model_output.classification.iab_content.tier3.label": "Laptops"
@@ -43,9 +49,15 @@
43
  "model_output.classification.iab_content.tier3.label": "Laptops"
44
  },
45
  "id": "laptop-buying-maps-to-laptops",
46
- "mismatches": [],
 
 
 
 
 
 
47
  "notes": "Laptop shopping should resolve into the laptops branch, not business sales.",
48
- "pass": true,
49
  "status": "must_fix",
50
  "text": "Which laptop to buy in 2026"
51
  },
@@ -53,8 +65,8 @@
53
  "actual": {
54
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
55
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
56
- "model_output.classification.iab_content.tier2.label": "Computing",
57
- "model_output.classification.iab_content.tier3.label": "Laptops"
58
  },
59
  "expected": {
60
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -63,9 +75,20 @@
63
  "model_output.classification.iab_content.tier3.label": "Laptops"
64
  },
65
  "id": "labtop-buying-maps-to-laptops",
66
- "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
67
  "notes": "Common typo handling should still land in the laptops branch.",
68
- "pass": true,
69
  "status": "must_fix",
70
  "text": "Which labtop to buy in 2026"
71
  },
@@ -74,7 +97,7 @@
74
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
75
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
76
  "model_output.classification.iab_content.tier2.label": "Computing",
77
- "model_output.classification.iab_content.tier3.label": "Software and Applications"
78
  },
79
  "expected": {
80
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -83,9 +106,15 @@
83
  "model_output.classification.iab_content.tier3.label": "Software and Applications"
84
  },
85
  "id": "crm-awareness-maps-to-sales",
86
- "mismatches": [],
 
 
 
 
 
 
87
  "notes": "CRM education should resolve to the closest business/sales path, not generic software.",
88
- "pass": true,
89
  "status": "must_fix",
90
  "text": "What is CRM software?"
91
  },
@@ -93,8 +122,8 @@
93
  "actual": {
94
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
95
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
96
- "model_output.classification.iab_content.tier2.label": "Computing",
97
- "model_output.classification.iab_content.tier3.label": "Internet"
98
  },
99
  "expected": {
100
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -103,18 +132,29 @@
103
  "model_output.classification.iab_content.tier3.label": "Internet"
104
  },
105
  "id": "crm-comparison-maps-to-sales",
106
- "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
107
  "notes": "Direct CRM vendor comparison should map cleanly into the sales domain.",
108
- "pass": true,
109
  "status": "must_fix",
110
  "text": "HubSpot vs Zoho for a small team"
111
  },
112
  {
113
  "actual": {
114
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
115
- "model_output.classification.iab_content.tier1.label": "Technology & Computing",
116
- "model_output.classification.iab_content.tier2.label": "Computing",
117
- "model_output.classification.iab_content.tier3.label": "Internet"
118
  },
119
  "expected": {
120
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -123,25 +163,47 @@
123
  "model_output.classification.iab_content.tier3.label": "Internet"
124
  },
125
  "id": "marketing-tools-map-to-marketing",
126
- "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  "notes": "Marketing tool discovery should map to the marketing and advertising branch.",
128
- "pass": true,
129
  "status": "must_fix",
130
  "text": "Best AI SEO tools for content teams"
131
  },
132
  {
133
  "actual": {
134
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
135
- "model_output.classification.iab_content.tier1.label": "Technology & Computing"
136
  },
137
  "expected": {
138
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
139
  "model_output.classification.iab_content.tier1.label": "Technology & Computing"
140
  },
141
  "id": "ml-explanation-maps-to-ai",
142
- "mismatches": [],
 
 
 
 
 
 
143
  "notes": "ML and NLP educational prompts should land in the AI branch.",
144
- "pass": true,
145
  "status": "must_fix",
146
  "text": "What is intent classification in NLP?"
147
  },
@@ -150,7 +212,7 @@
150
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
151
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
152
  "model_output.classification.iab_content.tier2.label": "Computing",
153
- "model_output.classification.iab_content.tier3.label": "Internet"
154
  },
155
  "expected": {
156
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -159,15 +221,21 @@
159
  "model_output.classification.iab_content.tier3.label": "Internet"
160
  },
161
  "id": "support-credential-help-maps-to-business-it",
162
- "mismatches": [],
 
 
 
 
 
 
163
  "notes": "Credential and account help should map to business IT rather than generic business.",
164
- "pass": true,
165
  "status": "must_fix",
166
  "text": "How do I reset my password?"
167
  },
168
  {
169
  "actual": {
170
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
171
  "model_output.classification.iab_content.tier1.label": "Food & Drink",
172
  "model_output.classification.iab_content.tier2.label": "Dining Out"
173
  },
@@ -177,18 +245,24 @@
177
  "model_output.classification.iab_content.tier2.label": "Dining Out"
178
  },
179
  "id": "restaurant-booking-maps-to-dining-out",
180
- "mismatches": [],
 
 
 
 
 
 
181
  "notes": "Generic dining requests should not inherit the repo's business default.",
182
- "pass": true,
183
  "status": "must_fix",
184
  "text": "Book a table for 2 tonight"
185
  },
186
  {
187
  "actual": {
188
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
189
- "model_output.classification.iab_content.tier1.label": "Hobbies & Interests",
190
- "model_output.classification.iab_content.tier2.label": "Content Production",
191
- "model_output.classification.iab_content.tier3.label": "Freelance Writing"
192
  },
193
  "expected": {
194
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -197,19 +271,35 @@
197
  "model_output.classification.iab_content.tier3.label": "Freelance Writing"
198
  },
199
  "id": "trial-signup-maps-to-software",
200
- "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
201
  "notes": "Software action queries should map to the software/application branch.",
202
- "pass": true,
203
  "status": "must_fix",
204
  "text": "Start my free trial"
205
  },
206
  {
207
  "actual": {
208
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
209
- "model_output.classification.iab_content.tier1.label": "Technology & Computing",
210
- "model_output.classification.iab_content.tier2.label": "Computing",
211
- "model_output.classification.iab_content.tier3.label": "Software and Applications",
212
- "model_output.classification.iab_content.tier4.label": "Communication"
213
  },
214
  "expected": {
215
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -219,15 +309,41 @@
219
  "model_output.classification.iab_content.tier4.label": "Communication"
220
  },
221
  "id": "communication-software-maps-to-tier4",
222
- "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
223
  "notes": "Full taxonomy support should preserve the tier4 communication branch.",
224
- "pass": true,
225
  "status": "must_fix",
226
  "text": "best communication software for remote teams"
227
  },
228
  {
229
  "actual": {
230
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
231
  "model_output.classification.iab_content.tier1.label": "Food & Drink"
232
  },
233
  "expected": {
@@ -235,9 +351,15 @@
235
  "model_output.classification.iab_content.tier1.label": "Food & Drink"
236
  },
237
  "id": "vodka-query-maps-to-alcoholic-beverages",
238
- "mismatches": [],
 
 
 
 
 
 
239
  "notes": "Food and beverage prompts should not fall through to the business default.",
240
- "pass": true,
241
  "status": "must_fix",
242
  "text": "what is best vodka drink should i try"
243
  }
 
1
  {
2
  "by_status": {
3
  "must_fix": {
4
+ "failed": 12,
5
+ "passed": 0,
6
  "total": 12
7
  }
8
  },
9
+ "cases_path": "/content/agentic-intent-classifier/examples/iab_behavior_lock_cases.json",
10
  "count": 12,
11
+ "failed": 12,
12
+ "passed": 0,
13
  "results": [
14
  {
15
  "actual": {
16
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
17
  "model_output.classification.iab_content.tier1.label": "Automotive",
18
+ "model_output.classification.iab_content.tier2.label": null
19
  },
20
  "expected": {
21
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
23
  "model_output.classification.iab_content.tier2.label": "Auto Type"
24
  },
25
  "id": "car-buying-maps-to-automotive-buying",
26
+ "mismatches": [
27
+ {
28
+ "actual": null,
29
+ "expected": "Auto Type",
30
+ "path": "model_output.classification.iab_content.tier2.label"
31
+ }
32
+ ],
33
  "notes": "Vehicle shopping queries should map into the automotive buying branch, not business sales.",
34
+ "pass": false,
35
  "status": "must_fix",
36
  "text": "Which car to buy in 2026"
37
  },
38
  {
39
  "actual": {
40
+ "model_output.classification.iab_content.mapping_mode": "exact",
41
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
42
  "model_output.classification.iab_content.tier2.label": "Computing",
43
  "model_output.classification.iab_content.tier3.label": "Laptops"
 
49
  "model_output.classification.iab_content.tier3.label": "Laptops"
50
  },
51
  "id": "laptop-buying-maps-to-laptops",
52
+ "mismatches": [
53
+ {
54
+ "actual": "exact",
55
+ "expected": "nearest_equivalent",
56
+ "path": "model_output.classification.iab_content.mapping_mode"
57
+ }
58
+ ],
59
  "notes": "Laptop shopping should resolve into the laptops branch, not business sales.",
60
+ "pass": false,
61
  "status": "must_fix",
62
  "text": "Which laptop to buy in 2026"
63
  },
 
65
  "actual": {
66
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
67
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
68
+ "model_output.classification.iab_content.tier2.label": null,
69
+ "model_output.classification.iab_content.tier3.label": null
70
  },
71
  "expected": {
72
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
75
  "model_output.classification.iab_content.tier3.label": "Laptops"
76
  },
77
  "id": "labtop-buying-maps-to-laptops",
78
+ "mismatches": [
79
+ {
80
+ "actual": null,
81
+ "expected": "Computing",
82
+ "path": "model_output.classification.iab_content.tier2.label"
83
+ },
84
+ {
85
+ "actual": null,
86
+ "expected": "Laptops",
87
+ "path": "model_output.classification.iab_content.tier3.label"
88
+ }
89
+ ],
90
  "notes": "Common typo handling should still land in the laptops branch.",
91
+ "pass": false,
92
  "status": "must_fix",
93
  "text": "Which labtop to buy in 2026"
94
  },
 
97
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
98
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
99
  "model_output.classification.iab_content.tier2.label": "Computing",
100
+ "model_output.classification.iab_content.tier3.label": null
101
  },
102
  "expected": {
103
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
106
  "model_output.classification.iab_content.tier3.label": "Software and Applications"
107
  },
108
  "id": "crm-awareness-maps-to-sales",
109
+ "mismatches": [
110
+ {
111
+ "actual": null,
112
+ "expected": "Software and Applications",
113
+ "path": "model_output.classification.iab_content.tier3.label"
114
+ }
115
+ ],
116
  "notes": "CRM education should resolve to the closest business/sales path, not generic software.",
117
+ "pass": false,
118
  "status": "must_fix",
119
  "text": "What is CRM software?"
120
  },
 
122
  "actual": {
123
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
124
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
125
+ "model_output.classification.iab_content.tier2.label": null,
126
+ "model_output.classification.iab_content.tier3.label": null
127
  },
128
  "expected": {
129
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
132
  "model_output.classification.iab_content.tier3.label": "Internet"
133
  },
134
  "id": "crm-comparison-maps-to-sales",
135
+ "mismatches": [
136
+ {
137
+ "actual": null,
138
+ "expected": "Computing",
139
+ "path": "model_output.classification.iab_content.tier2.label"
140
+ },
141
+ {
142
+ "actual": null,
143
+ "expected": "Internet",
144
+ "path": "model_output.classification.iab_content.tier3.label"
145
+ }
146
+ ],
147
  "notes": "Direct CRM vendor comparison should map cleanly into the sales domain.",
148
+ "pass": false,
149
  "status": "must_fix",
150
  "text": "HubSpot vs Zoho for a small team"
151
  },
152
  {
153
  "actual": {
154
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
155
+ "model_output.classification.iab_content.tier1.label": "Careers",
156
+ "model_output.classification.iab_content.tier2.label": null,
157
+ "model_output.classification.iab_content.tier3.label": null
158
  },
159
  "expected": {
160
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
163
  "model_output.classification.iab_content.tier3.label": "Internet"
164
  },
165
  "id": "marketing-tools-map-to-marketing",
166
+ "mismatches": [
167
+ {
168
+ "actual": "Careers",
169
+ "expected": "Technology & Computing",
170
+ "path": "model_output.classification.iab_content.tier1.label"
171
+ },
172
+ {
173
+ "actual": null,
174
+ "expected": "Computing",
175
+ "path": "model_output.classification.iab_content.tier2.label"
176
+ },
177
+ {
178
+ "actual": null,
179
+ "expected": "Internet",
180
+ "path": "model_output.classification.iab_content.tier3.label"
181
+ }
182
+ ],
183
  "notes": "Marketing tool discovery should map to the marketing and advertising branch.",
184
+ "pass": false,
185
  "status": "must_fix",
186
  "text": "Best AI SEO tools for content teams"
187
  },
188
  {
189
  "actual": {
190
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
191
+ "model_output.classification.iab_content.tier1.label": "Sports"
192
  },
193
  "expected": {
194
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
195
  "model_output.classification.iab_content.tier1.label": "Technology & Computing"
196
  },
197
  "id": "ml-explanation-maps-to-ai",
198
+ "mismatches": [
199
+ {
200
+ "actual": "Sports",
201
+ "expected": "Technology & Computing",
202
+ "path": "model_output.classification.iab_content.tier1.label"
203
+ }
204
+ ],
205
  "notes": "ML and NLP educational prompts should land in the AI branch.",
206
+ "pass": false,
207
  "status": "must_fix",
208
  "text": "What is intent classification in NLP?"
209
  },
 
212
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
213
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
214
  "model_output.classification.iab_content.tier2.label": "Computing",
215
+ "model_output.classification.iab_content.tier3.label": null
216
  },
217
  "expected": {
218
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
221
  "model_output.classification.iab_content.tier3.label": "Internet"
222
  },
223
  "id": "support-credential-help-maps-to-business-it",
224
+ "mismatches": [
225
+ {
226
+ "actual": null,
227
+ "expected": "Internet",
228
+ "path": "model_output.classification.iab_content.tier3.label"
229
+ }
230
+ ],
231
  "notes": "Credential and account help should map to business IT rather than generic business.",
232
+ "pass": false,
233
  "status": "must_fix",
234
  "text": "How do I reset my password?"
235
  },
236
  {
237
  "actual": {
238
+ "model_output.classification.iab_content.mapping_mode": "exact",
239
  "model_output.classification.iab_content.tier1.label": "Food & Drink",
240
  "model_output.classification.iab_content.tier2.label": "Dining Out"
241
  },
 
245
  "model_output.classification.iab_content.tier2.label": "Dining Out"
246
  },
247
  "id": "restaurant-booking-maps-to-dining-out",
248
+ "mismatches": [
249
+ {
250
+ "actual": "exact",
251
+ "expected": "nearest_equivalent",
252
+ "path": "model_output.classification.iab_content.mapping_mode"
253
+ }
254
+ ],
255
  "notes": "Generic dining requests should not inherit the repo's business default.",
256
+ "pass": false,
257
  "status": "must_fix",
258
  "text": "Book a table for 2 tonight"
259
  },
260
  {
261
  "actual": {
262
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
263
+ "model_output.classification.iab_content.tier1.label": "Sports",
264
+ "model_output.classification.iab_content.tier2.label": null,
265
+ "model_output.classification.iab_content.tier3.label": null
266
  },
267
  "expected": {
268
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
271
  "model_output.classification.iab_content.tier3.label": "Freelance Writing"
272
  },
273
  "id": "trial-signup-maps-to-software",
274
+ "mismatches": [
275
+ {
276
+ "actual": "Sports",
277
+ "expected": "Hobbies & Interests",
278
+ "path": "model_output.classification.iab_content.tier1.label"
279
+ },
280
+ {
281
+ "actual": null,
282
+ "expected": "Content Production",
283
+ "path": "model_output.classification.iab_content.tier2.label"
284
+ },
285
+ {
286
+ "actual": null,
287
+ "expected": "Freelance Writing",
288
+ "path": "model_output.classification.iab_content.tier3.label"
289
+ }
290
+ ],
291
  "notes": "Software action queries should map to the software/application branch.",
292
+ "pass": false,
293
  "status": "must_fix",
294
  "text": "Start my free trial"
295
  },
296
  {
297
  "actual": {
298
+ "model_output.classification.iab_content.mapping_mode": "exact",
299
+ "model_output.classification.iab_content.tier1.label": "Careers",
300
+ "model_output.classification.iab_content.tier2.label": "Remote Working",
301
+ "model_output.classification.iab_content.tier3.label": null,
302
+ "model_output.classification.iab_content.tier4.label": null
303
  },
304
  "expected": {
305
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
309
  "model_output.classification.iab_content.tier4.label": "Communication"
310
  },
311
  "id": "communication-software-maps-to-tier4",
312
+ "mismatches": [
313
+ {
314
+ "actual": "Careers",
315
+ "expected": "Technology & Computing",
316
+ "path": "model_output.classification.iab_content.tier1.label"
317
+ },
318
+ {
319
+ "actual": "exact",
320
+ "expected": "nearest_equivalent",
321
+ "path": "model_output.classification.iab_content.mapping_mode"
322
+ },
323
+ {
324
+ "actual": "Remote Working",
325
+ "expected": "Computing",
326
+ "path": "model_output.classification.iab_content.tier2.label"
327
+ },
328
+ {
329
+ "actual": null,
330
+ "expected": "Software and Applications",
331
+ "path": "model_output.classification.iab_content.tier3.label"
332
+ },
333
+ {
334
+ "actual": null,
335
+ "expected": "Communication",
336
+ "path": "model_output.classification.iab_content.tier4.label"
337
+ }
338
+ ],
339
  "notes": "Full taxonomy support should preserve the tier4 communication branch.",
340
+ "pass": false,
341
  "status": "must_fix",
342
  "text": "best communication software for remote teams"
343
  },
344
  {
345
  "actual": {
346
+ "model_output.classification.iab_content.mapping_mode": "exact",
347
  "model_output.classification.iab_content.tier1.label": "Food & Drink"
348
  },
349
  "expected": {
 
351
  "model_output.classification.iab_content.tier1.label": "Food & Drink"
352
  },
353
  "id": "vodka-query-maps-to-alcoholic-beverages",
354
+ "mismatches": [
355
+ {
356
+ "actual": "exact",
357
+ "expected": "nearest_equivalent",
358
+ "path": "model_output.classification.iab_content.mapping_mode"
359
+ }
360
+ ],
361
  "notes": "Food and beverage prompts should not fall through to the business default.",
362
+ "pass": false,
363
  "status": "must_fix",
364
  "text": "what is best vodka drink should i try"
365
  }
artifacts/evaluation/latest/iab_content_cross_vertical_benchmark_report.json CHANGED
@@ -1,93 +1,98 @@
1
  {
2
- "accepted_accuracy": 0.3444,
3
- "accepted_coverage": 1.0,
4
- "accuracy": 0.3444,
5
  "count": 90,
6
- "dataset_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/data/iab_cross_vertical_benchmark.jsonl",
7
  "difficulty_breakdown": {
8
  "easy": {
9
- "accepted_accuracy": 0.2667,
10
- "accepted_coverage": 1.0,
11
- "accuracy": 0.2667,
12
  "count": 30,
13
- "fallback_rate": 0.0,
14
- "macro_f1": 0.1633
15
  },
16
  "hard": {
17
- "accepted_accuracy": 0.3667,
18
- "accepted_coverage": 1.0,
19
- "accuracy": 0.3667,
20
  "count": 30,
21
- "fallback_rate": 0.0,
22
- "macro_f1": 0.2174
23
  },
24
  "medium": {
25
- "accepted_accuracy": 0.4,
26
- "accepted_coverage": 1.0,
27
- "accuracy": 0.4,
28
  "count": 30,
29
- "fallback_rate": 0.0,
30
- "macro_f1": 0.2667
31
  }
32
  },
33
- "fallback_rate": 0.0,
34
  "head": "iab_content",
35
- "macro_f1": 0.1808,
36
- "primary_source": "embedding_retrieval",
37
  "suite": "cross_vertical_benchmark",
38
  "tier_metrics": {
39
- "average_prediction_depth": 2.5333,
40
  "error_buckets": {
41
- "exact_match": 31,
42
- "parent_safe_stop": 5,
43
- "right_tier1_wrong_tier2": 19,
44
- "wrong_deep_leaf": 13,
45
- "wrong_tier1": 22
46
  },
47
- "exact_path_accuracy": 0.3444,
48
- "parent_safe_accuracy": 0.4889,
49
- "tier1_accuracy": 0.7556,
50
- "tier2_accuracy": 0.5238,
51
- "tier3_accuracy": 0.4762,
52
- "tier4_accuracy": 1.0
53
  },
54
  "view_metrics": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  "combined_path": {
56
- "average_prediction_depth": 2.5333,
57
  "error_buckets": {
58
- "exact_match": 27,
59
- "parent_safe_stop": 5,
60
- "right_tier1_wrong_tier2": 19,
61
- "wrong_deep_leaf": 17,
62
- "wrong_tier1": 22
63
  },
64
- "exact_path_accuracy": 0.3,
65
- "fallback_overuse_count": 12,
66
- "fallback_rate": 0.1333,
67
- "parent_safe_accuracy": 0.4444,
68
- "tier1_accuracy": 0.7556,
69
- "tier2_accuracy": 0.5238,
70
- "tier3_accuracy": 0.381,
71
- "tier4_accuracy": 0.5
72
  },
73
  "disagreements": {
74
- "retrieval_vs_combined": 0
75
  },
76
- "embedding_retrieval": {
77
- "average_prediction_depth": 2.5333,
78
- "error_buckets": {
79
- "exact_match": 27,
80
- "parent_safe_stop": 5,
81
- "right_tier1_wrong_tier2": 19,
82
- "wrong_deep_leaf": 17,
83
- "wrong_tier1": 22
84
- },
85
- "exact_path_accuracy": 0.3,
86
- "parent_safe_accuracy": 0.4444,
87
- "tier1_accuracy": 0.7556,
88
- "tier2_accuracy": 0.5238,
89
- "tier3_accuracy": 0.381,
90
- "tier4_accuracy": 0.5
91
  }
92
  }
93
  }
 
1
  {
2
+ "accepted_accuracy": 0.3,
3
+ "accepted_coverage": 0.8889,
4
+ "accuracy": 0.2667,
5
  "count": 90,
6
+ "dataset_path": "/content/agentic-intent-classifier/data/iab_cross_vertical_benchmark.jsonl",
7
  "difficulty_breakdown": {
8
  "easy": {
9
+ "accepted_accuracy": 0.28,
10
+ "accepted_coverage": 0.8333,
11
+ "accuracy": 0.2333,
12
  "count": 30,
13
+ "fallback_rate": 0.1667,
14
+ "macro_f1": 0.1556
15
  },
16
  "hard": {
17
+ "accepted_accuracy": 0.3846,
18
+ "accepted_coverage": 0.8667,
19
+ "accuracy": 0.3333,
20
  "count": 30,
21
+ "fallback_rate": 0.1333,
22
+ "macro_f1": 0.2083
23
  },
24
  "medium": {
25
+ "accepted_accuracy": 0.2414,
26
+ "accepted_coverage": 0.9667,
27
+ "accuracy": 0.2333,
28
  "count": 30,
29
+ "fallback_rate": 0.0333,
30
+ "macro_f1": 0.1458
31
  }
32
  },
33
+ "fallback_rate": 0.1111,
34
  "head": "iab_content",
35
+ "macro_f1": 0.1418,
36
+ "primary_source": "supervised_classifier",
37
  "suite": "cross_vertical_benchmark",
38
  "tier_metrics": {
39
+ "average_prediction_depth": 1.9556,
40
  "error_buckets": {
41
+ "exact_match": 24,
42
+ "parent_safe_stop": 10,
43
+ "right_tier1_wrong_tier2": 15,
44
+ "wrong_deep_leaf": 12,
45
+ "wrong_tier1": 29
46
  },
47
+ "exact_path_accuracy": 0.2667,
48
+ "parent_safe_accuracy": 0.4556,
49
+ "tier1_accuracy": 0.6778,
50
+ "tier2_accuracy": 0.4762,
51
+ "tier3_accuracy": 0.2143,
52
+ "tier4_accuracy": 0.0
53
  },
54
  "view_metrics": {
55
+ "classifier": {
56
+ "average_prediction_depth": 1.9556,
57
+ "error_buckets": {
58
+ "exact_match": 24,
59
+ "parent_safe_stop": 10,
60
+ "right_tier1_wrong_tier2": 15,
61
+ "wrong_deep_leaf": 12,
62
+ "wrong_tier1": 29
63
+ },
64
+ "exact_path_accuracy": 0.2667,
65
+ "parent_safe_accuracy": 0.4556,
66
+ "tier1_accuracy": 0.6778,
67
+ "tier2_accuracy": 0.4762,
68
+ "tier3_accuracy": 0.2143,
69
+ "tier4_accuracy": 0.0
70
+ },
71
  "combined_path": {
72
+ "average_prediction_depth": 1.9556,
73
  "error_buckets": {
74
+ "exact_match": 24,
75
+ "parent_safe_stop": 10,
76
+ "right_tier1_wrong_tier2": 15,
77
+ "wrong_deep_leaf": 12,
78
+ "wrong_tier1": 29
79
  },
80
+ "exact_path_accuracy": 0.2667,
81
+ "fallback_overuse_count": 17,
82
+ "fallback_rate": 0.1889,
83
+ "parent_safe_accuracy": 0.4556,
84
+ "tier1_accuracy": 0.6778,
85
+ "tier2_accuracy": 0.4762,
86
+ "tier3_accuracy": 0.2143,
87
+ "tier4_accuracy": 0.0
88
  },
89
  "disagreements": {
90
+ "classifier_vs_combined": 0
91
  },
92
+ "shadow_embedding_retrieval": {
93
+ "hint": "Set IAB_EVAL_INCLUDE_SHADOW_RETRIEVAL=1 to run shadow embedding retrieval (downloads/loads gte-Qwen2 when index is present).",
94
+ "reason": "disabled_by_default",
95
+ "skipped": true
 
 
 
 
 
 
 
 
 
 
 
96
  }
97
  }
98
  }
artifacts/evaluation/latest/iab_content_difficulty_benchmark_report.json CHANGED
@@ -1,93 +1,98 @@
1
  {
2
- "accepted_accuracy": 0.3782,
3
- "accepted_coverage": 1.0,
4
- "accuracy": 0.3782,
5
  "count": 156,
6
- "dataset_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/data/iab_benchmark.jsonl",
7
  "difficulty_breakdown": {
8
  "easy": {
9
- "accepted_accuracy": 0.4038,
10
- "accepted_coverage": 1.0,
11
- "accuracy": 0.4038,
12
  "count": 52,
13
- "fallback_rate": 0.0,
14
- "macro_f1": 0.2171
15
  },
16
  "hard": {
17
- "accepted_accuracy": 0.3077,
18
- "accepted_coverage": 1.0,
19
- "accuracy": 0.3077,
20
  "count": 52,
21
- "fallback_rate": 0.0,
22
- "macro_f1": 0.1626
23
  },
24
  "medium": {
25
- "accepted_accuracy": 0.4231,
26
- "accepted_coverage": 1.0,
27
- "accuracy": 0.4231,
28
  "count": 52,
29
- "fallback_rate": 0.0,
30
- "macro_f1": 0.2265
31
  }
32
  },
33
- "fallback_rate": 0.0,
34
  "head": "iab_content",
35
- "macro_f1": 0.1593,
36
- "primary_source": "embedding_retrieval",
37
  "suite": "difficulty_benchmark",
38
  "tier_metrics": {
39
- "average_prediction_depth": 2.5833,
40
  "error_buckets": {
41
- "exact_match": 59,
42
- "parent_safe_stop": 17,
43
- "right_tier1_wrong_tier2": 42,
44
- "wrong_deep_leaf": 13,
45
- "wrong_tier1": 25
46
  },
47
- "exact_path_accuracy": 0.3782,
48
- "parent_safe_accuracy": 0.6154,
49
- "tier1_accuracy": 0.8397,
50
- "tier2_accuracy": 0.5705,
51
- "tier3_accuracy": 0.5648,
52
- "tier4_accuracy": 0.5833
53
  },
54
  "view_metrics": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  "combined_path": {
56
- "average_prediction_depth": 2.5833,
57
  "error_buckets": {
58
- "exact_match": 48,
59
- "parent_safe_stop": 17,
60
- "right_tier1_wrong_tier2": 42,
61
- "wrong_deep_leaf": 24,
62
- "wrong_tier1": 25
63
  },
64
- "exact_path_accuracy": 0.3077,
65
- "fallback_overuse_count": 11,
66
- "fallback_rate": 0.0705,
67
- "parent_safe_accuracy": 0.5449,
68
- "tier1_accuracy": 0.8397,
69
- "tier2_accuracy": 0.5705,
70
- "tier3_accuracy": 0.4352,
71
- "tier4_accuracy": 0.25
72
  },
73
  "disagreements": {
74
- "retrieval_vs_combined": 0
75
  },
76
- "embedding_retrieval": {
77
- "average_prediction_depth": 2.5833,
78
- "error_buckets": {
79
- "exact_match": 48,
80
- "parent_safe_stop": 17,
81
- "right_tier1_wrong_tier2": 42,
82
- "wrong_deep_leaf": 24,
83
- "wrong_tier1": 25
84
- },
85
- "exact_path_accuracy": 0.3077,
86
- "parent_safe_accuracy": 0.5449,
87
- "tier1_accuracy": 0.8397,
88
- "tier2_accuracy": 0.5705,
89
- "tier3_accuracy": 0.4352,
90
- "tier4_accuracy": 0.25
91
  }
92
  }
93
  }
 
1
  {
2
+ "accepted_accuracy": 0.4219,
3
+ "accepted_coverage": 0.8205,
4
+ "accuracy": 0.3462,
5
  "count": 156,
6
+ "dataset_path": "/content/agentic-intent-classifier/data/iab_benchmark.jsonl",
7
  "difficulty_breakdown": {
8
  "easy": {
9
+ "accepted_accuracy": 0.4889,
10
+ "accepted_coverage": 0.8654,
11
+ "accuracy": 0.4231,
12
  "count": 52,
13
+ "fallback_rate": 0.1346,
14
+ "macro_f1": 0.2305
15
  },
16
  "hard": {
17
+ "accepted_accuracy": 0.3846,
18
+ "accepted_coverage": 0.75,
19
+ "accuracy": 0.2885,
20
  "count": 52,
21
+ "fallback_rate": 0.25,
22
+ "macro_f1": 0.1638
23
  },
24
  "medium": {
25
+ "accepted_accuracy": 0.3864,
26
+ "accepted_coverage": 0.8462,
27
+ "accuracy": 0.3269,
28
  "count": 52,
29
+ "fallback_rate": 0.1538,
30
+ "macro_f1": 0.1819
31
  }
32
  },
33
+ "fallback_rate": 0.1795,
34
  "head": "iab_content",
35
+ "macro_f1": 0.1478,
36
+ "primary_source": "supervised_classifier",
37
  "suite": "difficulty_benchmark",
38
  "tier_metrics": {
39
+ "average_prediction_depth": 2.0256,
40
  "error_buckets": {
41
+ "exact_match": 54,
42
+ "parent_safe_stop": 21,
43
+ "right_tier1_wrong_tier2": 37,
44
+ "wrong_deep_leaf": 3,
45
+ "wrong_tier1": 41
46
  },
47
+ "exact_path_accuracy": 0.3462,
48
+ "parent_safe_accuracy": 0.6603,
49
+ "tier1_accuracy": 0.7372,
50
+ "tier2_accuracy": 0.5,
51
+ "tier3_accuracy": 0.3519,
52
+ "tier4_accuracy": 0.2917
53
  },
54
  "view_metrics": {
55
+ "classifier": {
56
+ "average_prediction_depth": 2.0256,
57
+ "error_buckets": {
58
+ "exact_match": 49,
59
+ "parent_safe_stop": 21,
60
+ "right_tier1_wrong_tier2": 37,
61
+ "wrong_deep_leaf": 8,
62
+ "wrong_tier1": 41
63
+ },
64
+ "exact_path_accuracy": 0.3141,
65
+ "parent_safe_accuracy": 0.6282,
66
+ "tier1_accuracy": 0.7372,
67
+ "tier2_accuracy": 0.5,
68
+ "tier3_accuracy": 0.3056,
69
+ "tier4_accuracy": 0.2083
70
+ },
71
  "combined_path": {
72
+ "average_prediction_depth": 2.0256,
73
  "error_buckets": {
74
+ "exact_match": 49,
75
+ "parent_safe_stop": 21,
76
+ "right_tier1_wrong_tier2": 37,
77
+ "wrong_deep_leaf": 8,
78
+ "wrong_tier1": 41
79
  },
80
+ "exact_path_accuracy": 0.3141,
81
+ "fallback_overuse_count": 14,
82
+ "fallback_rate": 0.0897,
83
+ "parent_safe_accuracy": 0.6282,
84
+ "tier1_accuracy": 0.7372,
85
+ "tier2_accuracy": 0.5,
86
+ "tier3_accuracy": 0.3056,
87
+ "tier4_accuracy": 0.2083
88
  },
89
  "disagreements": {
90
+ "classifier_vs_combined": 0
91
  },
92
+ "shadow_embedding_retrieval": {
93
+ "hint": "Set IAB_EVAL_INCLUDE_SHADOW_RETRIEVAL=1 to run shadow embedding retrieval (downloads/loads gte-Qwen2 when index is present).",
94
+ "reason": "disabled_by_default",
95
+ "skipped": true
 
 
 
 
 
 
 
 
 
 
 
96
  }
97
  }
98
  }
artifacts/evaluation/latest/iab_content_extended_cases_report.json CHANGED
@@ -1,64 +1,69 @@
1
  {
2
- "accepted_accuracy": 0.25,
3
- "accepted_coverage": 1.0,
4
- "accuracy": 0.25,
5
  "count": 8,
6
- "dataset_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/data/iab/extended_cases.jsonl",
7
- "fallback_rate": 0.0,
8
  "head": "iab_content",
9
- "macro_f1": 0.1429,
10
- "primary_source": "embedding_retrieval",
11
  "suite": "extended_cases",
12
  "tier_metrics": {
13
- "average_prediction_depth": 2.375,
14
  "error_buckets": {
15
- "exact_match": 2,
16
- "right_tier1_wrong_tier2": 3,
17
  "wrong_deep_leaf": 2,
18
  "wrong_tier1": 1
19
  },
20
- "exact_path_accuracy": 0.25,
21
  "parent_safe_accuracy": 0.375,
22
  "tier1_accuracy": 0.875,
23
- "tier2_accuracy": 0.4286,
24
- "tier3_accuracy": 1.0,
25
  "tier4_accuracy": 0.0
26
  },
27
  "view_metrics": {
28
- "combined_path": {
29
- "average_prediction_depth": 2.375,
30
  "error_buckets": {
31
- "exact_match": 2,
32
- "right_tier1_wrong_tier2": 3,
33
  "wrong_deep_leaf": 2,
34
  "wrong_tier1": 1
35
  },
36
- "exact_path_accuracy": 0.25,
37
- "fallback_overuse_count": 1,
38
- "fallback_rate": 0.125,
39
  "parent_safe_accuracy": 0.375,
40
  "tier1_accuracy": 0.875,
41
- "tier2_accuracy": 0.4286,
42
  "tier3_accuracy": 0.0,
43
  "tier4_accuracy": 0.0
44
  },
45
- "disagreements": {
46
- "retrieval_vs_combined": 0
47
- },
48
- "embedding_retrieval": {
49
- "average_prediction_depth": 2.375,
50
  "error_buckets": {
51
- "exact_match": 2,
52
- "right_tier1_wrong_tier2": 3,
53
  "wrong_deep_leaf": 2,
54
  "wrong_tier1": 1
55
  },
56
- "exact_path_accuracy": 0.25,
 
 
57
  "parent_safe_accuracy": 0.375,
58
  "tier1_accuracy": 0.875,
59
- "tier2_accuracy": 0.4286,
60
  "tier3_accuracy": 0.0,
61
  "tier4_accuracy": 0.0
 
 
 
 
 
 
 
 
62
  }
63
  }
64
  }
 
1
  {
2
+ "accepted_accuracy": 0.4286,
3
+ "accepted_coverage": 0.875,
4
+ "accuracy": 0.375,
5
  "count": 8,
6
+ "dataset_path": "/content/agentic-intent-classifier/data/iab/extended_cases.jsonl",
7
+ "fallback_rate": 0.125,
8
  "head": "iab_content",
9
+ "macro_f1": 0.2308,
10
+ "primary_source": "supervised_classifier",
11
  "suite": "extended_cases",
12
  "tier_metrics": {
13
+ "average_prediction_depth": 2.0,
14
  "error_buckets": {
15
+ "exact_match": 3,
16
+ "right_tier1_wrong_tier2": 2,
17
  "wrong_deep_leaf": 2,
18
  "wrong_tier1": 1
19
  },
20
+ "exact_path_accuracy": 0.375,
21
  "parent_safe_accuracy": 0.375,
22
  "tier1_accuracy": 0.875,
23
+ "tier2_accuracy": 0.5714,
24
+ "tier3_accuracy": 0.0,
25
  "tier4_accuracy": 0.0
26
  },
27
  "view_metrics": {
28
+ "classifier": {
29
+ "average_prediction_depth": 2.0,
30
  "error_buckets": {
31
+ "exact_match": 3,
32
+ "right_tier1_wrong_tier2": 2,
33
  "wrong_deep_leaf": 2,
34
  "wrong_tier1": 1
35
  },
36
+ "exact_path_accuracy": 0.375,
 
 
37
  "parent_safe_accuracy": 0.375,
38
  "tier1_accuracy": 0.875,
39
+ "tier2_accuracy": 0.5714,
40
  "tier3_accuracy": 0.0,
41
  "tier4_accuracy": 0.0
42
  },
43
+ "combined_path": {
44
+ "average_prediction_depth": 2.0,
 
 
 
45
  "error_buckets": {
46
+ "exact_match": 3,
47
+ "right_tier1_wrong_tier2": 2,
48
  "wrong_deep_leaf": 2,
49
  "wrong_tier1": 1
50
  },
51
+ "exact_path_accuracy": 0.375,
52
+ "fallback_overuse_count": 2,
53
+ "fallback_rate": 0.25,
54
  "parent_safe_accuracy": 0.375,
55
  "tier1_accuracy": 0.875,
56
+ "tier2_accuracy": 0.5714,
57
  "tier3_accuracy": 0.0,
58
  "tier4_accuracy": 0.0
59
+ },
60
+ "disagreements": {
61
+ "classifier_vs_combined": 0
62
+ },
63
+ "shadow_embedding_retrieval": {
64
+ "hint": "Set IAB_EVAL_INCLUDE_SHADOW_RETRIEVAL=1 to run shadow embedding retrieval (downloads/loads gte-Qwen2 when index is present).",
65
+ "reason": "disabled_by_default",
66
+ "skipped": true
67
  }
68
  }
69
  }
artifacts/evaluation/latest/iab_content_hard_cases_report.json CHANGED
@@ -1,66 +1,66 @@
1
  {
2
- "accepted_accuracy": 0.25,
3
- "accepted_coverage": 1.0,
4
- "accuracy": 0.25,
5
  "count": 8,
6
- "dataset_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/data/iab/hard_cases.jsonl",
7
- "fallback_rate": 0.0,
8
  "head": "iab_content",
9
- "macro_f1": 0.1429,
10
- "primary_source": "embedding_retrieval",
11
  "suite": "hard_cases",
12
  "tier_metrics": {
13
- "average_prediction_depth": 2.375,
14
  "error_buckets": {
15
- "exact_match": 2,
16
- "parent_safe_stop": 1,
17
- "right_tier1_wrong_tier2": 2,
18
- "wrong_tier1": 3
19
  },
20
- "exact_path_accuracy": 0.25,
21
  "parent_safe_accuracy": 0.5,
22
- "tier1_accuracy": 0.625,
23
  "tier2_accuracy": 0.375,
24
- "tier3_accuracy": 0.2,
25
- "tier4_accuracy": 1.0
26
  },
27
  "view_metrics": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  "combined_path": {
29
- "average_prediction_depth": 2.375,
30
  "error_buckets": {
31
- "exact_match": 1,
32
- "parent_safe_stop": 1,
33
- "right_tier1_wrong_tier2": 2,
34
- "wrong_deep_leaf": 1,
35
- "wrong_tier1": 3
36
  },
37
- "exact_path_accuracy": 0.125,
38
  "fallback_overuse_count": 1,
39
  "fallback_rate": 0.125,
40
- "parent_safe_accuracy": 0.375,
41
- "tier1_accuracy": 0.625,
42
  "tier2_accuracy": 0.375,
43
- "tier3_accuracy": 0.0,
44
  "tier4_accuracy": 0.0
45
  },
46
  "disagreements": {
47
- "retrieval_vs_combined": 0
48
  },
49
- "embedding_retrieval": {
50
- "average_prediction_depth": 2.375,
51
- "error_buckets": {
52
- "exact_match": 1,
53
- "parent_safe_stop": 1,
54
- "right_tier1_wrong_tier2": 2,
55
- "wrong_deep_leaf": 1,
56
- "wrong_tier1": 3
57
- },
58
- "exact_path_accuracy": 0.125,
59
- "parent_safe_accuracy": 0.375,
60
- "tier1_accuracy": 0.625,
61
- "tier2_accuracy": 0.375,
62
- "tier3_accuracy": 0.0,
63
- "tier4_accuracy": 0.0
64
  }
65
  }
66
  }
 
1
  {
2
+ "accepted_accuracy": 0.5,
3
+ "accepted_coverage": 0.75,
4
+ "accuracy": 0.375,
5
  "count": 8,
6
+ "dataset_path": "/content/agentic-intent-classifier/data/iab/hard_cases.jsonl",
7
+ "fallback_rate": 0.25,
8
  "head": "iab_content",
9
+ "macro_f1": 0.25,
10
+ "primary_source": "supervised_classifier",
11
  "suite": "hard_cases",
12
  "tier_metrics": {
13
+ "average_prediction_depth": 2.0,
14
  "error_buckets": {
15
+ "exact_match": 3,
16
+ "right_tier1_wrong_tier2": 1,
17
+ "wrong_tier1": 4
 
18
  },
19
+ "exact_path_accuracy": 0.375,
20
  "parent_safe_accuracy": 0.5,
21
+ "tier1_accuracy": 0.5,
22
  "tier2_accuracy": 0.375,
23
+ "tier3_accuracy": 0.4,
24
+ "tier4_accuracy": 0.0
25
  },
26
  "view_metrics": {
27
+ "classifier": {
28
+ "average_prediction_depth": 2.0,
29
+ "error_buckets": {
30
+ "exact_match": 3,
31
+ "right_tier1_wrong_tier2": 1,
32
+ "wrong_tier1": 4
33
+ },
34
+ "exact_path_accuracy": 0.375,
35
+ "parent_safe_accuracy": 0.5,
36
+ "tier1_accuracy": 0.5,
37
+ "tier2_accuracy": 0.375,
38
+ "tier3_accuracy": 0.4,
39
+ "tier4_accuracy": 0.0
40
+ },
41
  "combined_path": {
42
+ "average_prediction_depth": 2.0,
43
  "error_buckets": {
44
+ "exact_match": 3,
45
+ "right_tier1_wrong_tier2": 1,
46
+ "wrong_tier1": 4
 
 
47
  },
48
+ "exact_path_accuracy": 0.375,
49
  "fallback_overuse_count": 1,
50
  "fallback_rate": 0.125,
51
+ "parent_safe_accuracy": 0.5,
52
+ "tier1_accuracy": 0.5,
53
  "tier2_accuracy": 0.375,
54
+ "tier3_accuracy": 0.4,
55
  "tier4_accuracy": 0.0
56
  },
57
  "disagreements": {
58
+ "classifier_vs_combined": 0
59
  },
60
+ "shadow_embedding_retrieval": {
61
+ "hint": "Set IAB_EVAL_INCLUDE_SHADOW_RETRIEVAL=1 to run shadow embedding retrieval (downloads/loads gte-Qwen2 when index is present).",
62
+ "reason": "disabled_by_default",
63
+ "skipped": true
 
 
 
 
 
 
 
 
 
 
 
64
  }
65
  }
66
  }
artifacts/evaluation/latest/iab_content_test_report.json CHANGED
@@ -1,31 +1,47 @@
1
  {
2
- "accepted_accuracy": 0.6527,
3
- "accepted_coverage": 1.0,
4
- "accuracy": 0.6527,
5
  "count": 3282,
6
- "dataset_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/data/iab/test.jsonl",
7
- "fallback_rate": 0.0,
8
  "head": "iab_content",
9
- "macro_f1": 0.6922,
10
- "primary_source": "embedding_retrieval",
11
  "suite": "test",
12
  "tier_metrics": {
13
- "average_prediction_depth": 2.1889,
14
  "error_buckets": {
15
- "exact_match": 2142,
16
- "parent_safe_stop": 115,
17
- "right_tier1_wrong_tier2": 674,
18
- "wrong_deep_leaf": 236,
19
- "wrong_tier1": 115
20
  },
21
- "exact_path_accuracy": 0.6527,
22
- "parent_safe_accuracy": 0.7721,
23
- "tier1_accuracy": 0.965,
24
- "tier2_accuracy": 0.7587,
25
- "tier3_accuracy": 0.8041,
26
- "tier4_accuracy": 0.7929
27
  },
28
  "view_metrics": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  "combined_path": {
30
  "count": 3282,
31
  "max_combined_rows": 500,
@@ -38,21 +54,10 @@
38
  "reason": "dataset_too_large_for_combined_view",
39
  "skipped": true
40
  },
41
- "embedding_retrieval": {
42
- "average_prediction_depth": 2.1889,
43
- "error_buckets": {
44
- "exact_match": 2107,
45
- "parent_safe_stop": 109,
46
- "right_tier1_wrong_tier2": 680,
47
- "wrong_deep_leaf": 271,
48
- "wrong_tier1": 115
49
- },
50
- "exact_path_accuracy": 0.642,
51
- "parent_safe_accuracy": 0.7596,
52
- "tier1_accuracy": 0.965,
53
- "tier2_accuracy": 0.7566,
54
- "tier3_accuracy": 0.7679,
55
- "tier4_accuracy": 0.6071
56
  }
57
  }
58
  }
 
1
  {
2
+ "accepted_accuracy": 0.9242,
3
+ "accepted_coverage": 0.9973,
4
+ "accuracy": 0.922,
5
  "count": 3282,
6
+ "dataset_path": "/content/agentic-intent-classifier/data/iab/test.jsonl",
7
+ "fallback_rate": 0.0027,
8
  "head": "iab_content",
9
+ "macro_f1": 0.8741,
10
+ "primary_source": "supervised_classifier",
11
  "suite": "test",
12
  "tier_metrics": {
13
+ "average_prediction_depth": 2.1789,
14
  "error_buckets": {
15
+ "exact_match": 3026,
16
+ "parent_safe_stop": 68,
17
+ "right_tier1_wrong_tier2": 59,
18
+ "wrong_deep_leaf": 96,
19
+ "wrong_tier1": 33
20
  },
21
+ "exact_path_accuracy": 0.922,
22
+ "parent_safe_accuracy": 0.9509,
23
+ "tier1_accuracy": 0.9899,
24
+ "tier2_accuracy": 0.9693,
25
+ "tier3_accuracy": 0.8477,
26
+ "tier4_accuracy": 0.5143
27
  },
28
  "view_metrics": {
29
+ "classifier": {
30
+ "average_prediction_depth": 2.1789,
31
+ "error_buckets": {
32
+ "exact_match": 2995,
33
+ "parent_safe_stop": 62,
34
+ "right_tier1_wrong_tier2": 71,
35
+ "wrong_deep_leaf": 121,
36
+ "wrong_tier1": 33
37
+ },
38
+ "exact_path_accuracy": 0.9126,
39
+ "parent_safe_accuracy": 0.9397,
40
+ "tier1_accuracy": 0.9899,
41
+ "tier2_accuracy": 0.9651,
42
+ "tier3_accuracy": 0.8218,
43
+ "tier4_accuracy": 0.3929
44
+ },
45
  "combined_path": {
46
  "count": 3282,
47
  "max_combined_rows": 500,
 
54
  "reason": "dataset_too_large_for_combined_view",
55
  "skipped": true
56
  },
57
+ "shadow_embedding_retrieval": {
58
+ "hint": "Set IAB_EVAL_INCLUDE_SHADOW_RETRIEVAL=1 to run shadow embedding retrieval (downloads/loads gte-Qwen2 when index is present).",
59
+ "reason": "disabled_by_default",
60
+ "skipped": true
 
 
 
 
 
 
 
 
 
 
 
61
  }
62
  }
63
  }
artifacts/evaluation/latest/iab_content_train_report.json CHANGED
@@ -1,67 +1,63 @@
1
  {
2
- "accepted_accuracy": 0.8115,
3
- "accepted_coverage": 1.0,
4
- "accuracy": 0.8115,
5
  "count": 13211,
6
- "dataset_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/data/iab/train.jsonl",
7
- "fallback_rate": 0.0,
8
  "head": "iab_content",
9
- "macro_f1": 0.8293,
10
- "primary_source": "embedding_retrieval",
11
  "suite": "train",
12
  "tier_metrics": {
13
- "average_prediction_depth": 2.2368,
14
  "error_buckets": {
15
- "exact_match": 10721,
16
- "parent_safe_stop": 346,
17
- "right_tier1_wrong_tier2": 812,
18
- "wrong_deep_leaf": 809,
19
- "wrong_tier1": 523
20
  },
21
- "exact_path_accuracy": 0.8115,
22
- "parent_safe_accuracy": 0.8753,
23
- "tier1_accuracy": 0.9604,
24
- "tier2_accuracy": 0.9208,
25
- "tier3_accuracy": 0.8788,
26
- "tier4_accuracy": 0.8732
27
  },
28
  "view_metrics": {
29
- "combined_path": {
30
- "average_prediction_depth": 2.2368,
31
  "error_buckets": {
32
- "exact_match": 10569,
33
- "parent_safe_stop": 338,
34
- "right_tier1_wrong_tier2": 834,
35
- "wrong_deep_leaf": 947,
36
- "wrong_tier1": 523
37
  },
38
- "exact_path_accuracy": 0.8,
39
- "fallback_overuse_count": 1123,
40
- "fallback_rate": 0.085,
41
- "parent_safe_accuracy": 0.8631,
42
- "tier1_accuracy": 0.9604,
43
- "tier2_accuracy": 0.9189,
44
- "tier3_accuracy": 0.843,
45
- "tier4_accuracy": 0.6589
 
 
 
 
46
  },
47
  "disagreements": {
48
- "retrieval_vs_combined": 0
 
 
 
49
  },
50
- "embedding_retrieval": {
51
- "average_prediction_depth": 2.2368,
52
- "error_buckets": {
53
- "exact_match": 10569,
54
- "parent_safe_stop": 338,
55
- "right_tier1_wrong_tier2": 834,
56
- "wrong_deep_leaf": 947,
57
- "wrong_tier1": 523
58
- },
59
- "exact_path_accuracy": 0.8,
60
- "parent_safe_accuracy": 0.8631,
61
- "tier1_accuracy": 0.9604,
62
- "tier2_accuracy": 0.9189,
63
- "tier3_accuracy": 0.843,
64
- "tier4_accuracy": 0.6589
65
  }
66
  }
67
  }
 
1
  {
2
+ "accepted_accuracy": 0.93,
3
+ "accepted_coverage": 0.9978,
4
+ "accuracy": 0.9282,
5
  "count": 13211,
6
+ "dataset_path": "/content/agentic-intent-classifier/data/iab/train.jsonl",
7
+ "fallback_rate": 0.0022,
8
  "head": "iab_content",
9
+ "macro_f1": 0.8851,
10
+ "primary_source": "supervised_classifier",
11
  "suite": "train",
12
  "tier_metrics": {
13
+ "average_prediction_depth": 2.172,
14
  "error_buckets": {
15
+ "exact_match": 12263,
16
+ "parent_safe_stop": 259,
17
+ "right_tier1_wrong_tier2": 229,
18
+ "wrong_deep_leaf": 356,
19
+ "wrong_tier1": 104
20
  },
21
+ "exact_path_accuracy": 0.9282,
22
+ "parent_safe_accuracy": 0.9572,
23
+ "tier1_accuracy": 0.9921,
24
+ "tier2_accuracy": 0.9726,
25
+ "tier3_accuracy": 0.8565,
26
+ "tier4_accuracy": 0.5518
27
  },
28
  "view_metrics": {
29
+ "classifier": {
30
+ "average_prediction_depth": 2.172,
31
  "error_buckets": {
32
+ "exact_match": 12130,
33
+ "parent_safe_stop": 238,
34
+ "right_tier1_wrong_tier2": 277,
35
+ "wrong_deep_leaf": 462,
36
+ "wrong_tier1": 104
37
  },
38
+ "exact_path_accuracy": 0.9182,
39
+ "parent_safe_accuracy": 0.9456,
40
+ "tier1_accuracy": 0.9921,
41
+ "tier2_accuracy": 0.9685,
42
+ "tier3_accuracy": 0.829,
43
+ "tier4_accuracy": 0.4214
44
+ },
45
+ "combined_path": {
46
+ "count": 13211,
47
+ "max_combined_rows": 500,
48
+ "reason": "dataset_too_large_for_combined_view",
49
+ "skipped": true
50
  },
51
  "disagreements": {
52
+ "count": 13211,
53
+ "max_combined_rows": 500,
54
+ "reason": "dataset_too_large_for_combined_view",
55
+ "skipped": true
56
  },
57
+ "shadow_embedding_retrieval": {
58
+ "hint": "Set IAB_EVAL_INCLUDE_SHADOW_RETRIEVAL=1 to run shadow embedding retrieval (downloads/loads gte-Qwen2 when index is present).",
59
+ "reason": "disabled_by_default",
60
+ "skipped": true
 
 
 
 
 
 
 
 
 
 
 
61
  }
62
  }
63
  }
artifacts/evaluation/latest/iab_content_val_report.json CHANGED
@@ -1,67 +1,63 @@
1
  {
2
- "accepted_accuracy": 0.6545,
3
- "accepted_coverage": 1.0,
4
- "accuracy": 0.6545,
5
  "count": 3282,
6
- "dataset_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/data/iab/val.jsonl",
7
- "fallback_rate": 0.0,
8
  "head": "iab_content",
9
- "macro_f1": 0.6957,
10
- "primary_source": "embedding_retrieval",
11
  "suite": "val",
12
  "tier_metrics": {
13
- "average_prediction_depth": 2.1813,
14
  "error_buckets": {
15
- "exact_match": 2148,
16
- "parent_safe_stop": 105,
17
- "right_tier1_wrong_tier2": 684,
18
- "wrong_deep_leaf": 234,
19
- "wrong_tier1": 111
20
  },
21
- "exact_path_accuracy": 0.6545,
22
- "parent_safe_accuracy": 0.7821,
23
- "tier1_accuracy": 0.9662,
24
- "tier2_accuracy": 0.7577,
25
- "tier3_accuracy": 0.8352,
26
- "tier4_accuracy": 0.7214
27
  },
28
  "view_metrics": {
29
- "combined_path": {
30
- "average_prediction_depth": 2.1813,
31
  "error_buckets": {
32
- "exact_match": 2116,
33
- "parent_safe_stop": 100,
34
- "right_tier1_wrong_tier2": 689,
35
- "wrong_deep_leaf": 266,
36
- "wrong_tier1": 111
37
  },
38
- "exact_path_accuracy": 0.6447,
39
- "fallback_overuse_count": 413,
40
- "fallback_rate": 0.1258,
41
- "parent_safe_accuracy": 0.7709,
42
- "tier1_accuracy": 0.9662,
43
- "tier2_accuracy": 0.756,
44
- "tier3_accuracy": 0.799,
45
- "tier4_accuracy": 0.55
 
 
 
 
46
  },
47
  "disagreements": {
48
- "retrieval_vs_combined": 0
 
 
 
49
  },
50
- "embedding_retrieval": {
51
- "average_prediction_depth": 2.1813,
52
- "error_buckets": {
53
- "exact_match": 2116,
54
- "parent_safe_stop": 100,
55
- "right_tier1_wrong_tier2": 689,
56
- "wrong_deep_leaf": 266,
57
- "wrong_tier1": 111
58
- },
59
- "exact_path_accuracy": 0.6447,
60
- "parent_safe_accuracy": 0.7709,
61
- "tier1_accuracy": 0.9662,
62
- "tier2_accuracy": 0.756,
63
- "tier3_accuracy": 0.799,
64
- "tier4_accuracy": 0.55
65
  }
66
  }
67
  }
 
1
  {
2
+ "accepted_accuracy": 0.9246,
3
+ "accepted_coverage": 0.9979,
4
+ "accuracy": 0.9229,
5
  "count": 3282,
6
+ "dataset_path": "/content/agentic-intent-classifier/data/iab/val.jsonl",
7
+ "fallback_rate": 0.0021,
8
  "head": "iab_content",
9
+ "macro_f1": 0.8789,
10
+ "primary_source": "supervised_classifier",
11
  "suite": "val",
12
  "tier_metrics": {
13
+ "average_prediction_depth": 2.1789,
14
  "error_buckets": {
15
+ "exact_match": 3029,
16
+ "parent_safe_stop": 69,
17
+ "right_tier1_wrong_tier2": 67,
18
+ "wrong_deep_leaf": 91,
19
+ "wrong_tier1": 26
20
  },
21
+ "exact_path_accuracy": 0.9229,
22
+ "parent_safe_accuracy": 0.9549,
23
+ "tier1_accuracy": 0.9921,
24
+ "tier2_accuracy": 0.9686,
25
+ "tier3_accuracy": 0.8549,
26
+ "tier4_accuracy": 0.5286
27
  },
28
  "view_metrics": {
29
+ "classifier": {
30
+ "average_prediction_depth": 2.1789,
31
  "error_buckets": {
32
+ "exact_match": 2997,
33
+ "parent_safe_stop": 64,
34
+ "right_tier1_wrong_tier2": 79,
35
+ "wrong_deep_leaf": 116,
36
+ "wrong_tier1": 26
37
  },
38
+ "exact_path_accuracy": 0.9132,
39
+ "parent_safe_accuracy": 0.9436,
40
+ "tier1_accuracy": 0.9921,
41
+ "tier2_accuracy": 0.9644,
42
+ "tier3_accuracy": 0.829,
43
+ "tier4_accuracy": 0.4071
44
+ },
45
+ "combined_path": {
46
+ "count": 3282,
47
+ "max_combined_rows": 500,
48
+ "reason": "dataset_too_large_for_combined_view",
49
+ "skipped": true
50
  },
51
  "disagreements": {
52
+ "count": 3282,
53
+ "max_combined_rows": 500,
54
+ "reason": "dataset_too_large_for_combined_view",
55
+ "skipped": true
56
  },
57
+ "shadow_embedding_retrieval": {
58
+ "hint": "Set IAB_EVAL_INCLUDE_SHADOW_RETRIEVAL=1 to run shadow embedding retrieval (downloads/loads gte-Qwen2 when index is present).",
59
+ "reason": "disabled_by_default",
60
+ "skipped": true
 
 
 
 
 
 
 
 
 
 
 
61
  }
62
  }
63
  }
artifacts/evaluation/latest/iab_cross_vertical_behavior_lock_regression.json CHANGED
The diff for this file is too large to render. See raw diff
 
artifacts/evaluation/latest/iab_cross_vertical_quality_target_eval.json CHANGED
@@ -1,21 +1,21 @@
1
  {
2
  "by_status": {
3
  "must_fix": {
4
- "failed": 86,
5
- "passed": 4,
6
  "total": 90
7
  }
8
  },
9
- "cases_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/examples/iab_cross_vertical_mapping_cases.json",
10
  "count": 90,
11
- "failed": 86,
12
- "passed": 4,
13
  "results": [
14
  {
15
  "actual": {
16
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
17
  "model_output.classification.iab_content.tier1.label": "Automotive",
18
- "model_output.classification.iab_content.tier2.label": "Auto Type"
19
  },
20
  "expected": {
21
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -25,7 +25,12 @@
25
  "id": "auto-buying-easy",
26
  "mismatches": [
27
  {
28
- "actual": "Auto Type",
 
 
 
 
 
29
  "expected": "Auto Buying and Selling",
30
  "path": "model_output.classification.iab_content.tier2.label"
31
  }
@@ -88,7 +93,7 @@
88
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
89
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
90
  "model_output.classification.iab_content.tier2.label": "Computing",
91
- "model_output.classification.iab_content.tier3.label": "Software and Applications"
92
  },
93
  "expected": {
94
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -114,7 +119,7 @@
114
  "path": "model_output.classification.iab_content.tier2.label"
115
  },
116
  {
117
- "actual": "Software and Applications",
118
  "expected": "Sales",
119
  "path": "model_output.classification.iab_content.tier3.label"
120
  }
@@ -128,8 +133,8 @@
128
  "actual": {
129
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
130
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
131
- "model_output.classification.iab_content.tier2.label": "Computing",
132
- "model_output.classification.iab_content.tier3.label": "Internet"
133
  },
134
  "expected": {
135
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -150,12 +155,12 @@
150
  "path": "model_output.classification.iab_content.mapping_mode"
151
  },
152
  {
153
- "actual": "Computing",
154
  "expected": "Business",
155
  "path": "model_output.classification.iab_content.tier2.label"
156
  },
157
  {
158
- "actual": "Internet",
159
  "expected": "Sales",
160
  "path": "model_output.classification.iab_content.tier3.label"
161
  }
@@ -200,7 +205,7 @@
200
  "actual": {
201
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
202
  "model_output.classification.iab_content.tier1.label": "Hobbies & Interests",
203
- "model_output.classification.iab_content.tier2.label": "Content Production",
204
  "model_output.classification.iab_content.tier3.label": null
205
  },
206
  "expected": {
@@ -222,7 +227,7 @@
222
  "path": "model_output.classification.iab_content.mapping_mode"
223
  },
224
  {
225
- "actual": "Content Production",
226
  "expected": "Business",
227
  "path": "model_output.classification.iab_content.tier2.label"
228
  },
@@ -240,8 +245,8 @@
240
  {
241
  "actual": {
242
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
243
- "model_output.classification.iab_content.tier1.label": "Sensitive Topics",
244
- "model_output.classification.iab_content.tier2.label": "Online Piracy",
245
  "model_output.classification.iab_content.tier3.label": null
246
  },
247
  "expected": {
@@ -253,7 +258,7 @@
253
  "id": "marketing-tools-medium",
254
  "mismatches": [
255
  {
256
- "actual": "Sensitive Topics",
257
  "expected": "Business and Finance",
258
  "path": "model_output.classification.iab_content.tier1.label"
259
  },
@@ -263,7 +268,7 @@
263
  "path": "model_output.classification.iab_content.mapping_mode"
264
  },
265
  {
266
- "actual": "Online Piracy",
267
  "expected": "Business",
268
  "path": "model_output.classification.iab_content.tier2.label"
269
  },
@@ -281,9 +286,9 @@
281
  {
282
  "actual": {
283
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
284
- "model_output.classification.iab_content.tier1.label": "Genres",
285
- "model_output.classification.iab_content.tier2.label": "Talk Radio",
286
- "model_output.classification.iab_content.tier3.label": "Public Radio"
287
  },
288
  "expected": {
289
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -294,7 +299,7 @@
294
  "id": "marketing-tools-hard",
295
  "mismatches": [
296
  {
297
- "actual": "Genres",
298
  "expected": "Business and Finance",
299
  "path": "model_output.classification.iab_content.tier1.label"
300
  },
@@ -304,12 +309,12 @@
304
  "path": "model_output.classification.iab_content.mapping_mode"
305
  },
306
  {
307
- "actual": "Talk Radio",
308
  "expected": "Business",
309
  "path": "model_output.classification.iab_content.tier2.label"
310
  },
311
  {
312
- "actual": "Public Radio",
313
  "expected": "Marketing and Advertising",
314
  "path": "model_output.classification.iab_content.tier3.label"
315
  }
@@ -322,8 +327,8 @@
322
  {
323
  "actual": {
324
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
325
- "model_output.classification.iab_content.tier1.label": "Careers",
326
- "model_output.classification.iab_content.tier2.label": "Job Search",
327
  "model_output.classification.iab_content.tier3.label": null
328
  },
329
  "expected": {
@@ -335,7 +340,7 @@
335
  "id": "business-it-easy",
336
  "mismatches": [
337
  {
338
- "actual": "Careers",
339
  "expected": "Business and Finance",
340
  "path": "model_output.classification.iab_content.tier1.label"
341
  },
@@ -345,7 +350,7 @@
345
  "path": "model_output.classification.iab_content.mapping_mode"
346
  },
347
  {
348
- "actual": "Job Search",
349
  "expected": "Business",
350
  "path": "model_output.classification.iab_content.tier2.label"
351
  },
@@ -363,8 +368,8 @@
363
  {
364
  "actual": {
365
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
366
- "model_output.classification.iab_content.tier1.label": "Business and Finance",
367
- "model_output.classification.iab_content.tier2.label": "Business",
368
  "model_output.classification.iab_content.tier3.label": null
369
  },
370
  "expected": {
@@ -375,11 +380,21 @@
375
  },
376
  "id": "business-it-medium",
377
  "mismatches": [
 
 
 
 
 
378
  {
379
  "actual": "nearest_equivalent",
380
  "expected": "exact",
381
  "path": "model_output.classification.iab_content.mapping_mode"
382
  },
 
 
 
 
 
383
  {
384
  "actual": null,
385
  "expected": "Business I.T.",
@@ -393,10 +408,10 @@
393
  },
394
  {
395
  "actual": {
396
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
397
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
398
  "model_output.classification.iab_content.tier2.label": "Computing",
399
- "model_output.classification.iab_content.tier3.label": "Software and Applications"
400
  },
401
  "expected": {
402
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -411,18 +426,13 @@
411
  "expected": "Business and Finance",
412
  "path": "model_output.classification.iab_content.tier1.label"
413
  },
414
- {
415
- "actual": "nearest_equivalent",
416
- "expected": "exact",
417
- "path": "model_output.classification.iab_content.mapping_mode"
418
- },
419
  {
420
  "actual": "Computing",
421
  "expected": "Business",
422
  "path": "model_output.classification.iab_content.tier2.label"
423
  },
424
  {
425
- "actual": "Software and Applications",
426
  "expected": "Business I.T.",
427
  "path": "model_output.classification.iab_content.tier3.label"
428
  }
@@ -434,9 +444,9 @@
434
  },
435
  {
436
  "actual": {
437
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
438
- "model_output.classification.iab_content.tier1.label": "Sports",
439
- "model_output.classification.iab_content.tier2.label": "Table Tennis"
440
  },
441
  "expected": {
442
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -444,33 +454,17 @@
444
  "model_output.classification.iab_content.tier2.label": "Dining Out"
445
  },
446
  "id": "dining-out-easy",
447
- "mismatches": [
448
- {
449
- "actual": "Sports",
450
- "expected": "Food & Drink",
451
- "path": "model_output.classification.iab_content.tier1.label"
452
- },
453
- {
454
- "actual": "nearest_equivalent",
455
- "expected": "exact",
456
- "path": "model_output.classification.iab_content.mapping_mode"
457
- },
458
- {
459
- "actual": "Table Tennis",
460
- "expected": "Dining Out",
461
- "path": "model_output.classification.iab_content.tier2.label"
462
- }
463
- ],
464
  "notes": "Cross-vertical easy IAB mapping case for Food & Drink > Dining Out.",
465
- "pass": false,
466
  "status": "must_fix",
467
  "text": "Book a table for six tonight"
468
  },
469
  {
470
  "actual": {
471
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
472
  "model_output.classification.iab_content.tier1.label": "Attractions",
473
- "model_output.classification.iab_content.tier2.label": null
474
  },
475
  "expected": {
476
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -485,12 +479,7 @@
485
  "path": "model_output.classification.iab_content.tier1.label"
486
  },
487
  {
488
- "actual": "nearest_equivalent",
489
- "expected": "exact",
490
- "path": "model_output.classification.iab_content.mapping_mode"
491
- },
492
- {
493
- "actual": null,
494
  "expected": "Dining Out",
495
  "path": "model_output.classification.iab_content.tier2.label"
496
  }
@@ -502,9 +491,9 @@
502
  },
503
  {
504
  "actual": {
505
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
506
  "model_output.classification.iab_content.tier1.label": "Food & Drink",
507
- "model_output.classification.iab_content.tier2.label": null
508
  },
509
  "expected": {
510
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -512,28 +501,17 @@
512
  "model_output.classification.iab_content.tier2.label": "Dining Out"
513
  },
514
  "id": "dining-out-hard",
515
- "mismatches": [
516
- {
517
- "actual": "nearest_equivalent",
518
- "expected": "exact",
519
- "path": "model_output.classification.iab_content.mapping_mode"
520
- },
521
- {
522
- "actual": null,
523
- "expected": "Dining Out",
524
- "path": "model_output.classification.iab_content.tier2.label"
525
- }
526
- ],
527
  "notes": "Cross-vertical hard IAB mapping case for Food & Drink > Dining Out.",
528
- "pass": false,
529
  "status": "must_fix",
530
  "text": "Need a place to eat tonight where I can make a reservation online"
531
  },
532
  {
533
  "actual": {
534
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
535
- "model_output.classification.iab_content.tier1.label": "Style & Fashion",
536
- "model_output.classification.iab_content.tier2.label": null
537
  },
538
  "expected": {
539
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -541,33 +519,17 @@
541
  "model_output.classification.iab_content.tier2.label": "Alcoholic Beverages"
542
  },
543
  "id": "alcoholic-beverages-easy",
544
- "mismatches": [
545
- {
546
- "actual": "Style & Fashion",
547
- "expected": "Food & Drink",
548
- "path": "model_output.classification.iab_content.tier1.label"
549
- },
550
- {
551
- "actual": "nearest_equivalent",
552
- "expected": "exact",
553
- "path": "model_output.classification.iab_content.mapping_mode"
554
- },
555
- {
556
- "actual": null,
557
- "expected": "Alcoholic Beverages",
558
- "path": "model_output.classification.iab_content.tier2.label"
559
- }
560
- ],
561
  "notes": "Cross-vertical easy IAB mapping case for Food & Drink > Alcoholic Beverages.",
562
- "pass": false,
563
  "status": "must_fix",
564
  "text": "Which whiskey cocktail should I order?"
565
  },
566
  {
567
  "actual": {
568
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
569
  "model_output.classification.iab_content.tier1.label": "Food & Drink",
570
- "model_output.classification.iab_content.tier2.label": null
571
  },
572
  "expected": {
573
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -575,28 +537,17 @@
575
  "model_output.classification.iab_content.tier2.label": "Alcoholic Beverages"
576
  },
577
  "id": "alcoholic-beverages-medium",
578
- "mismatches": [
579
- {
580
- "actual": "nearest_equivalent",
581
- "expected": "exact",
582
- "path": "model_output.classification.iab_content.mapping_mode"
583
- },
584
- {
585
- "actual": null,
586
- "expected": "Alcoholic Beverages",
587
- "path": "model_output.classification.iab_content.tier2.label"
588
- }
589
- ],
590
  "notes": "Cross-vertical medium IAB mapping case for Food & Drink > Alcoholic Beverages.",
591
- "pass": false,
592
  "status": "must_fix",
593
  "text": "Best vodka drinks for beginners"
594
  },
595
  {
596
  "actual": {
597
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
598
  "model_output.classification.iab_content.tier1.label": "Food & Drink",
599
- "model_output.classification.iab_content.tier2.label": null
600
  },
601
  "expected": {
602
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -604,27 +555,16 @@
604
  "model_output.classification.iab_content.tier2.label": "Alcoholic Beverages"
605
  },
606
  "id": "alcoholic-beverages-hard",
607
- "mismatches": [
608
- {
609
- "actual": "nearest_equivalent",
610
- "expected": "exact",
611
- "path": "model_output.classification.iab_content.mapping_mode"
612
- },
613
- {
614
- "actual": null,
615
- "expected": "Alcoholic Beverages",
616
- "path": "model_output.classification.iab_content.tier2.label"
617
- }
618
- ],
619
  "notes": "Cross-vertical hard IAB mapping case for Food & Drink > Alcoholic Beverages.",
620
- "pass": false,
621
  "status": "must_fix",
622
  "text": "Want a spirit-forward drink recommendation, not a restaurant suggestion"
623
  },
624
  {
625
  "actual": {
626
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
627
- "model_output.classification.iab_content.tier1.label": "Sensitive Topics",
628
  "model_output.classification.iab_content.tier2.label": null
629
  },
630
  "expected": {
@@ -635,7 +575,7 @@
635
  "id": "artificial-intelligence-easy",
636
  "mismatches": [
637
  {
638
- "actual": "Sensitive Topics",
639
  "expected": "Technology & Computing",
640
  "path": "model_output.classification.iab_content.tier1.label"
641
  },
@@ -657,9 +597,9 @@
657
  },
658
  {
659
  "actual": {
660
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
661
- "model_output.classification.iab_content.tier1.label": "Sensitive Topics",
662
- "model_output.classification.iab_content.tier2.label": null
663
  },
664
  "expected": {
665
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -669,17 +609,12 @@
669
  "id": "artificial-intelligence-medium",
670
  "mismatches": [
671
  {
672
- "actual": "Sensitive Topics",
673
  "expected": "Technology & Computing",
674
  "path": "model_output.classification.iab_content.tier1.label"
675
  },
676
  {
677
- "actual": "nearest_equivalent",
678
- "expected": "exact",
679
- "path": "model_output.classification.iab_content.mapping_mode"
680
- },
681
- {
682
- "actual": null,
683
  "expected": "Artificial Intelligence",
684
  "path": "model_output.classification.iab_content.tier2.label"
685
  }
@@ -691,7 +626,7 @@
691
  },
692
  {
693
  "actual": {
694
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
695
  "model_output.classification.iab_content.tier1.label": "Education",
696
  "model_output.classification.iab_content.tier2.label": "Language Learning"
697
  },
@@ -707,11 +642,6 @@
707
  "expected": "Technology & Computing",
708
  "path": "model_output.classification.iab_content.tier1.label"
709
  },
710
- {
711
- "actual": "nearest_equivalent",
712
- "expected": "exact",
713
- "path": "model_output.classification.iab_content.mapping_mode"
714
- },
715
  {
716
  "actual": "Language Learning",
717
  "expected": "Artificial Intelligence",
@@ -728,7 +658,7 @@
728
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
729
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
730
  "model_output.classification.iab_content.tier2.label": "Computing",
731
- "model_output.classification.iab_content.tier3.label": "Software and Applications"
732
  },
733
  "expected": {
734
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -742,6 +672,11 @@
742
  "actual": "nearest_equivalent",
743
  "expected": "exact",
744
  "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
745
  }
746
  ],
747
  "notes": "Cross-vertical easy IAB mapping case for Technology & Computing > Computing > Computer Software and Applications.",
@@ -754,7 +689,7 @@
754
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
755
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
756
  "model_output.classification.iab_content.tier2.label": "Computing",
757
- "model_output.classification.iab_content.tier3.label": "Software and Applications"
758
  },
759
  "expected": {
760
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -768,6 +703,11 @@
768
  "actual": "nearest_equivalent",
769
  "expected": "exact",
770
  "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
771
  }
772
  ],
773
  "notes": "Cross-vertical medium IAB mapping case for Technology & Computing > Computing > Computer Software and Applications.",
@@ -778,9 +718,9 @@
778
  {
779
  "actual": {
780
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
781
- "model_output.classification.iab_content.tier1.label": "Technology & Computing",
782
- "model_output.classification.iab_content.tier2.label": "Computing",
783
- "model_output.classification.iab_content.tier3.label": "Software and Applications"
784
  },
785
  "expected": {
786
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -790,10 +730,25 @@
790
  },
791
  "id": "software-apps-hard",
792
  "mismatches": [
 
 
 
 
 
793
  {
794
  "actual": "nearest_equivalent",
795
  "expected": "exact",
796
  "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
 
 
 
 
 
797
  }
798
  ],
799
  "notes": "Cross-vertical hard IAB mapping case for Technology & Computing > Computing > Computer Software and Applications.",
@@ -803,7 +758,7 @@
803
  },
804
  {
805
  "actual": {
806
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
807
  "model_output.classification.iab_content.tier1.label": "Careers",
808
  "model_output.classification.iab_content.tier2.label": "Remote Working",
809
  "model_output.classification.iab_content.tier3.label": null,
@@ -823,11 +778,6 @@
823
  "expected": "Technology & Computing",
824
  "path": "model_output.classification.iab_content.tier1.label"
825
  },
826
- {
827
- "actual": "nearest_equivalent",
828
- "expected": "exact",
829
- "path": "model_output.classification.iab_content.mapping_mode"
830
- },
831
  {
832
  "actual": "Remote Working",
833
  "expected": "Computing",
@@ -854,7 +804,7 @@
854
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
855
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
856
  "model_output.classification.iab_content.tier2.label": "Computing",
857
- "model_output.classification.iab_content.tier3.label": "Internet",
858
  "model_output.classification.iab_content.tier4.label": null
859
  },
860
  "expected": {
@@ -872,7 +822,7 @@
872
  "path": "model_output.classification.iab_content.mapping_mode"
873
  },
874
  {
875
- "actual": "Internet",
876
  "expected": "Software and Applications",
877
  "path": "model_output.classification.iab_content.tier3.label"
878
  },
@@ -891,7 +841,7 @@
891
  "actual": {
892
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
893
  "model_output.classification.iab_content.tier1.label": "Careers",
894
- "model_output.classification.iab_content.tier2.label": "Remote Working",
895
  "model_output.classification.iab_content.tier3.label": null,
896
  "model_output.classification.iab_content.tier4.label": null
897
  },
@@ -915,7 +865,7 @@
915
  "path": "model_output.classification.iab_content.mapping_mode"
916
  },
917
  {
918
- "actual": "Remote Working",
919
  "expected": "Computing",
920
  "path": "model_output.classification.iab_content.tier2.label"
921
  },
@@ -978,7 +928,7 @@
978
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
979
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
980
  "model_output.classification.iab_content.tier2.label": "Computing",
981
- "model_output.classification.iab_content.tier3.label": "Internet",
982
  "model_output.classification.iab_content.tier4.label": null
983
  },
984
  "expected": {
@@ -995,6 +945,11 @@
995
  "expected": "exact",
996
  "path": "model_output.classification.iab_content.mapping_mode"
997
  },
 
 
 
 
 
998
  {
999
  "actual": null,
1000
  "expected": "Web Hosting",
@@ -1011,7 +966,7 @@
1011
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1012
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1013
  "model_output.classification.iab_content.tier2.label": "Computing",
1014
- "model_output.classification.iab_content.tier3.label": "Internet",
1015
  "model_output.classification.iab_content.tier4.label": null
1016
  },
1017
  "expected": {
@@ -1030,9 +985,14 @@
1030
  },
1031
  {
1032
  "actual": null,
1033
- "expected": "Web Hosting",
1034
- "path": "model_output.classification.iab_content.tier4.label"
1035
- }
 
 
 
 
 
1036
  ],
1037
  "notes": "Cross-vertical hard IAB mapping case for Technology & Computing > Computing > Internet > Web Hosting.",
1038
  "pass": false,
@@ -1043,8 +1003,8 @@
1043
  "actual": {
1044
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1045
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1046
- "model_output.classification.iab_content.tier2.label": "Computing",
1047
- "model_output.classification.iab_content.tier3.label": "Laptops"
1048
  },
1049
  "expected": {
1050
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1058,6 +1018,16 @@
1058
  "actual": "nearest_equivalent",
1059
  "expected": "exact",
1060
  "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
 
 
 
 
 
1061
  }
1062
  ],
1063
  "notes": "Cross-vertical easy IAB mapping case for Technology & Computing > Computing > Laptops.",
@@ -1067,7 +1037,7 @@
1067
  },
1068
  {
1069
  "actual": {
1070
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1071
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1072
  "model_output.classification.iab_content.tier2.label": "Computing",
1073
  "model_output.classification.iab_content.tier3.label": "Laptops"
@@ -1079,15 +1049,9 @@
1079
  "model_output.classification.iab_content.tier3.label": "Laptops"
1080
  },
1081
  "id": "laptops-medium",
1082
- "mismatches": [
1083
- {
1084
- "actual": "nearest_equivalent",
1085
- "expected": "exact",
1086
- "path": "model_output.classification.iab_content.mapping_mode"
1087
- }
1088
- ],
1089
  "notes": "Cross-vertical medium IAB mapping case for Technology & Computing > Computing > Laptops.",
1090
- "pass": false,
1091
  "status": "must_fix",
1092
  "text": "Best laptop for work and study under 1200"
1093
  },
@@ -1096,7 +1060,7 @@
1096
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1097
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1098
  "model_output.classification.iab_content.tier2.label": "Computing",
1099
- "model_output.classification.iab_content.tier3.label": "Laptops"
1100
  },
1101
  "expected": {
1102
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1110,6 +1074,11 @@
1110
  "actual": "nearest_equivalent",
1111
  "expected": "exact",
1112
  "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
1113
  }
1114
  ],
1115
  "notes": "Cross-vertical hard IAB mapping case for Technology & Computing > Computing > Laptops.",
@@ -1153,7 +1122,7 @@
1153
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1154
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1155
  "model_output.classification.iab_content.tier2.label": "Computing",
1156
- "model_output.classification.iab_content.tier3.label": "Software and Applications"
1157
  },
1158
  "expected": {
1159
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1169,7 +1138,7 @@
1169
  "path": "model_output.classification.iab_content.mapping_mode"
1170
  },
1171
  {
1172
- "actual": "Software and Applications",
1173
  "expected": "Desktops",
1174
  "path": "model_output.classification.iab_content.tier3.label"
1175
  }
@@ -1181,10 +1150,10 @@
1181
  },
1182
  {
1183
  "actual": {
1184
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1185
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1186
  "model_output.classification.iab_content.tier2.label": "Computing",
1187
- "model_output.classification.iab_content.tier3.label": null
1188
  },
1189
  "expected": {
1190
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1193,29 +1162,18 @@
1193
  "model_output.classification.iab_content.tier3.label": "Desktops"
1194
  },
1195
  "id": "desktops-hard",
1196
- "mismatches": [
1197
- {
1198
- "actual": "nearest_equivalent",
1199
- "expected": "exact",
1200
- "path": "model_output.classification.iab_content.mapping_mode"
1201
- },
1202
- {
1203
- "actual": null,
1204
- "expected": "Desktops",
1205
- "path": "model_output.classification.iab_content.tier3.label"
1206
- }
1207
- ],
1208
  "notes": "Cross-vertical hard IAB mapping case for Technology & Computing > Computing > Desktops.",
1209
- "pass": false,
1210
  "status": "must_fix",
1211
  "text": "Need a desktop PC with strong performance for creative work"
1212
  },
1213
  {
1214
  "actual": {
1215
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1216
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1217
  "model_output.classification.iab_content.tier2.label": "Consumer Electronics",
1218
- "model_output.classification.iab_content.tier3.label": null
1219
  },
1220
  "expected": {
1221
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1226,12 +1184,7 @@
1226
  "id": "smartphones-easy",
1227
  "mismatches": [
1228
  {
1229
- "actual": "nearest_equivalent",
1230
- "expected": "exact",
1231
- "path": "model_output.classification.iab_content.mapping_mode"
1232
- },
1233
- {
1234
- "actual": null,
1235
  "expected": "Smartphones",
1236
  "path": "model_output.classification.iab_content.tier3.label"
1237
  }
@@ -1243,10 +1196,10 @@
1243
  },
1244
  {
1245
  "actual": {
1246
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1247
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1248
  "model_output.classification.iab_content.tier2.label": "Consumer Electronics",
1249
- "model_output.classification.iab_content.tier3.label": null
1250
  },
1251
  "expected": {
1252
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1257,12 +1210,7 @@
1257
  "id": "smartphones-medium",
1258
  "mismatches": [
1259
  {
1260
- "actual": "nearest_equivalent",
1261
- "expected": "exact",
1262
- "path": "model_output.classification.iab_content.mapping_mode"
1263
- },
1264
- {
1265
- "actual": null,
1266
  "expected": "Smartphones",
1267
  "path": "model_output.classification.iab_content.tier3.label"
1268
  }
@@ -1274,10 +1222,10 @@
1274
  },
1275
  {
1276
  "actual": {
1277
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1278
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1279
  "model_output.classification.iab_content.tier2.label": "Consumer Electronics",
1280
- "model_output.classification.iab_content.tier3.label": "Smartphones"
1281
  },
1282
  "expected": {
1283
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1288,9 +1236,9 @@
1288
  "id": "smartphones-hard",
1289
  "mismatches": [
1290
  {
1291
- "actual": "nearest_equivalent",
1292
- "expected": "exact",
1293
- "path": "model_output.classification.iab_content.mapping_mode"
1294
  }
1295
  ],
1296
  "notes": "Cross-vertical hard IAB mapping case for Technology & Computing > Consumer Electronics > Smartphones.",
@@ -1300,7 +1248,7 @@
1300
  },
1301
  {
1302
  "actual": {
1303
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1304
  "model_output.classification.iab_content.tier1.label": "Style & Fashion"
1305
  },
1306
  "expected": {
@@ -1308,15 +1256,21 @@
1308
  "model_output.classification.iab_content.tier1.label": "Style & Fashion"
1309
  },
1310
  "id": "style-fashion-parent-easy",
1311
- "mismatches": [],
 
 
 
 
 
 
1312
  "notes": "Cross-vertical easy IAB mapping case for Style & Fashion.",
1313
- "pass": true,
1314
  "status": "must_fix",
1315
  "text": "Best shoes under 100 dollars"
1316
  },
1317
  {
1318
  "actual": {
1319
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1320
  "model_output.classification.iab_content.tier1.label": "Style & Fashion"
1321
  },
1322
  "expected": {
@@ -1324,15 +1278,21 @@
1324
  "model_output.classification.iab_content.tier1.label": "Style & Fashion"
1325
  },
1326
  "id": "style-fashion-parent-medium",
1327
- "mismatches": [],
 
 
 
 
 
 
1328
  "notes": "Cross-vertical medium IAB mapping case for Style & Fashion.",
1329
- "pass": true,
1330
  "status": "must_fix",
1331
  "text": "Affordable fashion accessories for everyday wear"
1332
  },
1333
  {
1334
  "actual": {
1335
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1336
  "model_output.classification.iab_content.tier1.label": "Style & Fashion"
1337
  },
1338
  "expected": {
@@ -1340,18 +1300,24 @@
1340
  "model_output.classification.iab_content.tier1.label": "Style & Fashion"
1341
  },
1342
  "id": "style-fashion-parent-hard",
1343
- "mismatches": [],
 
 
 
 
 
 
1344
  "notes": "Cross-vertical hard IAB mapping case for Style & Fashion.",
1345
- "pass": true,
1346
  "status": "must_fix",
1347
  "text": "Need style recommendations for clothing and footwear without a specific brand in mind"
1348
  },
1349
  {
1350
  "actual": {
1351
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1352
- "model_output.classification.iab_content.tier1.label": "Style & Fashion",
1353
- "model_output.classification.iab_content.tier2.label": "Women's Fashion",
1354
- "model_output.classification.iab_content.tier3.label": "Women's Shoes and Footwear"
1355
  },
1356
  "expected": {
1357
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1361,10 +1327,25 @@
1361
  },
1362
  "id": "womens-shoes-easy",
1363
  "mismatches": [
 
 
 
 
 
1364
  {
1365
  "actual": "nearest_equivalent",
1366
  "expected": "exact",
1367
  "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
 
 
 
 
 
1368
  }
1369
  ],
1370
  "notes": "Cross-vertical easy IAB mapping case for Style & Fashion > Women's Fashion > Women's Shoes and Footwear.",
@@ -1374,10 +1355,10 @@
1374
  },
1375
  {
1376
  "actual": {
1377
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1378
- "model_output.classification.iab_content.tier1.label": "Style & Fashion",
1379
- "model_output.classification.iab_content.tier2.label": "Women's Fashion",
1380
- "model_output.classification.iab_content.tier3.label": "Women's Shoes and Footwear"
1381
  },
1382
  "expected": {
1383
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1388,9 +1369,19 @@
1388
  "id": "womens-shoes-medium",
1389
  "mismatches": [
1390
  {
1391
- "actual": "nearest_equivalent",
1392
- "expected": "exact",
1393
- "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
 
 
 
 
 
1394
  }
1395
  ],
1396
  "notes": "Cross-vertical medium IAB mapping case for Style & Fashion > Women's Fashion > Women's Shoes and Footwear.",
@@ -1400,7 +1391,7 @@
1400
  },
1401
  {
1402
  "actual": {
1403
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1404
  "model_output.classification.iab_content.tier1.label": "Style & Fashion",
1405
  "model_output.classification.iab_content.tier2.label": "Women's Fashion",
1406
  "model_output.classification.iab_content.tier3.label": "Women's Shoes and Footwear"
@@ -1412,15 +1403,9 @@
1412
  "model_output.classification.iab_content.tier3.label": "Women's Shoes and Footwear"
1413
  },
1414
  "id": "womens-shoes-hard",
1415
- "mismatches": [
1416
- {
1417
- "actual": "nearest_equivalent",
1418
- "expected": "exact",
1419
- "path": "model_output.classification.iab_content.mapping_mode"
1420
- }
1421
- ],
1422
  "notes": "Cross-vertical hard IAB mapping case for Style & Fashion > Women's Fashion > Women's Shoes and Footwear.",
1423
- "pass": false,
1424
  "status": "must_fix",
1425
  "text": "Need women's footwear for commuting that looks polished but feels comfortable"
1426
  },
@@ -1428,8 +1413,8 @@
1428
  "actual": {
1429
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1430
  "model_output.classification.iab_content.tier1.label": "Style & Fashion",
1431
- "model_output.classification.iab_content.tier2.label": "Men's Fashion",
1432
- "model_output.classification.iab_content.tier3.label": "Men's Shoes and Footwear"
1433
  },
1434
  "expected": {
1435
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1443,6 +1428,16 @@
1443
  "actual": "nearest_equivalent",
1444
  "expected": "exact",
1445
  "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
 
 
 
 
 
1446
  }
1447
  ],
1448
  "notes": "Cross-vertical easy IAB mapping case for Style & Fashion > Men's Fashion > Men's Shoes and Footwear.",
@@ -1452,7 +1447,7 @@
1452
  },
1453
  {
1454
  "actual": {
1455
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1456
  "model_output.classification.iab_content.tier1.label": "Style & Fashion",
1457
  "model_output.classification.iab_content.tier2.label": "Men's Fashion",
1458
  "model_output.classification.iab_content.tier3.label": "Men's Shoes and Footwear"
@@ -1464,15 +1459,9 @@
1464
  "model_output.classification.iab_content.tier3.label": "Men's Shoes and Footwear"
1465
  },
1466
  "id": "mens-shoes-medium",
1467
- "mismatches": [
1468
- {
1469
- "actual": "nearest_equivalent",
1470
- "expected": "exact",
1471
- "path": "model_output.classification.iab_content.mapping_mode"
1472
- }
1473
- ],
1474
  "notes": "Cross-vertical medium IAB mapping case for Style & Fashion > Men's Fashion > Men's Shoes and Footwear.",
1475
- "pass": false,
1476
  "status": "must_fix",
1477
  "text": "Good men's dress shoes for office use"
1478
  },
@@ -1480,8 +1469,8 @@
1480
  "actual": {
1481
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1482
  "model_output.classification.iab_content.tier1.label": "Style & Fashion",
1483
- "model_output.classification.iab_content.tier2.label": "Men's Fashion",
1484
- "model_output.classification.iab_content.tier3.label": "Men's Shoes and Footwear"
1485
  },
1486
  "expected": {
1487
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1495,6 +1484,16 @@
1495
  "actual": "nearest_equivalent",
1496
  "expected": "exact",
1497
  "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
 
 
 
 
 
1498
  }
1499
  ],
1500
  "notes": "Cross-vertical hard IAB mapping case for Style & Fashion > Men's Fashion > Men's Shoes and Footwear.",
@@ -1504,10 +1503,10 @@
1504
  },
1505
  {
1506
  "actual": {
1507
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1508
- "model_output.classification.iab_content.tier1.label": "Attractions",
1509
- "model_output.classification.iab_content.tier2.label": "Nightclubs",
1510
- "model_output.classification.iab_content.tier3.label": null
1511
  },
1512
  "expected": {
1513
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1516,39 +1515,18 @@
1516
  "model_output.classification.iab_content.tier3.label": "Hotels and Motels"
1517
  },
1518
  "id": "hotels-easy",
1519
- "mismatches": [
1520
- {
1521
- "actual": "Attractions",
1522
- "expected": "Travel",
1523
- "path": "model_output.classification.iab_content.tier1.label"
1524
- },
1525
- {
1526
- "actual": "nearest_equivalent",
1527
- "expected": "exact",
1528
- "path": "model_output.classification.iab_content.mapping_mode"
1529
- },
1530
- {
1531
- "actual": "Nightclubs",
1532
- "expected": "Travel Type",
1533
- "path": "model_output.classification.iab_content.tier2.label"
1534
- },
1535
- {
1536
- "actual": null,
1537
- "expected": "Hotels and Motels",
1538
- "path": "model_output.classification.iab_content.tier3.label"
1539
- }
1540
- ],
1541
  "notes": "Cross-vertical easy IAB mapping case for Travel > Travel Type > Hotels and Motels.",
1542
- "pass": false,
1543
  "status": "must_fix",
1544
  "text": "Need a hotel in Chicago for two nights"
1545
  },
1546
  {
1547
  "actual": {
1548
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1549
  "model_output.classification.iab_content.tier1.label": "Travel",
1550
  "model_output.classification.iab_content.tier2.label": "Travel Type",
1551
- "model_output.classification.iab_content.tier3.label": null
1552
  },
1553
  "expected": {
1554
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1557,26 +1535,15 @@
1557
  "model_output.classification.iab_content.tier3.label": "Hotels and Motels"
1558
  },
1559
  "id": "hotels-medium",
1560
- "mismatches": [
1561
- {
1562
- "actual": "nearest_equivalent",
1563
- "expected": "exact",
1564
- "path": "model_output.classification.iab_content.mapping_mode"
1565
- },
1566
- {
1567
- "actual": null,
1568
- "expected": "Hotels and Motels",
1569
- "path": "model_output.classification.iab_content.tier3.label"
1570
- }
1571
- ],
1572
  "notes": "Cross-vertical medium IAB mapping case for Travel > Travel Type > Hotels and Motels.",
1573
- "pass": false,
1574
  "status": "must_fix",
1575
  "text": "Best hotels near Times Square for a weekend trip"
1576
  },
1577
  {
1578
  "actual": {
1579
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1580
  "model_output.classification.iab_content.tier1.label": "Travel",
1581
  "model_output.classification.iab_content.tier2.label": null,
1582
  "model_output.classification.iab_content.tier3.label": null
@@ -1589,11 +1556,6 @@
1589
  },
1590
  "id": "hotels-hard",
1591
  "mismatches": [
1592
- {
1593
- "actual": "nearest_equivalent",
1594
- "expected": "exact",
1595
- "path": "model_output.classification.iab_content.mapping_mode"
1596
- },
1597
  {
1598
  "actual": null,
1599
  "expected": "Travel Type",
@@ -1612,7 +1574,7 @@
1612
  },
1613
  {
1614
  "actual": {
1615
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1616
  "model_output.classification.iab_content.tier1.label": "Real Estate",
1617
  "model_output.classification.iab_content.tier2.label": "Apartments"
1618
  },
@@ -1623,11 +1585,6 @@
1623
  },
1624
  "id": "real-estate-rentals-easy",
1625
  "mismatches": [
1626
- {
1627
- "actual": "nearest_equivalent",
1628
- "expected": "exact",
1629
- "path": "model_output.classification.iab_content.mapping_mode"
1630
- },
1631
  {
1632
  "actual": "Apartments",
1633
  "expected": "Real Estate Renting and Leasing",
@@ -1641,7 +1598,7 @@
1641
  },
1642
  {
1643
  "actual": {
1644
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1645
  "model_output.classification.iab_content.tier1.label": "Real Estate",
1646
  "model_output.classification.iab_content.tier2.label": "Apartments"
1647
  },
@@ -1652,6 +1609,11 @@
1652
  },
1653
  "id": "real-estate-rentals-medium",
1654
  "mismatches": [
 
 
 
 
 
1655
  {
1656
  "actual": "Apartments",
1657
  "expected": "Real Estate Renting and Leasing",
@@ -1665,9 +1627,9 @@
1665
  },
1666
  {
1667
  "actual": {
1668
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1669
  "model_output.classification.iab_content.tier1.label": "Real Estate",
1670
- "model_output.classification.iab_content.tier2.label": null
1671
  },
1672
  "expected": {
1673
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1675,20 +1637,9 @@
1675
  "model_output.classification.iab_content.tier2.label": "Real Estate Renting and Leasing"
1676
  },
1677
  "id": "real-estate-rentals-hard",
1678
- "mismatches": [
1679
- {
1680
- "actual": "nearest_equivalent",
1681
- "expected": "exact",
1682
- "path": "model_output.classification.iab_content.mapping_mode"
1683
- },
1684
- {
1685
- "actual": null,
1686
- "expected": "Real Estate Renting and Leasing",
1687
- "path": "model_output.classification.iab_content.tier2.label"
1688
- }
1689
- ],
1690
  "notes": "Cross-vertical hard IAB mapping case for Real Estate > Real Estate Renting and Leasing.",
1691
- "pass": false,
1692
  "status": "must_fix",
1693
  "text": "Need rental listings for a short move, not home-buying advice"
1694
  },
@@ -1696,8 +1647,8 @@
1696
  "actual": {
1697
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1698
  "model_output.classification.iab_content.tier1.label": "Healthy Living",
1699
- "model_output.classification.iab_content.tier2.label": "Fitness and Exercise",
1700
- "model_output.classification.iab_content.tier3.label": "Running and Jogging"
1701
  },
1702
  "expected": {
1703
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1711,6 +1662,16 @@
1711
  "actual": "nearest_equivalent",
1712
  "expected": "exact",
1713
  "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
 
 
 
 
 
1714
  }
1715
  ],
1716
  "notes": "Cross-vertical easy IAB mapping case for Healthy Living > Fitness and Exercise > Running and Jogging.",
@@ -1720,10 +1681,10 @@
1720
  },
1721
  {
1722
  "actual": {
1723
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1724
- "model_output.classification.iab_content.tier1.label": "Healthy Living",
1725
- "model_output.classification.iab_content.tier2.label": "Fitness and Exercise",
1726
- "model_output.classification.iab_content.tier3.label": "Running and Jogging"
1727
  },
1728
  "expected": {
1729
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1734,9 +1695,19 @@
1734
  "id": "running-and-jogging-medium",
1735
  "mismatches": [
1736
  {
1737
- "actual": "nearest_equivalent",
1738
- "expected": "exact",
1739
- "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
 
 
 
 
 
1740
  }
1741
  ],
1742
  "notes": "Cross-vertical medium IAB mapping case for Healthy Living > Fitness and Exercise > Running and Jogging.",
@@ -1746,10 +1717,10 @@
1746
  },
1747
  {
1748
  "actual": {
1749
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1750
  "model_output.classification.iab_content.tier1.label": "Healthy Living",
1751
  "model_output.classification.iab_content.tier2.label": "Fitness and Exercise",
1752
- "model_output.classification.iab_content.tier3.label": "Running and Jogging"
1753
  },
1754
  "expected": {
1755
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1760,9 +1731,9 @@
1760
  "id": "running-and-jogging-hard",
1761
  "mismatches": [
1762
  {
1763
- "actual": "nearest_equivalent",
1764
- "expected": "exact",
1765
- "path": "model_output.classification.iab_content.mapping_mode"
1766
  }
1767
  ],
1768
  "notes": "Cross-vertical hard IAB mapping case for Healthy Living > Fitness and Exercise > Running and Jogging.",
@@ -1772,9 +1743,9 @@
1772
  },
1773
  {
1774
  "actual": {
1775
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1776
  "model_output.classification.iab_content.tier1.label": "Sports",
1777
- "model_output.classification.iab_content.tier2.label": "Australian Rules Football"
1778
  },
1779
  "expected": {
1780
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1782,28 +1753,17 @@
1782
  "model_output.classification.iab_content.tier2.label": "Soccer"
1783
  },
1784
  "id": "soccer-easy",
1785
- "mismatches": [
1786
- {
1787
- "actual": "nearest_equivalent",
1788
- "expected": "exact",
1789
- "path": "model_output.classification.iab_content.mapping_mode"
1790
- },
1791
- {
1792
- "actual": "Australian Rules Football",
1793
- "expected": "Soccer",
1794
- "path": "model_output.classification.iab_content.tier2.label"
1795
- }
1796
- ],
1797
  "notes": "Cross-vertical easy IAB mapping case for Sports > Soccer.",
1798
- "pass": false,
1799
  "status": "must_fix",
1800
  "text": "How do offside rules work in soccer?"
1801
  },
1802
  {
1803
  "actual": {
1804
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1805
  "model_output.classification.iab_content.tier1.label": "Sports",
1806
- "model_output.classification.iab_content.tier2.label": null
1807
  },
1808
  "expected": {
1809
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1811,28 +1771,17 @@
1811
  "model_output.classification.iab_content.tier2.label": "Soccer"
1812
  },
1813
  "id": "soccer-medium",
1814
- "mismatches": [
1815
- {
1816
- "actual": "nearest_equivalent",
1817
- "expected": "exact",
1818
- "path": "model_output.classification.iab_content.mapping_mode"
1819
- },
1820
- {
1821
- "actual": null,
1822
- "expected": "Soccer",
1823
- "path": "model_output.classification.iab_content.tier2.label"
1824
- }
1825
- ],
1826
  "notes": "Cross-vertical medium IAB mapping case for Sports > Soccer.",
1827
- "pass": false,
1828
  "status": "must_fix",
1829
  "text": "Best soccer drills for beginner players"
1830
  },
1831
  {
1832
  "actual": {
1833
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1834
  "model_output.classification.iab_content.tier1.label": "Sports",
1835
- "model_output.classification.iab_content.tier2.label": "Fantasy Sports"
1836
  },
1837
  "expected": {
1838
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1840,26 +1789,15 @@
1840
  "model_output.classification.iab_content.tier2.label": "Soccer"
1841
  },
1842
  "id": "soccer-hard",
1843
- "mismatches": [
1844
- {
1845
- "actual": "nearest_equivalent",
1846
- "expected": "exact",
1847
- "path": "model_output.classification.iab_content.mapping_mode"
1848
- },
1849
- {
1850
- "actual": "Fantasy Sports",
1851
- "expected": "Soccer",
1852
- "path": "model_output.classification.iab_content.tier2.label"
1853
- }
1854
- ],
1855
  "notes": "Cross-vertical hard IAB mapping case for Sports > Soccer.",
1856
- "pass": false,
1857
  "status": "must_fix",
1858
  "text": "Need help understanding football tactics for the Premier League, not fantasy sports"
1859
  },
1860
  {
1861
  "actual": {
1862
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1863
  "model_output.classification.iab_content.tier1.label": "Genres",
1864
  "model_output.classification.iab_content.tier2.label": "Fantasy"
1865
  },
@@ -1875,11 +1813,6 @@
1875
  "expected": "Books and Literature",
1876
  "path": "model_output.classification.iab_content.tier1.label"
1877
  },
1878
- {
1879
- "actual": "nearest_equivalent",
1880
- "expected": "exact",
1881
- "path": "model_output.classification.iab_content.mapping_mode"
1882
- },
1883
  {
1884
  "actual": "Fantasy",
1885
  "expected": "Fiction",
@@ -1893,7 +1826,7 @@
1893
  },
1894
  {
1895
  "actual": {
1896
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1897
  "model_output.classification.iab_content.tier1.label": "Books and Literature",
1898
  "model_output.classification.iab_content.tier2.label": "Fiction"
1899
  },
@@ -1903,17 +1836,23 @@
1903
  "model_output.classification.iab_content.tier2.label": "Fiction"
1904
  },
1905
  "id": "fiction-medium",
1906
- "mismatches": [],
 
 
 
 
 
 
1907
  "notes": "Cross-vertical medium IAB mapping case for Books and Literature > Fiction.",
1908
- "pass": true,
1909
  "status": "must_fix",
1910
  "text": "Best fiction books for a long flight"
1911
  },
1912
  {
1913
  "actual": {
1914
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1915
  "model_output.classification.iab_content.tier1.label": "Books and Literature",
1916
- "model_output.classification.iab_content.tier2.label": "Comics and Graphic Novels"
1917
  },
1918
  "expected": {
1919
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1921,26 +1860,15 @@
1921
  "model_output.classification.iab_content.tier2.label": "Fiction"
1922
  },
1923
  "id": "fiction-hard",
1924
- "mismatches": [
1925
- {
1926
- "actual": "nearest_equivalent",
1927
- "expected": "exact",
1928
- "path": "model_output.classification.iab_content.mapping_mode"
1929
- },
1930
- {
1931
- "actual": "Comics and Graphic Novels",
1932
- "expected": "Fiction",
1933
- "path": "model_output.classification.iab_content.tier2.label"
1934
- }
1935
- ],
1936
  "notes": "Cross-vertical hard IAB mapping case for Books and Literature > Fiction.",
1937
- "pass": false,
1938
  "status": "must_fix",
1939
  "text": "Looking for a character-driven novel, not comics or poetry"
1940
  },
1941
  {
1942
  "actual": {
1943
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1944
  "model_output.classification.iab_content.tier1.label": "Home & Garden",
1945
  "model_output.classification.iab_content.tier2.label": "Remodeling & Construction"
1946
  },
@@ -1951,11 +1879,6 @@
1951
  },
1952
  "id": "home-improvement-easy",
1953
  "mismatches": [
1954
- {
1955
- "actual": "nearest_equivalent",
1956
- "expected": "exact",
1957
- "path": "model_output.classification.iab_content.mapping_mode"
1958
- },
1959
  {
1960
  "actual": "Remodeling & Construction",
1961
  "expected": "Home Improvement",
@@ -1970,8 +1893,8 @@
1970
  {
1971
  "actual": {
1972
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1973
- "model_output.classification.iab_content.tier1.label": "Style & Fashion",
1974
- "model_output.classification.iab_content.tier2.label": "Personal Care"
1975
  },
1976
  "expected": {
1977
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1980,18 +1903,13 @@
1980
  },
1981
  "id": "home-improvement-medium",
1982
  "mismatches": [
1983
- {
1984
- "actual": "Style & Fashion",
1985
- "expected": "Home & Garden",
1986
- "path": "model_output.classification.iab_content.tier1.label"
1987
- },
1988
  {
1989
  "actual": "nearest_equivalent",
1990
  "expected": "exact",
1991
  "path": "model_output.classification.iab_content.mapping_mode"
1992
  },
1993
  {
1994
- "actual": "Personal Care",
1995
  "expected": "Home Improvement",
1996
  "path": "model_output.classification.iab_content.tier2.label"
1997
  }
@@ -2003,9 +1921,9 @@
2003
  },
2004
  {
2005
  "actual": {
2006
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2007
- "model_output.classification.iab_content.tier1.label": "Home & Garden",
2008
- "model_output.classification.iab_content.tier2.label": "Interior Decorating"
2009
  },
2010
  "expected": {
2011
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -2015,12 +1933,12 @@
2015
  "id": "home-improvement-hard",
2016
  "mismatches": [
2017
  {
2018
- "actual": "nearest_equivalent",
2019
- "expected": "exact",
2020
- "path": "model_output.classification.iab_content.mapping_mode"
2021
  },
2022
  {
2023
- "actual": "Interior Decorating",
2024
  "expected": "Home Improvement",
2025
  "path": "model_output.classification.iab_content.tier2.label"
2026
  }
@@ -2033,8 +1951,8 @@
2033
  {
2034
  "actual": {
2035
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2036
- "model_output.classification.iab_content.tier1.label": "Education",
2037
- "model_output.classification.iab_content.tier2.label": "Online Education"
2038
  },
2039
  "expected": {
2040
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -2043,10 +1961,20 @@
2043
  },
2044
  "id": "online-education-easy",
2045
  "mismatches": [
 
 
 
 
 
2046
  {
2047
  "actual": "nearest_equivalent",
2048
  "expected": "exact",
2049
  "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
2050
  }
2051
  ],
2052
  "notes": "Cross-vertical easy IAB mapping case for Education > Online Education.",
@@ -2056,7 +1984,7 @@
2056
  },
2057
  {
2058
  "actual": {
2059
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2060
  "model_output.classification.iab_content.tier1.label": "Careers",
2061
  "model_output.classification.iab_content.tier2.label": "Remote Working"
2062
  },
@@ -2072,11 +2000,6 @@
2072
  "expected": "Education",
2073
  "path": "model_output.classification.iab_content.tier1.label"
2074
  },
2075
- {
2076
- "actual": "nearest_equivalent",
2077
- "expected": "exact",
2078
- "path": "model_output.classification.iab_content.mapping_mode"
2079
- },
2080
  {
2081
  "actual": "Remote Working",
2082
  "expected": "Online Education",
@@ -2091,8 +2014,8 @@
2091
  {
2092
  "actual": {
2093
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2094
- "model_output.classification.iab_content.tier1.label": "Technology & Computing",
2095
- "model_output.classification.iab_content.tier2.label": "Computing"
2096
  },
2097
  "expected": {
2098
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -2102,7 +2025,7 @@
2102
  "id": "online-education-hard",
2103
  "mismatches": [
2104
  {
2105
- "actual": "Technology & Computing",
2106
  "expected": "Education",
2107
  "path": "model_output.classification.iab_content.tier1.label"
2108
  },
@@ -2112,7 +2035,7 @@
2112
  "path": "model_output.classification.iab_content.mapping_mode"
2113
  },
2114
  {
2115
- "actual": "Computing",
2116
  "expected": "Online Education",
2117
  "path": "model_output.classification.iab_content.tier2.label"
2118
  }
@@ -2124,7 +2047,7 @@
2124
  },
2125
  {
2126
  "actual": {
2127
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2128
  "model_output.classification.iab_content.tier1.label": "Education",
2129
  "model_output.classification.iab_content.tier2.label": "College Education",
2130
  "model_output.classification.iab_content.tier3.label": "Postgraduate Education"
@@ -2136,21 +2059,15 @@
2136
  "model_output.classification.iab_content.tier3.label": "Postgraduate Education"
2137
  },
2138
  "id": "postgraduate-education-easy",
2139
- "mismatches": [
2140
- {
2141
- "actual": "nearest_equivalent",
2142
- "expected": "exact",
2143
- "path": "model_output.classification.iab_content.mapping_mode"
2144
- }
2145
- ],
2146
  "notes": "Cross-vertical easy IAB mapping case for Education > College Education > Postgraduate Education.",
2147
- "pass": false,
2148
  "status": "must_fix",
2149
  "text": "best universities to study masters"
2150
  },
2151
  {
2152
  "actual": {
2153
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2154
  "model_output.classification.iab_content.tier1.label": "Education",
2155
  "model_output.classification.iab_content.tier2.label": "College Education",
2156
  "model_output.classification.iab_content.tier3.label": "Postgraduate Education"
@@ -2162,21 +2079,15 @@
2162
  "model_output.classification.iab_content.tier3.label": "Postgraduate Education"
2163
  },
2164
  "id": "postgraduate-education-medium",
2165
- "mismatches": [
2166
- {
2167
- "actual": "nearest_equivalent",
2168
- "expected": "exact",
2169
- "path": "model_output.classification.iab_content.mapping_mode"
2170
- }
2171
- ],
2172
  "notes": "Cross-vertical medium IAB mapping case for Education > College Education > Postgraduate Education.",
2173
- "pass": false,
2174
  "status": "must_fix",
2175
  "text": "which graduate schools have strong data science programs"
2176
  },
2177
  {
2178
  "actual": {
2179
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2180
  "model_output.classification.iab_content.tier1.label": "Education",
2181
  "model_output.classification.iab_content.tier2.label": "College Education",
2182
  "model_output.classification.iab_content.tier3.label": "Postgraduate Education"
@@ -2188,15 +2099,9 @@
2188
  "model_output.classification.iab_content.tier3.label": "Postgraduate Education"
2189
  },
2190
  "id": "postgraduate-education-hard",
2191
- "mismatches": [
2192
- {
2193
- "actual": "nearest_equivalent",
2194
- "expected": "exact",
2195
- "path": "model_output.classification.iab_content.mapping_mode"
2196
- }
2197
- ],
2198
  "notes": "Cross-vertical hard IAB mapping case for Education > College Education > Postgraduate Education.",
2199
- "pass": false,
2200
  "status": "must_fix",
2201
  "text": "need postgraduate options for a master's degree, not short online courses"
2202
  },
@@ -2224,7 +2129,7 @@
2224
  },
2225
  {
2226
  "actual": {
2227
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2228
  "model_output.classification.iab_content.tier1.label": "Medical Health"
2229
  },
2230
  "expected": {
@@ -2232,21 +2137,15 @@
2232
  "model_output.classification.iab_content.tier1.label": "Medical Health"
2233
  },
2234
  "id": "medical-health-medium",
2235
- "mismatches": [
2236
- {
2237
- "actual": "nearest_equivalent",
2238
- "expected": "exact",
2239
- "path": "model_output.classification.iab_content.mapping_mode"
2240
- }
2241
- ],
2242
  "notes": "Cross-vertical medium IAB mapping case for Medical Health.",
2243
- "pass": false,
2244
  "status": "must_fix",
2245
  "text": "when should i see a doctor for persistent knee pain"
2246
  },
2247
  {
2248
  "actual": {
2249
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2250
  "model_output.classification.iab_content.tier1.label": "Medical Health"
2251
  },
2252
  "expected": {
@@ -2254,21 +2153,15 @@
2254
  "model_output.classification.iab_content.tier1.label": "Medical Health"
2255
  },
2256
  "id": "medical-health-hard",
2257
- "mismatches": [
2258
- {
2259
- "actual": "nearest_equivalent",
2260
- "expected": "exact",
2261
- "path": "model_output.classification.iab_content.mapping_mode"
2262
- }
2263
- ],
2264
  "notes": "Cross-vertical hard IAB mapping case for Medical Health.",
2265
- "pass": false,
2266
  "status": "must_fix",
2267
  "text": "need medical advice about symptoms, not wellness or fitness tips"
2268
  },
2269
  {
2270
  "actual": {
2271
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2272
  "model_output.classification.iab_content.tier1.label": "Careers",
2273
  "model_output.classification.iab_content.tier2.label": "Remote Working"
2274
  },
@@ -2279,11 +2172,6 @@
2279
  },
2280
  "id": "careers-job-search-easy",
2281
  "mismatches": [
2282
- {
2283
- "actual": "nearest_equivalent",
2284
- "expected": "exact",
2285
- "path": "model_output.classification.iab_content.mapping_mode"
2286
- },
2287
  {
2288
  "actual": "Remote Working",
2289
  "expected": "Job Search",
@@ -2299,7 +2187,7 @@
2299
  "actual": {
2300
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2301
  "model_output.classification.iab_content.tier1.label": "Business and Finance",
2302
- "model_output.classification.iab_content.tier2.label": "Industries"
2303
  },
2304
  "expected": {
2305
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -2319,7 +2207,7 @@
2319
  "path": "model_output.classification.iab_content.mapping_mode"
2320
  },
2321
  {
2322
- "actual": "Industries",
2323
  "expected": "Job Search",
2324
  "path": "model_output.classification.iab_content.tier2.label"
2325
  }
@@ -2332,8 +2220,8 @@
2332
  {
2333
  "actual": {
2334
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2335
- "model_output.classification.iab_content.tier1.label": "Business and Finance",
2336
- "model_output.classification.iab_content.tier2.label": "Industries"
2337
  },
2338
  "expected": {
2339
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -2343,7 +2231,7 @@
2343
  "id": "careers-job-search-hard",
2344
  "mismatches": [
2345
  {
2346
- "actual": "Business and Finance",
2347
  "expected": "Careers",
2348
  "path": "model_output.classification.iab_content.tier1.label"
2349
  },
@@ -2353,7 +2241,7 @@
2353
  "path": "model_output.classification.iab_content.mapping_mode"
2354
  },
2355
  {
2356
- "actual": "Industries",
2357
  "expected": "Job Search",
2358
  "path": "model_output.classification.iab_content.tier2.label"
2359
  }
@@ -2366,7 +2254,7 @@
2366
  {
2367
  "actual": {
2368
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2369
- "model_output.classification.iab_content.tier1.label": "Personal Finance",
2370
  "model_output.classification.iab_content.tier2.label": null
2371
  },
2372
  "expected": {
@@ -2376,6 +2264,11 @@
2376
  },
2377
  "id": "personal-finance-easy",
2378
  "mismatches": [
 
 
 
 
 
2379
  {
2380
  "actual": "nearest_equivalent",
2381
  "expected": "exact",
@@ -2394,7 +2287,7 @@
2394
  },
2395
  {
2396
  "actual": {
2397
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2398
  "model_output.classification.iab_content.tier1.label": "Personal Finance",
2399
  "model_output.classification.iab_content.tier2.label": null
2400
  },
@@ -2405,11 +2298,6 @@
2405
  },
2406
  "id": "personal-finance-medium",
2407
  "mismatches": [
2408
- {
2409
- "actual": "nearest_equivalent",
2410
- "expected": "exact",
2411
- "path": "model_output.classification.iab_content.mapping_mode"
2412
- },
2413
  {
2414
  "actual": null,
2415
  "expected": "Financial Planning",
@@ -2423,7 +2311,7 @@
2423
  },
2424
  {
2425
  "actual": {
2426
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2427
  "model_output.classification.iab_content.tier1.label": "Personal Finance",
2428
  "model_output.classification.iab_content.tier2.label": "Retirement Planning"
2429
  },
@@ -2434,11 +2322,6 @@
2434
  },
2435
  "id": "personal-finance-hard",
2436
  "mismatches": [
2437
- {
2438
- "actual": "nearest_equivalent",
2439
- "expected": "exact",
2440
- "path": "model_output.classification.iab_content.mapping_mode"
2441
- },
2442
  {
2443
  "actual": "Retirement Planning",
2444
  "expected": "Financial Planning",
@@ -2452,7 +2335,7 @@
2452
  },
2453
  {
2454
  "actual": {
2455
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2456
  "model_output.classification.iab_content.tier1.label": "Family and Relationships",
2457
  "model_output.classification.iab_content.tier2.label": "Parenting"
2458
  },
@@ -2462,23 +2345,17 @@
2462
  "model_output.classification.iab_content.tier2.label": "Parenting"
2463
  },
2464
  "id": "parenting-easy",
2465
- "mismatches": [
2466
- {
2467
- "actual": "nearest_equivalent",
2468
- "expected": "exact",
2469
- "path": "model_output.classification.iab_content.mapping_mode"
2470
- }
2471
- ],
2472
  "notes": "Cross-vertical easy IAB mapping case for Family and Relationships > Parenting.",
2473
- "pass": false,
2474
  "status": "must_fix",
2475
  "text": "tips for parenting a toddler"
2476
  },
2477
  {
2478
  "actual": {
2479
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2480
- "model_output.classification.iab_content.tier1.label": "Education",
2481
- "model_output.classification.iab_content.tier2.label": "Online Education"
2482
  },
2483
  "expected": {
2484
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -2488,17 +2365,12 @@
2488
  "id": "parenting-medium",
2489
  "mismatches": [
2490
  {
2491
- "actual": "Education",
2492
  "expected": "Family and Relationships",
2493
  "path": "model_output.classification.iab_content.tier1.label"
2494
  },
2495
  {
2496
- "actual": "nearest_equivalent",
2497
- "expected": "exact",
2498
- "path": "model_output.classification.iab_content.mapping_mode"
2499
- },
2500
- {
2501
- "actual": "Online Education",
2502
  "expected": "Parenting",
2503
  "path": "model_output.classification.iab_content.tier2.label"
2504
  }
@@ -2510,7 +2382,7 @@
2510
  },
2511
  {
2512
  "actual": {
2513
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2514
  "model_output.classification.iab_content.tier1.label": "Family and Relationships",
2515
  "model_output.classification.iab_content.tier2.label": "Parenting"
2516
  },
@@ -2520,21 +2392,15 @@
2520
  "model_output.classification.iab_content.tier2.label": "Parenting"
2521
  },
2522
  "id": "parenting-hard",
2523
- "mismatches": [
2524
- {
2525
- "actual": "nearest_equivalent",
2526
- "expected": "exact",
2527
- "path": "model_output.classification.iab_content.mapping_mode"
2528
- }
2529
- ],
2530
  "notes": "Cross-vertical hard IAB mapping case for Family and Relationships > Parenting.",
2531
- "pass": false,
2532
  "status": "must_fix",
2533
  "text": "need parenting advice for a child starting preschool"
2534
  },
2535
  {
2536
  "actual": {
2537
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2538
  "model_output.classification.iab_content.tier1.label": "Home & Garden",
2539
  "model_output.classification.iab_content.tier2.label": "Gardening"
2540
  },
@@ -2544,22 +2410,16 @@
2544
  "model_output.classification.iab_content.tier2.label": "Gardening"
2545
  },
2546
  "id": "gardening-easy",
2547
- "mismatches": [
2548
- {
2549
- "actual": "nearest_equivalent",
2550
- "expected": "exact",
2551
- "path": "model_output.classification.iab_content.mapping_mode"
2552
- }
2553
- ],
2554
  "notes": "Cross-vertical easy IAB mapping case for Home & Garden > Gardening.",
2555
- "pass": false,
2556
  "status": "must_fix",
2557
  "text": "best plants for a small balcony garden"
2558
  },
2559
  {
2560
  "actual": {
2561
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2562
- "model_output.classification.iab_content.tier1.label": "Personal Finance",
2563
  "model_output.classification.iab_content.tier2.label": null
2564
  },
2565
  "expected": {
@@ -2570,7 +2430,7 @@
2570
  "id": "gardening-medium",
2571
  "mismatches": [
2572
  {
2573
- "actual": "Personal Finance",
2574
  "expected": "Home & Garden",
2575
  "path": "model_output.classification.iab_content.tier1.label"
2576
  },
@@ -2592,9 +2452,9 @@
2592
  },
2593
  {
2594
  "actual": {
2595
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2596
  "model_output.classification.iab_content.tier1.label": "Home & Garden",
2597
- "model_output.classification.iab_content.tier2.label": null
2598
  },
2599
  "expected": {
2600
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -2602,28 +2462,17 @@
2602
  "model_output.classification.iab_content.tier2.label": "Gardening"
2603
  },
2604
  "id": "gardening-hard",
2605
- "mismatches": [
2606
- {
2607
- "actual": "nearest_equivalent",
2608
- "expected": "exact",
2609
- "path": "model_output.classification.iab_content.mapping_mode"
2610
- },
2611
- {
2612
- "actual": null,
2613
- "expected": "Gardening",
2614
- "path": "model_output.classification.iab_content.tier2.label"
2615
- }
2616
- ],
2617
  "notes": "Cross-vertical hard IAB mapping case for Home & Garden > Gardening.",
2618
- "pass": false,
2619
  "status": "must_fix",
2620
  "text": "need gardening advice for a shady backyard, not interior decor ideas"
2621
  },
2622
  {
2623
  "actual": {
2624
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2625
- "model_output.classification.iab_content.tier1.label": "Genres",
2626
- "model_output.classification.iab_content.tier2.label": null
2627
  },
2628
  "expected": {
2629
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -2631,31 +2480,15 @@
2631
  "model_output.classification.iab_content.tier2.label": "Movies"
2632
  },
2633
  "id": "movies-easy",
2634
- "mismatches": [
2635
- {
2636
- "actual": "Genres",
2637
- "expected": "Entertainment",
2638
- "path": "model_output.classification.iab_content.tier1.label"
2639
- },
2640
- {
2641
- "actual": "nearest_equivalent",
2642
- "expected": "exact",
2643
- "path": "model_output.classification.iab_content.mapping_mode"
2644
- },
2645
- {
2646
- "actual": null,
2647
- "expected": "Movies",
2648
- "path": "model_output.classification.iab_content.tier2.label"
2649
- }
2650
- ],
2651
  "notes": "Cross-vertical easy IAB mapping case for Entertainment > Movies.",
2652
- "pass": false,
2653
  "status": "must_fix",
2654
  "text": "What movie should we watch tonight?"
2655
  },
2656
  {
2657
  "actual": {
2658
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2659
  "model_output.classification.iab_content.tier1.label": "Genres",
2660
  "model_output.classification.iab_content.tier2.label": "Horror"
2661
  },
@@ -2671,11 +2504,6 @@
2671
  "expected": "Entertainment",
2672
  "path": "model_output.classification.iab_content.tier1.label"
2673
  },
2674
- {
2675
- "actual": "nearest_equivalent",
2676
- "expected": "exact",
2677
- "path": "model_output.classification.iab_content.mapping_mode"
2678
- },
2679
  {
2680
  "actual": "Horror",
2681
  "expected": "Movies",
@@ -2689,9 +2517,9 @@
2689
  },
2690
  {
2691
  "actual": {
2692
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2693
  "model_output.classification.iab_content.tier1.label": "Entertainment",
2694
- "model_output.classification.iab_content.tier2.label": "Music"
2695
  },
2696
  "expected": {
2697
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -2699,20 +2527,9 @@
2699
  "model_output.classification.iab_content.tier2.label": "Movies"
2700
  },
2701
  "id": "movies-hard",
2702
- "mismatches": [
2703
- {
2704
- "actual": "nearest_equivalent",
2705
- "expected": "exact",
2706
- "path": "model_output.classification.iab_content.mapping_mode"
2707
- },
2708
- {
2709
- "actual": "Music",
2710
- "expected": "Movies",
2711
- "path": "model_output.classification.iab_content.tier2.label"
2712
- }
2713
- ],
2714
  "notes": "Cross-vertical hard IAB mapping case for Entertainment > Movies.",
2715
- "pass": false,
2716
  "status": "must_fix",
2717
  "text": "Looking for film recommendations, not TV shows or music"
2718
  }
 
1
  {
2
  "by_status": {
3
  "must_fix": {
4
+ "failed": 63,
5
+ "passed": 27,
6
  "total": 90
7
  }
8
  },
9
+ "cases_path": "/content/agentic-intent-classifier/examples/iab_cross_vertical_mapping_cases.json",
10
  "count": 90,
11
+ "failed": 63,
12
+ "passed": 27,
13
  "results": [
14
  {
15
  "actual": {
16
+ "model_output.classification.iab_content.mapping_mode": "exact",
17
  "model_output.classification.iab_content.tier1.label": "Automotive",
18
+ "model_output.classification.iab_content.tier2.label": "Auto Rentals"
19
  },
20
  "expected": {
21
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
25
  "id": "auto-buying-easy",
26
  "mismatches": [
27
  {
28
+ "actual": "exact",
29
+ "expected": "nearest_equivalent",
30
+ "path": "model_output.classification.iab_content.mapping_mode"
31
+ },
32
+ {
33
+ "actual": "Auto Rentals",
34
  "expected": "Auto Buying and Selling",
35
  "path": "model_output.classification.iab_content.tier2.label"
36
  }
 
93
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
94
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
95
  "model_output.classification.iab_content.tier2.label": "Computing",
96
+ "model_output.classification.iab_content.tier3.label": null
97
  },
98
  "expected": {
99
  "model_output.classification.iab_content.mapping_mode": "exact",
 
119
  "path": "model_output.classification.iab_content.tier2.label"
120
  },
121
  {
122
+ "actual": null,
123
  "expected": "Sales",
124
  "path": "model_output.classification.iab_content.tier3.label"
125
  }
 
133
  "actual": {
134
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
135
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
136
+ "model_output.classification.iab_content.tier2.label": null,
137
+ "model_output.classification.iab_content.tier3.label": null
138
  },
139
  "expected": {
140
  "model_output.classification.iab_content.mapping_mode": "exact",
 
155
  "path": "model_output.classification.iab_content.mapping_mode"
156
  },
157
  {
158
+ "actual": null,
159
  "expected": "Business",
160
  "path": "model_output.classification.iab_content.tier2.label"
161
  },
162
  {
163
+ "actual": null,
164
  "expected": "Sales",
165
  "path": "model_output.classification.iab_content.tier3.label"
166
  }
 
205
  "actual": {
206
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
207
  "model_output.classification.iab_content.tier1.label": "Hobbies & Interests",
208
+ "model_output.classification.iab_content.tier2.label": null,
209
  "model_output.classification.iab_content.tier3.label": null
210
  },
211
  "expected": {
 
227
  "path": "model_output.classification.iab_content.mapping_mode"
228
  },
229
  {
230
+ "actual": null,
231
  "expected": "Business",
232
  "path": "model_output.classification.iab_content.tier2.label"
233
  },
 
245
  {
246
  "actual": {
247
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
248
+ "model_output.classification.iab_content.tier1.label": "Careers",
249
+ "model_output.classification.iab_content.tier2.label": null,
250
  "model_output.classification.iab_content.tier3.label": null
251
  },
252
  "expected": {
 
258
  "id": "marketing-tools-medium",
259
  "mismatches": [
260
  {
261
+ "actual": "Careers",
262
  "expected": "Business and Finance",
263
  "path": "model_output.classification.iab_content.tier1.label"
264
  },
 
268
  "path": "model_output.classification.iab_content.mapping_mode"
269
  },
270
  {
271
+ "actual": null,
272
  "expected": "Business",
273
  "path": "model_output.classification.iab_content.tier2.label"
274
  },
 
286
  {
287
  "actual": {
288
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
289
+ "model_output.classification.iab_content.tier1.label": "Personal Finance",
290
+ "model_output.classification.iab_content.tier2.label": null,
291
+ "model_output.classification.iab_content.tier3.label": null
292
  },
293
  "expected": {
294
  "model_output.classification.iab_content.mapping_mode": "exact",
 
299
  "id": "marketing-tools-hard",
300
  "mismatches": [
301
  {
302
+ "actual": "Personal Finance",
303
  "expected": "Business and Finance",
304
  "path": "model_output.classification.iab_content.tier1.label"
305
  },
 
309
  "path": "model_output.classification.iab_content.mapping_mode"
310
  },
311
  {
312
+ "actual": null,
313
  "expected": "Business",
314
  "path": "model_output.classification.iab_content.tier2.label"
315
  },
316
  {
317
+ "actual": null,
318
  "expected": "Marketing and Advertising",
319
  "path": "model_output.classification.iab_content.tier3.label"
320
  }
 
327
  {
328
  "actual": {
329
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
330
+ "model_output.classification.iab_content.tier1.label": "Technology & Computing",
331
+ "model_output.classification.iab_content.tier2.label": "Computing",
332
  "model_output.classification.iab_content.tier3.label": null
333
  },
334
  "expected": {
 
340
  "id": "business-it-easy",
341
  "mismatches": [
342
  {
343
+ "actual": "Technology & Computing",
344
  "expected": "Business and Finance",
345
  "path": "model_output.classification.iab_content.tier1.label"
346
  },
 
350
  "path": "model_output.classification.iab_content.mapping_mode"
351
  },
352
  {
353
+ "actual": "Computing",
354
  "expected": "Business",
355
  "path": "model_output.classification.iab_content.tier2.label"
356
  },
 
368
  {
369
  "actual": {
370
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
371
+ "model_output.classification.iab_content.tier1.label": "Personal Finance",
372
+ "model_output.classification.iab_content.tier2.label": null,
373
  "model_output.classification.iab_content.tier3.label": null
374
  },
375
  "expected": {
 
380
  },
381
  "id": "business-it-medium",
382
  "mismatches": [
383
+ {
384
+ "actual": "Personal Finance",
385
+ "expected": "Business and Finance",
386
+ "path": "model_output.classification.iab_content.tier1.label"
387
+ },
388
  {
389
  "actual": "nearest_equivalent",
390
  "expected": "exact",
391
  "path": "model_output.classification.iab_content.mapping_mode"
392
  },
393
+ {
394
+ "actual": null,
395
+ "expected": "Business",
396
+ "path": "model_output.classification.iab_content.tier2.label"
397
+ },
398
  {
399
  "actual": null,
400
  "expected": "Business I.T.",
 
408
  },
409
  {
410
  "actual": {
411
+ "model_output.classification.iab_content.mapping_mode": "exact",
412
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
413
  "model_output.classification.iab_content.tier2.label": "Computing",
414
+ "model_output.classification.iab_content.tier3.label": "Information and Network Security"
415
  },
416
  "expected": {
417
  "model_output.classification.iab_content.mapping_mode": "exact",
 
426
  "expected": "Business and Finance",
427
  "path": "model_output.classification.iab_content.tier1.label"
428
  },
 
 
 
 
 
429
  {
430
  "actual": "Computing",
431
  "expected": "Business",
432
  "path": "model_output.classification.iab_content.tier2.label"
433
  },
434
  {
435
+ "actual": "Information and Network Security",
436
  "expected": "Business I.T.",
437
  "path": "model_output.classification.iab_content.tier3.label"
438
  }
 
444
  },
445
  {
446
  "actual": {
447
+ "model_output.classification.iab_content.mapping_mode": "exact",
448
+ "model_output.classification.iab_content.tier1.label": "Food & Drink",
449
+ "model_output.classification.iab_content.tier2.label": "Dining Out"
450
  },
451
  "expected": {
452
  "model_output.classification.iab_content.mapping_mode": "exact",
 
454
  "model_output.classification.iab_content.tier2.label": "Dining Out"
455
  },
456
  "id": "dining-out-easy",
457
+ "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
458
  "notes": "Cross-vertical easy IAB mapping case for Food & Drink > Dining Out.",
459
+ "pass": true,
460
  "status": "must_fix",
461
  "text": "Book a table for six tonight"
462
  },
463
  {
464
  "actual": {
465
+ "model_output.classification.iab_content.mapping_mode": "exact",
466
  "model_output.classification.iab_content.tier1.label": "Attractions",
467
+ "model_output.classification.iab_content.tier2.label": "Bars & Restaurants"
468
  },
469
  "expected": {
470
  "model_output.classification.iab_content.mapping_mode": "exact",
 
479
  "path": "model_output.classification.iab_content.tier1.label"
480
  },
481
  {
482
+ "actual": "Bars & Restaurants",
 
 
 
 
 
483
  "expected": "Dining Out",
484
  "path": "model_output.classification.iab_content.tier2.label"
485
  }
 
491
  },
492
  {
493
  "actual": {
494
+ "model_output.classification.iab_content.mapping_mode": "exact",
495
  "model_output.classification.iab_content.tier1.label": "Food & Drink",
496
+ "model_output.classification.iab_content.tier2.label": "Dining Out"
497
  },
498
  "expected": {
499
  "model_output.classification.iab_content.mapping_mode": "exact",
 
501
  "model_output.classification.iab_content.tier2.label": "Dining Out"
502
  },
503
  "id": "dining-out-hard",
504
+ "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
505
  "notes": "Cross-vertical hard IAB mapping case for Food & Drink > Dining Out.",
506
+ "pass": true,
507
  "status": "must_fix",
508
  "text": "Need a place to eat tonight where I can make a reservation online"
509
  },
510
  {
511
  "actual": {
512
+ "model_output.classification.iab_content.mapping_mode": "exact",
513
+ "model_output.classification.iab_content.tier1.label": "Food & Drink",
514
+ "model_output.classification.iab_content.tier2.label": "Alcoholic Beverages"
515
  },
516
  "expected": {
517
  "model_output.classification.iab_content.mapping_mode": "exact",
 
519
  "model_output.classification.iab_content.tier2.label": "Alcoholic Beverages"
520
  },
521
  "id": "alcoholic-beverages-easy",
522
+ "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
523
  "notes": "Cross-vertical easy IAB mapping case for Food & Drink > Alcoholic Beverages.",
524
+ "pass": true,
525
  "status": "must_fix",
526
  "text": "Which whiskey cocktail should I order?"
527
  },
528
  {
529
  "actual": {
530
+ "model_output.classification.iab_content.mapping_mode": "exact",
531
  "model_output.classification.iab_content.tier1.label": "Food & Drink",
532
+ "model_output.classification.iab_content.tier2.label": "Alcoholic Beverages"
533
  },
534
  "expected": {
535
  "model_output.classification.iab_content.mapping_mode": "exact",
 
537
  "model_output.classification.iab_content.tier2.label": "Alcoholic Beverages"
538
  },
539
  "id": "alcoholic-beverages-medium",
540
+ "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
541
  "notes": "Cross-vertical medium IAB mapping case for Food & Drink > Alcoholic Beverages.",
542
+ "pass": true,
543
  "status": "must_fix",
544
  "text": "Best vodka drinks for beginners"
545
  },
546
  {
547
  "actual": {
548
+ "model_output.classification.iab_content.mapping_mode": "exact",
549
  "model_output.classification.iab_content.tier1.label": "Food & Drink",
550
+ "model_output.classification.iab_content.tier2.label": "Alcoholic Beverages"
551
  },
552
  "expected": {
553
  "model_output.classification.iab_content.mapping_mode": "exact",
 
555
  "model_output.classification.iab_content.tier2.label": "Alcoholic Beverages"
556
  },
557
  "id": "alcoholic-beverages-hard",
558
+ "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
559
  "notes": "Cross-vertical hard IAB mapping case for Food & Drink > Alcoholic Beverages.",
560
+ "pass": true,
561
  "status": "must_fix",
562
  "text": "Want a spirit-forward drink recommendation, not a restaurant suggestion"
563
  },
564
  {
565
  "actual": {
566
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
567
+ "model_output.classification.iab_content.tier1.label": "Sports",
568
  "model_output.classification.iab_content.tier2.label": null
569
  },
570
  "expected": {
 
575
  "id": "artificial-intelligence-easy",
576
  "mismatches": [
577
  {
578
+ "actual": "Sports",
579
  "expected": "Technology & Computing",
580
  "path": "model_output.classification.iab_content.tier1.label"
581
  },
 
597
  },
598
  {
599
  "actual": {
600
+ "model_output.classification.iab_content.mapping_mode": "exact",
601
+ "model_output.classification.iab_content.tier1.label": "Education",
602
+ "model_output.classification.iab_content.tier2.label": "Language Learning"
603
  },
604
  "expected": {
605
  "model_output.classification.iab_content.mapping_mode": "exact",
 
609
  "id": "artificial-intelligence-medium",
610
  "mismatches": [
611
  {
612
+ "actual": "Education",
613
  "expected": "Technology & Computing",
614
  "path": "model_output.classification.iab_content.tier1.label"
615
  },
616
  {
617
+ "actual": "Language Learning",
 
 
 
 
 
618
  "expected": "Artificial Intelligence",
619
  "path": "model_output.classification.iab_content.tier2.label"
620
  }
 
626
  },
627
  {
628
  "actual": {
629
+ "model_output.classification.iab_content.mapping_mode": "exact",
630
  "model_output.classification.iab_content.tier1.label": "Education",
631
  "model_output.classification.iab_content.tier2.label": "Language Learning"
632
  },
 
642
  "expected": "Technology & Computing",
643
  "path": "model_output.classification.iab_content.tier1.label"
644
  },
 
 
 
 
 
645
  {
646
  "actual": "Language Learning",
647
  "expected": "Artificial Intelligence",
 
658
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
659
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
660
  "model_output.classification.iab_content.tier2.label": "Computing",
661
+ "model_output.classification.iab_content.tier3.label": null
662
  },
663
  "expected": {
664
  "model_output.classification.iab_content.mapping_mode": "exact",
 
672
  "actual": "nearest_equivalent",
673
  "expected": "exact",
674
  "path": "model_output.classification.iab_content.mapping_mode"
675
+ },
676
+ {
677
+ "actual": null,
678
+ "expected": "Software and Applications",
679
+ "path": "model_output.classification.iab_content.tier3.label"
680
  }
681
  ],
682
  "notes": "Cross-vertical easy IAB mapping case for Technology & Computing > Computing > Computer Software and Applications.",
 
689
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
690
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
691
  "model_output.classification.iab_content.tier2.label": "Computing",
692
+ "model_output.classification.iab_content.tier3.label": null
693
  },
694
  "expected": {
695
  "model_output.classification.iab_content.mapping_mode": "exact",
 
703
  "actual": "nearest_equivalent",
704
  "expected": "exact",
705
  "path": "model_output.classification.iab_content.mapping_mode"
706
+ },
707
+ {
708
+ "actual": null,
709
+ "expected": "Software and Applications",
710
+ "path": "model_output.classification.iab_content.tier3.label"
711
  }
712
  ],
713
  "notes": "Cross-vertical medium IAB mapping case for Technology & Computing > Computing > Computer Software and Applications.",
 
718
  {
719
  "actual": {
720
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
721
+ "model_output.classification.iab_content.tier1.label": "Business and Finance",
722
+ "model_output.classification.iab_content.tier2.label": null,
723
+ "model_output.classification.iab_content.tier3.label": null
724
  },
725
  "expected": {
726
  "model_output.classification.iab_content.mapping_mode": "exact",
 
730
  },
731
  "id": "software-apps-hard",
732
  "mismatches": [
733
+ {
734
+ "actual": "Business and Finance",
735
+ "expected": "Technology & Computing",
736
+ "path": "model_output.classification.iab_content.tier1.label"
737
+ },
738
  {
739
  "actual": "nearest_equivalent",
740
  "expected": "exact",
741
  "path": "model_output.classification.iab_content.mapping_mode"
742
+ },
743
+ {
744
+ "actual": null,
745
+ "expected": "Computing",
746
+ "path": "model_output.classification.iab_content.tier2.label"
747
+ },
748
+ {
749
+ "actual": null,
750
+ "expected": "Software and Applications",
751
+ "path": "model_output.classification.iab_content.tier3.label"
752
  }
753
  ],
754
  "notes": "Cross-vertical hard IAB mapping case for Technology & Computing > Computing > Computer Software and Applications.",
 
758
  },
759
  {
760
  "actual": {
761
+ "model_output.classification.iab_content.mapping_mode": "exact",
762
  "model_output.classification.iab_content.tier1.label": "Careers",
763
  "model_output.classification.iab_content.tier2.label": "Remote Working",
764
  "model_output.classification.iab_content.tier3.label": null,
 
778
  "expected": "Technology & Computing",
779
  "path": "model_output.classification.iab_content.tier1.label"
780
  },
 
 
 
 
 
781
  {
782
  "actual": "Remote Working",
783
  "expected": "Computing",
 
804
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
805
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
806
  "model_output.classification.iab_content.tier2.label": "Computing",
807
+ "model_output.classification.iab_content.tier3.label": null,
808
  "model_output.classification.iab_content.tier4.label": null
809
  },
810
  "expected": {
 
822
  "path": "model_output.classification.iab_content.mapping_mode"
823
  },
824
  {
825
+ "actual": null,
826
  "expected": "Software and Applications",
827
  "path": "model_output.classification.iab_content.tier3.label"
828
  },
 
841
  "actual": {
842
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
843
  "model_output.classification.iab_content.tier1.label": "Careers",
844
+ "model_output.classification.iab_content.tier2.label": null,
845
  "model_output.classification.iab_content.tier3.label": null,
846
  "model_output.classification.iab_content.tier4.label": null
847
  },
 
865
  "path": "model_output.classification.iab_content.mapping_mode"
866
  },
867
  {
868
+ "actual": null,
869
  "expected": "Computing",
870
  "path": "model_output.classification.iab_content.tier2.label"
871
  },
 
928
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
929
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
930
  "model_output.classification.iab_content.tier2.label": "Computing",
931
+ "model_output.classification.iab_content.tier3.label": null,
932
  "model_output.classification.iab_content.tier4.label": null
933
  },
934
  "expected": {
 
945
  "expected": "exact",
946
  "path": "model_output.classification.iab_content.mapping_mode"
947
  },
948
+ {
949
+ "actual": null,
950
+ "expected": "Internet",
951
+ "path": "model_output.classification.iab_content.tier3.label"
952
+ },
953
  {
954
  "actual": null,
955
  "expected": "Web Hosting",
 
966
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
967
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
968
  "model_output.classification.iab_content.tier2.label": "Computing",
969
+ "model_output.classification.iab_content.tier3.label": null,
970
  "model_output.classification.iab_content.tier4.label": null
971
  },
972
  "expected": {
 
985
  },
986
  {
987
  "actual": null,
988
+ "expected": "Internet",
989
+ "path": "model_output.classification.iab_content.tier3.label"
990
+ },
991
+ {
992
+ "actual": null,
993
+ "expected": "Web Hosting",
994
+ "path": "model_output.classification.iab_content.tier4.label"
995
+ }
996
  ],
997
  "notes": "Cross-vertical hard IAB mapping case for Technology & Computing > Computing > Internet > Web Hosting.",
998
  "pass": false,
 
1003
  "actual": {
1004
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1005
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1006
+ "model_output.classification.iab_content.tier2.label": null,
1007
+ "model_output.classification.iab_content.tier3.label": null
1008
  },
1009
  "expected": {
1010
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1018
  "actual": "nearest_equivalent",
1019
  "expected": "exact",
1020
  "path": "model_output.classification.iab_content.mapping_mode"
1021
+ },
1022
+ {
1023
+ "actual": null,
1024
+ "expected": "Computing",
1025
+ "path": "model_output.classification.iab_content.tier2.label"
1026
+ },
1027
+ {
1028
+ "actual": null,
1029
+ "expected": "Laptops",
1030
+ "path": "model_output.classification.iab_content.tier3.label"
1031
  }
1032
  ],
1033
  "notes": "Cross-vertical easy IAB mapping case for Technology & Computing > Computing > Laptops.",
 
1037
  },
1038
  {
1039
  "actual": {
1040
+ "model_output.classification.iab_content.mapping_mode": "exact",
1041
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1042
  "model_output.classification.iab_content.tier2.label": "Computing",
1043
  "model_output.classification.iab_content.tier3.label": "Laptops"
 
1049
  "model_output.classification.iab_content.tier3.label": "Laptops"
1050
  },
1051
  "id": "laptops-medium",
1052
+ "mismatches": [],
 
 
 
 
 
 
1053
  "notes": "Cross-vertical medium IAB mapping case for Technology & Computing > Computing > Laptops.",
1054
+ "pass": true,
1055
  "status": "must_fix",
1056
  "text": "Best laptop for work and study under 1200"
1057
  },
 
1060
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1061
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1062
  "model_output.classification.iab_content.tier2.label": "Computing",
1063
+ "model_output.classification.iab_content.tier3.label": null
1064
  },
1065
  "expected": {
1066
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1074
  "actual": "nearest_equivalent",
1075
  "expected": "exact",
1076
  "path": "model_output.classification.iab_content.mapping_mode"
1077
+ },
1078
+ {
1079
+ "actual": null,
1080
+ "expected": "Laptops",
1081
+ "path": "model_output.classification.iab_content.tier3.label"
1082
  }
1083
  ],
1084
  "notes": "Cross-vertical hard IAB mapping case for Technology & Computing > Computing > Laptops.",
 
1122
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1123
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1124
  "model_output.classification.iab_content.tier2.label": "Computing",
1125
+ "model_output.classification.iab_content.tier3.label": null
1126
  },
1127
  "expected": {
1128
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1138
  "path": "model_output.classification.iab_content.mapping_mode"
1139
  },
1140
  {
1141
+ "actual": null,
1142
  "expected": "Desktops",
1143
  "path": "model_output.classification.iab_content.tier3.label"
1144
  }
 
1150
  },
1151
  {
1152
  "actual": {
1153
+ "model_output.classification.iab_content.mapping_mode": "exact",
1154
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1155
  "model_output.classification.iab_content.tier2.label": "Computing",
1156
+ "model_output.classification.iab_content.tier3.label": "Desktops"
1157
  },
1158
  "expected": {
1159
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1162
  "model_output.classification.iab_content.tier3.label": "Desktops"
1163
  },
1164
  "id": "desktops-hard",
1165
+ "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
1166
  "notes": "Cross-vertical hard IAB mapping case for Technology & Computing > Computing > Desktops.",
1167
+ "pass": true,
1168
  "status": "must_fix",
1169
  "text": "Need a desktop PC with strong performance for creative work"
1170
  },
1171
  {
1172
  "actual": {
1173
+ "model_output.classification.iab_content.mapping_mode": "exact",
1174
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1175
  "model_output.classification.iab_content.tier2.label": "Consumer Electronics",
1176
+ "model_output.classification.iab_content.tier3.label": "Wearable Technology"
1177
  },
1178
  "expected": {
1179
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1184
  "id": "smartphones-easy",
1185
  "mismatches": [
1186
  {
1187
+ "actual": "Wearable Technology",
 
 
 
 
 
1188
  "expected": "Smartphones",
1189
  "path": "model_output.classification.iab_content.tier3.label"
1190
  }
 
1196
  },
1197
  {
1198
  "actual": {
1199
+ "model_output.classification.iab_content.mapping_mode": "exact",
1200
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1201
  "model_output.classification.iab_content.tier2.label": "Consumer Electronics",
1202
+ "model_output.classification.iab_content.tier3.label": "Wearable Technology"
1203
  },
1204
  "expected": {
1205
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1210
  "id": "smartphones-medium",
1211
  "mismatches": [
1212
  {
1213
+ "actual": "Wearable Technology",
 
 
 
 
 
1214
  "expected": "Smartphones",
1215
  "path": "model_output.classification.iab_content.tier3.label"
1216
  }
 
1222
  },
1223
  {
1224
  "actual": {
1225
+ "model_output.classification.iab_content.mapping_mode": "exact",
1226
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1227
  "model_output.classification.iab_content.tier2.label": "Consumer Electronics",
1228
+ "model_output.classification.iab_content.tier3.label": "Wearable Technology"
1229
  },
1230
  "expected": {
1231
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1236
  "id": "smartphones-hard",
1237
  "mismatches": [
1238
  {
1239
+ "actual": "Wearable Technology",
1240
+ "expected": "Smartphones",
1241
+ "path": "model_output.classification.iab_content.tier3.label"
1242
  }
1243
  ],
1244
  "notes": "Cross-vertical hard IAB mapping case for Technology & Computing > Consumer Electronics > Smartphones.",
 
1248
  },
1249
  {
1250
  "actual": {
1251
+ "model_output.classification.iab_content.mapping_mode": "exact",
1252
  "model_output.classification.iab_content.tier1.label": "Style & Fashion"
1253
  },
1254
  "expected": {
 
1256
  "model_output.classification.iab_content.tier1.label": "Style & Fashion"
1257
  },
1258
  "id": "style-fashion-parent-easy",
1259
+ "mismatches": [
1260
+ {
1261
+ "actual": "exact",
1262
+ "expected": "nearest_equivalent",
1263
+ "path": "model_output.classification.iab_content.mapping_mode"
1264
+ }
1265
+ ],
1266
  "notes": "Cross-vertical easy IAB mapping case for Style & Fashion.",
1267
+ "pass": false,
1268
  "status": "must_fix",
1269
  "text": "Best shoes under 100 dollars"
1270
  },
1271
  {
1272
  "actual": {
1273
+ "model_output.classification.iab_content.mapping_mode": "exact",
1274
  "model_output.classification.iab_content.tier1.label": "Style & Fashion"
1275
  },
1276
  "expected": {
 
1278
  "model_output.classification.iab_content.tier1.label": "Style & Fashion"
1279
  },
1280
  "id": "style-fashion-parent-medium",
1281
+ "mismatches": [
1282
+ {
1283
+ "actual": "exact",
1284
+ "expected": "nearest_equivalent",
1285
+ "path": "model_output.classification.iab_content.mapping_mode"
1286
+ }
1287
+ ],
1288
  "notes": "Cross-vertical medium IAB mapping case for Style & Fashion.",
1289
+ "pass": false,
1290
  "status": "must_fix",
1291
  "text": "Affordable fashion accessories for everyday wear"
1292
  },
1293
  {
1294
  "actual": {
1295
+ "model_output.classification.iab_content.mapping_mode": "exact",
1296
  "model_output.classification.iab_content.tier1.label": "Style & Fashion"
1297
  },
1298
  "expected": {
 
1300
  "model_output.classification.iab_content.tier1.label": "Style & Fashion"
1301
  },
1302
  "id": "style-fashion-parent-hard",
1303
+ "mismatches": [
1304
+ {
1305
+ "actual": "exact",
1306
+ "expected": "nearest_equivalent",
1307
+ "path": "model_output.classification.iab_content.mapping_mode"
1308
+ }
1309
+ ],
1310
  "notes": "Cross-vertical hard IAB mapping case for Style & Fashion.",
1311
+ "pass": false,
1312
  "status": "must_fix",
1313
  "text": "Need style recommendations for clothing and footwear without a specific brand in mind"
1314
  },
1315
  {
1316
  "actual": {
1317
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1318
+ "model_output.classification.iab_content.tier1.label": "Sports",
1319
+ "model_output.classification.iab_content.tier2.label": null,
1320
+ "model_output.classification.iab_content.tier3.label": null
1321
  },
1322
  "expected": {
1323
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1327
  },
1328
  "id": "womens-shoes-easy",
1329
  "mismatches": [
1330
+ {
1331
+ "actual": "Sports",
1332
+ "expected": "Style & Fashion",
1333
+ "path": "model_output.classification.iab_content.tier1.label"
1334
+ },
1335
  {
1336
  "actual": "nearest_equivalent",
1337
  "expected": "exact",
1338
  "path": "model_output.classification.iab_content.mapping_mode"
1339
+ },
1340
+ {
1341
+ "actual": null,
1342
+ "expected": "Women's Fashion",
1343
+ "path": "model_output.classification.iab_content.tier2.label"
1344
+ },
1345
+ {
1346
+ "actual": null,
1347
+ "expected": "Women's Shoes and Footwear",
1348
+ "path": "model_output.classification.iab_content.tier3.label"
1349
  }
1350
  ],
1351
  "notes": "Cross-vertical easy IAB mapping case for Style & Fashion > Women's Fashion > Women's Shoes and Footwear.",
 
1355
  },
1356
  {
1357
  "actual": {
1358
+ "model_output.classification.iab_content.mapping_mode": "exact",
1359
+ "model_output.classification.iab_content.tier1.label": "Sports",
1360
+ "model_output.classification.iab_content.tier2.label": "Walking",
1361
+ "model_output.classification.iab_content.tier3.label": null
1362
  },
1363
  "expected": {
1364
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1369
  "id": "womens-shoes-medium",
1370
  "mismatches": [
1371
  {
1372
+ "actual": "Sports",
1373
+ "expected": "Style & Fashion",
1374
+ "path": "model_output.classification.iab_content.tier1.label"
1375
+ },
1376
+ {
1377
+ "actual": "Walking",
1378
+ "expected": "Women's Fashion",
1379
+ "path": "model_output.classification.iab_content.tier2.label"
1380
+ },
1381
+ {
1382
+ "actual": null,
1383
+ "expected": "Women's Shoes and Footwear",
1384
+ "path": "model_output.classification.iab_content.tier3.label"
1385
  }
1386
  ],
1387
  "notes": "Cross-vertical medium IAB mapping case for Style & Fashion > Women's Fashion > Women's Shoes and Footwear.",
 
1391
  },
1392
  {
1393
  "actual": {
1394
+ "model_output.classification.iab_content.mapping_mode": "exact",
1395
  "model_output.classification.iab_content.tier1.label": "Style & Fashion",
1396
  "model_output.classification.iab_content.tier2.label": "Women's Fashion",
1397
  "model_output.classification.iab_content.tier3.label": "Women's Shoes and Footwear"
 
1403
  "model_output.classification.iab_content.tier3.label": "Women's Shoes and Footwear"
1404
  },
1405
  "id": "womens-shoes-hard",
1406
+ "mismatches": [],
 
 
 
 
 
 
1407
  "notes": "Cross-vertical hard IAB mapping case for Style & Fashion > Women's Fashion > Women's Shoes and Footwear.",
1408
+ "pass": true,
1409
  "status": "must_fix",
1410
  "text": "Need women's footwear for commuting that looks polished but feels comfortable"
1411
  },
 
1413
  "actual": {
1414
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1415
  "model_output.classification.iab_content.tier1.label": "Style & Fashion",
1416
+ "model_output.classification.iab_content.tier2.label": null,
1417
+ "model_output.classification.iab_content.tier3.label": null
1418
  },
1419
  "expected": {
1420
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1428
  "actual": "nearest_equivalent",
1429
  "expected": "exact",
1430
  "path": "model_output.classification.iab_content.mapping_mode"
1431
+ },
1432
+ {
1433
+ "actual": null,
1434
+ "expected": "Men's Fashion",
1435
+ "path": "model_output.classification.iab_content.tier2.label"
1436
+ },
1437
+ {
1438
+ "actual": null,
1439
+ "expected": "Men's Shoes and Footwear",
1440
+ "path": "model_output.classification.iab_content.tier3.label"
1441
  }
1442
  ],
1443
  "notes": "Cross-vertical easy IAB mapping case for Style & Fashion > Men's Fashion > Men's Shoes and Footwear.",
 
1447
  },
1448
  {
1449
  "actual": {
1450
+ "model_output.classification.iab_content.mapping_mode": "exact",
1451
  "model_output.classification.iab_content.tier1.label": "Style & Fashion",
1452
  "model_output.classification.iab_content.tier2.label": "Men's Fashion",
1453
  "model_output.classification.iab_content.tier3.label": "Men's Shoes and Footwear"
 
1459
  "model_output.classification.iab_content.tier3.label": "Men's Shoes and Footwear"
1460
  },
1461
  "id": "mens-shoes-medium",
1462
+ "mismatches": [],
 
 
 
 
 
 
1463
  "notes": "Cross-vertical medium IAB mapping case for Style & Fashion > Men's Fashion > Men's Shoes and Footwear.",
1464
+ "pass": true,
1465
  "status": "must_fix",
1466
  "text": "Good men's dress shoes for office use"
1467
  },
 
1469
  "actual": {
1470
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1471
  "model_output.classification.iab_content.tier1.label": "Style & Fashion",
1472
+ "model_output.classification.iab_content.tier2.label": null,
1473
+ "model_output.classification.iab_content.tier3.label": null
1474
  },
1475
  "expected": {
1476
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1484
  "actual": "nearest_equivalent",
1485
  "expected": "exact",
1486
  "path": "model_output.classification.iab_content.mapping_mode"
1487
+ },
1488
+ {
1489
+ "actual": null,
1490
+ "expected": "Men's Fashion",
1491
+ "path": "model_output.classification.iab_content.tier2.label"
1492
+ },
1493
+ {
1494
+ "actual": null,
1495
+ "expected": "Men's Shoes and Footwear",
1496
+ "path": "model_output.classification.iab_content.tier3.label"
1497
  }
1498
  ],
1499
  "notes": "Cross-vertical hard IAB mapping case for Style & Fashion > Men's Fashion > Men's Shoes and Footwear.",
 
1503
  },
1504
  {
1505
  "actual": {
1506
+ "model_output.classification.iab_content.mapping_mode": "exact",
1507
+ "model_output.classification.iab_content.tier1.label": "Travel",
1508
+ "model_output.classification.iab_content.tier2.label": "Travel Type",
1509
+ "model_output.classification.iab_content.tier3.label": "Hotels and Motels"
1510
  },
1511
  "expected": {
1512
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1515
  "model_output.classification.iab_content.tier3.label": "Hotels and Motels"
1516
  },
1517
  "id": "hotels-easy",
1518
+ "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1519
  "notes": "Cross-vertical easy IAB mapping case for Travel > Travel Type > Hotels and Motels.",
1520
+ "pass": true,
1521
  "status": "must_fix",
1522
  "text": "Need a hotel in Chicago for two nights"
1523
  },
1524
  {
1525
  "actual": {
1526
+ "model_output.classification.iab_content.mapping_mode": "exact",
1527
  "model_output.classification.iab_content.tier1.label": "Travel",
1528
  "model_output.classification.iab_content.tier2.label": "Travel Type",
1529
+ "model_output.classification.iab_content.tier3.label": "Hotels and Motels"
1530
  },
1531
  "expected": {
1532
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1535
  "model_output.classification.iab_content.tier3.label": "Hotels and Motels"
1536
  },
1537
  "id": "hotels-medium",
1538
+ "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
1539
  "notes": "Cross-vertical medium IAB mapping case for Travel > Travel Type > Hotels and Motels.",
1540
+ "pass": true,
1541
  "status": "must_fix",
1542
  "text": "Best hotels near Times Square for a weekend trip"
1543
  },
1544
  {
1545
  "actual": {
1546
+ "model_output.classification.iab_content.mapping_mode": "exact",
1547
  "model_output.classification.iab_content.tier1.label": "Travel",
1548
  "model_output.classification.iab_content.tier2.label": null,
1549
  "model_output.classification.iab_content.tier3.label": null
 
1556
  },
1557
  "id": "hotels-hard",
1558
  "mismatches": [
 
 
 
 
 
1559
  {
1560
  "actual": null,
1561
  "expected": "Travel Type",
 
1574
  },
1575
  {
1576
  "actual": {
1577
+ "model_output.classification.iab_content.mapping_mode": "exact",
1578
  "model_output.classification.iab_content.tier1.label": "Real Estate",
1579
  "model_output.classification.iab_content.tier2.label": "Apartments"
1580
  },
 
1585
  },
1586
  "id": "real-estate-rentals-easy",
1587
  "mismatches": [
 
 
 
 
 
1588
  {
1589
  "actual": "Apartments",
1590
  "expected": "Real Estate Renting and Leasing",
 
1598
  },
1599
  {
1600
  "actual": {
1601
+ "model_output.classification.iab_content.mapping_mode": "exact",
1602
  "model_output.classification.iab_content.tier1.label": "Real Estate",
1603
  "model_output.classification.iab_content.tier2.label": "Apartments"
1604
  },
 
1609
  },
1610
  "id": "real-estate-rentals-medium",
1611
  "mismatches": [
1612
+ {
1613
+ "actual": "exact",
1614
+ "expected": "nearest_equivalent",
1615
+ "path": "model_output.classification.iab_content.mapping_mode"
1616
+ },
1617
  {
1618
  "actual": "Apartments",
1619
  "expected": "Real Estate Renting and Leasing",
 
1627
  },
1628
  {
1629
  "actual": {
1630
+ "model_output.classification.iab_content.mapping_mode": "exact",
1631
  "model_output.classification.iab_content.tier1.label": "Real Estate",
1632
+ "model_output.classification.iab_content.tier2.label": "Real Estate Renting and Leasing"
1633
  },
1634
  "expected": {
1635
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1637
  "model_output.classification.iab_content.tier2.label": "Real Estate Renting and Leasing"
1638
  },
1639
  "id": "real-estate-rentals-hard",
1640
+ "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
1641
  "notes": "Cross-vertical hard IAB mapping case for Real Estate > Real Estate Renting and Leasing.",
1642
+ "pass": true,
1643
  "status": "must_fix",
1644
  "text": "Need rental listings for a short move, not home-buying advice"
1645
  },
 
1647
  "actual": {
1648
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1649
  "model_output.classification.iab_content.tier1.label": "Healthy Living",
1650
+ "model_output.classification.iab_content.tier2.label": null,
1651
+ "model_output.classification.iab_content.tier3.label": null
1652
  },
1653
  "expected": {
1654
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1662
  "actual": "nearest_equivalent",
1663
  "expected": "exact",
1664
  "path": "model_output.classification.iab_content.mapping_mode"
1665
+ },
1666
+ {
1667
+ "actual": null,
1668
+ "expected": "Fitness and Exercise",
1669
+ "path": "model_output.classification.iab_content.tier2.label"
1670
+ },
1671
+ {
1672
+ "actual": null,
1673
+ "expected": "Running and Jogging",
1674
+ "path": "model_output.classification.iab_content.tier3.label"
1675
  }
1676
  ],
1677
  "notes": "Cross-vertical easy IAB mapping case for Healthy Living > Fitness and Exercise > Running and Jogging.",
 
1681
  },
1682
  {
1683
  "actual": {
1684
+ "model_output.classification.iab_content.mapping_mode": "exact",
1685
+ "model_output.classification.iab_content.tier1.label": "Sports",
1686
+ "model_output.classification.iab_content.tier2.label": "Walking",
1687
+ "model_output.classification.iab_content.tier3.label": null
1688
  },
1689
  "expected": {
1690
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1695
  "id": "running-and-jogging-medium",
1696
  "mismatches": [
1697
  {
1698
+ "actual": "Sports",
1699
+ "expected": "Healthy Living",
1700
+ "path": "model_output.classification.iab_content.tier1.label"
1701
+ },
1702
+ {
1703
+ "actual": "Walking",
1704
+ "expected": "Fitness and Exercise",
1705
+ "path": "model_output.classification.iab_content.tier2.label"
1706
+ },
1707
+ {
1708
+ "actual": null,
1709
+ "expected": "Running and Jogging",
1710
+ "path": "model_output.classification.iab_content.tier3.label"
1711
  }
1712
  ],
1713
  "notes": "Cross-vertical medium IAB mapping case for Healthy Living > Fitness and Exercise > Running and Jogging.",
 
1717
  },
1718
  {
1719
  "actual": {
1720
+ "model_output.classification.iab_content.mapping_mode": "exact",
1721
  "model_output.classification.iab_content.tier1.label": "Healthy Living",
1722
  "model_output.classification.iab_content.tier2.label": "Fitness and Exercise",
1723
+ "model_output.classification.iab_content.tier3.label": null
1724
  },
1725
  "expected": {
1726
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1731
  "id": "running-and-jogging-hard",
1732
  "mismatches": [
1733
  {
1734
+ "actual": null,
1735
+ "expected": "Running and Jogging",
1736
+ "path": "model_output.classification.iab_content.tier3.label"
1737
  }
1738
  ],
1739
  "notes": "Cross-vertical hard IAB mapping case for Healthy Living > Fitness and Exercise > Running and Jogging.",
 
1743
  },
1744
  {
1745
  "actual": {
1746
+ "model_output.classification.iab_content.mapping_mode": "exact",
1747
  "model_output.classification.iab_content.tier1.label": "Sports",
1748
+ "model_output.classification.iab_content.tier2.label": "Soccer"
1749
  },
1750
  "expected": {
1751
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1753
  "model_output.classification.iab_content.tier2.label": "Soccer"
1754
  },
1755
  "id": "soccer-easy",
1756
+ "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
1757
  "notes": "Cross-vertical easy IAB mapping case for Sports > Soccer.",
1758
+ "pass": true,
1759
  "status": "must_fix",
1760
  "text": "How do offside rules work in soccer?"
1761
  },
1762
  {
1763
  "actual": {
1764
+ "model_output.classification.iab_content.mapping_mode": "exact",
1765
  "model_output.classification.iab_content.tier1.label": "Sports",
1766
+ "model_output.classification.iab_content.tier2.label": "Soccer"
1767
  },
1768
  "expected": {
1769
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1771
  "model_output.classification.iab_content.tier2.label": "Soccer"
1772
  },
1773
  "id": "soccer-medium",
1774
+ "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
1775
  "notes": "Cross-vertical medium IAB mapping case for Sports > Soccer.",
1776
+ "pass": true,
1777
  "status": "must_fix",
1778
  "text": "Best soccer drills for beginner players"
1779
  },
1780
  {
1781
  "actual": {
1782
+ "model_output.classification.iab_content.mapping_mode": "exact",
1783
  "model_output.classification.iab_content.tier1.label": "Sports",
1784
+ "model_output.classification.iab_content.tier2.label": "Soccer"
1785
  },
1786
  "expected": {
1787
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1789
  "model_output.classification.iab_content.tier2.label": "Soccer"
1790
  },
1791
  "id": "soccer-hard",
1792
+ "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
1793
  "notes": "Cross-vertical hard IAB mapping case for Sports > Soccer.",
1794
+ "pass": true,
1795
  "status": "must_fix",
1796
  "text": "Need help understanding football tactics for the Premier League, not fantasy sports"
1797
  },
1798
  {
1799
  "actual": {
1800
+ "model_output.classification.iab_content.mapping_mode": "exact",
1801
  "model_output.classification.iab_content.tier1.label": "Genres",
1802
  "model_output.classification.iab_content.tier2.label": "Fantasy"
1803
  },
 
1813
  "expected": "Books and Literature",
1814
  "path": "model_output.classification.iab_content.tier1.label"
1815
  },
 
 
 
 
 
1816
  {
1817
  "actual": "Fantasy",
1818
  "expected": "Fiction",
 
1826
  },
1827
  {
1828
  "actual": {
1829
+ "model_output.classification.iab_content.mapping_mode": "exact",
1830
  "model_output.classification.iab_content.tier1.label": "Books and Literature",
1831
  "model_output.classification.iab_content.tier2.label": "Fiction"
1832
  },
 
1836
  "model_output.classification.iab_content.tier2.label": "Fiction"
1837
  },
1838
  "id": "fiction-medium",
1839
+ "mismatches": [
1840
+ {
1841
+ "actual": "exact",
1842
+ "expected": "nearest_equivalent",
1843
+ "path": "model_output.classification.iab_content.mapping_mode"
1844
+ }
1845
+ ],
1846
  "notes": "Cross-vertical medium IAB mapping case for Books and Literature > Fiction.",
1847
+ "pass": false,
1848
  "status": "must_fix",
1849
  "text": "Best fiction books for a long flight"
1850
  },
1851
  {
1852
  "actual": {
1853
+ "model_output.classification.iab_content.mapping_mode": "exact",
1854
  "model_output.classification.iab_content.tier1.label": "Books and Literature",
1855
+ "model_output.classification.iab_content.tier2.label": "Fiction"
1856
  },
1857
  "expected": {
1858
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1860
  "model_output.classification.iab_content.tier2.label": "Fiction"
1861
  },
1862
  "id": "fiction-hard",
1863
+ "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
1864
  "notes": "Cross-vertical hard IAB mapping case for Books and Literature > Fiction.",
1865
+ "pass": true,
1866
  "status": "must_fix",
1867
  "text": "Looking for a character-driven novel, not comics or poetry"
1868
  },
1869
  {
1870
  "actual": {
1871
+ "model_output.classification.iab_content.mapping_mode": "exact",
1872
  "model_output.classification.iab_content.tier1.label": "Home & Garden",
1873
  "model_output.classification.iab_content.tier2.label": "Remodeling & Construction"
1874
  },
 
1879
  },
1880
  "id": "home-improvement-easy",
1881
  "mismatches": [
 
 
 
 
 
1882
  {
1883
  "actual": "Remodeling & Construction",
1884
  "expected": "Home Improvement",
 
1893
  {
1894
  "actual": {
1895
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1896
+ "model_output.classification.iab_content.tier1.label": "Home & Garden",
1897
+ "model_output.classification.iab_content.tier2.label": null
1898
  },
1899
  "expected": {
1900
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1903
  },
1904
  "id": "home-improvement-medium",
1905
  "mismatches": [
 
 
 
 
 
1906
  {
1907
  "actual": "nearest_equivalent",
1908
  "expected": "exact",
1909
  "path": "model_output.classification.iab_content.mapping_mode"
1910
  },
1911
  {
1912
+ "actual": null,
1913
  "expected": "Home Improvement",
1914
  "path": "model_output.classification.iab_content.tier2.label"
1915
  }
 
1921
  },
1922
  {
1923
  "actual": {
1924
+ "model_output.classification.iab_content.mapping_mode": "exact",
1925
+ "model_output.classification.iab_content.tier1.label": "Real Estate",
1926
+ "model_output.classification.iab_content.tier2.label": "Houses"
1927
  },
1928
  "expected": {
1929
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1933
  "id": "home-improvement-hard",
1934
  "mismatches": [
1935
  {
1936
+ "actual": "Real Estate",
1937
+ "expected": "Home & Garden",
1938
+ "path": "model_output.classification.iab_content.tier1.label"
1939
  },
1940
  {
1941
+ "actual": "Houses",
1942
  "expected": "Home Improvement",
1943
  "path": "model_output.classification.iab_content.tier2.label"
1944
  }
 
1951
  {
1952
  "actual": {
1953
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1954
+ "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1955
+ "model_output.classification.iab_content.tier2.label": null
1956
  },
1957
  "expected": {
1958
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1961
  },
1962
  "id": "online-education-easy",
1963
  "mismatches": [
1964
+ {
1965
+ "actual": "Technology & Computing",
1966
+ "expected": "Education",
1967
+ "path": "model_output.classification.iab_content.tier1.label"
1968
+ },
1969
  {
1970
  "actual": "nearest_equivalent",
1971
  "expected": "exact",
1972
  "path": "model_output.classification.iab_content.mapping_mode"
1973
+ },
1974
+ {
1975
+ "actual": null,
1976
+ "expected": "Online Education",
1977
+ "path": "model_output.classification.iab_content.tier2.label"
1978
  }
1979
  ],
1980
  "notes": "Cross-vertical easy IAB mapping case for Education > Online Education.",
 
1984
  },
1985
  {
1986
  "actual": {
1987
+ "model_output.classification.iab_content.mapping_mode": "exact",
1988
  "model_output.classification.iab_content.tier1.label": "Careers",
1989
  "model_output.classification.iab_content.tier2.label": "Remote Working"
1990
  },
 
2000
  "expected": "Education",
2001
  "path": "model_output.classification.iab_content.tier1.label"
2002
  },
 
 
 
 
 
2003
  {
2004
  "actual": "Remote Working",
2005
  "expected": "Online Education",
 
2014
  {
2015
  "actual": {
2016
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2017
+ "model_output.classification.iab_content.tier1.label": "Healthy Living",
2018
+ "model_output.classification.iab_content.tier2.label": null
2019
  },
2020
  "expected": {
2021
  "model_output.classification.iab_content.mapping_mode": "exact",
 
2025
  "id": "online-education-hard",
2026
  "mismatches": [
2027
  {
2028
+ "actual": "Healthy Living",
2029
  "expected": "Education",
2030
  "path": "model_output.classification.iab_content.tier1.label"
2031
  },
 
2035
  "path": "model_output.classification.iab_content.mapping_mode"
2036
  },
2037
  {
2038
+ "actual": null,
2039
  "expected": "Online Education",
2040
  "path": "model_output.classification.iab_content.tier2.label"
2041
  }
 
2047
  },
2048
  {
2049
  "actual": {
2050
+ "model_output.classification.iab_content.mapping_mode": "exact",
2051
  "model_output.classification.iab_content.tier1.label": "Education",
2052
  "model_output.classification.iab_content.tier2.label": "College Education",
2053
  "model_output.classification.iab_content.tier3.label": "Postgraduate Education"
 
2059
  "model_output.classification.iab_content.tier3.label": "Postgraduate Education"
2060
  },
2061
  "id": "postgraduate-education-easy",
2062
+ "mismatches": [],
 
 
 
 
 
 
2063
  "notes": "Cross-vertical easy IAB mapping case for Education > College Education > Postgraduate Education.",
2064
+ "pass": true,
2065
  "status": "must_fix",
2066
  "text": "best universities to study masters"
2067
  },
2068
  {
2069
  "actual": {
2070
+ "model_output.classification.iab_content.mapping_mode": "exact",
2071
  "model_output.classification.iab_content.tier1.label": "Education",
2072
  "model_output.classification.iab_content.tier2.label": "College Education",
2073
  "model_output.classification.iab_content.tier3.label": "Postgraduate Education"
 
2079
  "model_output.classification.iab_content.tier3.label": "Postgraduate Education"
2080
  },
2081
  "id": "postgraduate-education-medium",
2082
+ "mismatches": [],
 
 
 
 
 
 
2083
  "notes": "Cross-vertical medium IAB mapping case for Education > College Education > Postgraduate Education.",
2084
+ "pass": true,
2085
  "status": "must_fix",
2086
  "text": "which graduate schools have strong data science programs"
2087
  },
2088
  {
2089
  "actual": {
2090
+ "model_output.classification.iab_content.mapping_mode": "exact",
2091
  "model_output.classification.iab_content.tier1.label": "Education",
2092
  "model_output.classification.iab_content.tier2.label": "College Education",
2093
  "model_output.classification.iab_content.tier3.label": "Postgraduate Education"
 
2099
  "model_output.classification.iab_content.tier3.label": "Postgraduate Education"
2100
  },
2101
  "id": "postgraduate-education-hard",
2102
+ "mismatches": [],
 
 
 
 
 
 
2103
  "notes": "Cross-vertical hard IAB mapping case for Education > College Education > Postgraduate Education.",
2104
+ "pass": true,
2105
  "status": "must_fix",
2106
  "text": "need postgraduate options for a master's degree, not short online courses"
2107
  },
 
2129
  },
2130
  {
2131
  "actual": {
2132
+ "model_output.classification.iab_content.mapping_mode": "exact",
2133
  "model_output.classification.iab_content.tier1.label": "Medical Health"
2134
  },
2135
  "expected": {
 
2137
  "model_output.classification.iab_content.tier1.label": "Medical Health"
2138
  },
2139
  "id": "medical-health-medium",
2140
+ "mismatches": [],
 
 
 
 
 
 
2141
  "notes": "Cross-vertical medium IAB mapping case for Medical Health.",
2142
+ "pass": true,
2143
  "status": "must_fix",
2144
  "text": "when should i see a doctor for persistent knee pain"
2145
  },
2146
  {
2147
  "actual": {
2148
+ "model_output.classification.iab_content.mapping_mode": "exact",
2149
  "model_output.classification.iab_content.tier1.label": "Medical Health"
2150
  },
2151
  "expected": {
 
2153
  "model_output.classification.iab_content.tier1.label": "Medical Health"
2154
  },
2155
  "id": "medical-health-hard",
2156
+ "mismatches": [],
 
 
 
 
 
 
2157
  "notes": "Cross-vertical hard IAB mapping case for Medical Health.",
2158
+ "pass": true,
2159
  "status": "must_fix",
2160
  "text": "need medical advice about symptoms, not wellness or fitness tips"
2161
  },
2162
  {
2163
  "actual": {
2164
+ "model_output.classification.iab_content.mapping_mode": "exact",
2165
  "model_output.classification.iab_content.tier1.label": "Careers",
2166
  "model_output.classification.iab_content.tier2.label": "Remote Working"
2167
  },
 
2172
  },
2173
  "id": "careers-job-search-easy",
2174
  "mismatches": [
 
 
 
 
 
2175
  {
2176
  "actual": "Remote Working",
2177
  "expected": "Job Search",
 
2187
  "actual": {
2188
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2189
  "model_output.classification.iab_content.tier1.label": "Business and Finance",
2190
+ "model_output.classification.iab_content.tier2.label": "Business"
2191
  },
2192
  "expected": {
2193
  "model_output.classification.iab_content.mapping_mode": "exact",
 
2207
  "path": "model_output.classification.iab_content.mapping_mode"
2208
  },
2209
  {
2210
+ "actual": "Business",
2211
  "expected": "Job Search",
2212
  "path": "model_output.classification.iab_content.tier2.label"
2213
  }
 
2220
  {
2221
  "actual": {
2222
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2223
+ "model_output.classification.iab_content.tier1.label": "Genres",
2224
+ "model_output.classification.iab_content.tier2.label": null
2225
  },
2226
  "expected": {
2227
  "model_output.classification.iab_content.mapping_mode": "exact",
 
2231
  "id": "careers-job-search-hard",
2232
  "mismatches": [
2233
  {
2234
+ "actual": "Genres",
2235
  "expected": "Careers",
2236
  "path": "model_output.classification.iab_content.tier1.label"
2237
  },
 
2241
  "path": "model_output.classification.iab_content.mapping_mode"
2242
  },
2243
  {
2244
+ "actual": null,
2245
  "expected": "Job Search",
2246
  "path": "model_output.classification.iab_content.tier2.label"
2247
  }
 
2254
  {
2255
  "actual": {
2256
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2257
+ "model_output.classification.iab_content.tier1.label": "Personal Celebrations & Life Events",
2258
  "model_output.classification.iab_content.tier2.label": null
2259
  },
2260
  "expected": {
 
2264
  },
2265
  "id": "personal-finance-easy",
2266
  "mismatches": [
2267
+ {
2268
+ "actual": "Personal Celebrations & Life Events",
2269
+ "expected": "Personal Finance",
2270
+ "path": "model_output.classification.iab_content.tier1.label"
2271
+ },
2272
  {
2273
  "actual": "nearest_equivalent",
2274
  "expected": "exact",
 
2287
  },
2288
  {
2289
  "actual": {
2290
+ "model_output.classification.iab_content.mapping_mode": "exact",
2291
  "model_output.classification.iab_content.tier1.label": "Personal Finance",
2292
  "model_output.classification.iab_content.tier2.label": null
2293
  },
 
2298
  },
2299
  "id": "personal-finance-medium",
2300
  "mismatches": [
 
 
 
 
 
2301
  {
2302
  "actual": null,
2303
  "expected": "Financial Planning",
 
2311
  },
2312
  {
2313
  "actual": {
2314
+ "model_output.classification.iab_content.mapping_mode": "exact",
2315
  "model_output.classification.iab_content.tier1.label": "Personal Finance",
2316
  "model_output.classification.iab_content.tier2.label": "Retirement Planning"
2317
  },
 
2322
  },
2323
  "id": "personal-finance-hard",
2324
  "mismatches": [
 
 
 
 
 
2325
  {
2326
  "actual": "Retirement Planning",
2327
  "expected": "Financial Planning",
 
2335
  },
2336
  {
2337
  "actual": {
2338
+ "model_output.classification.iab_content.mapping_mode": "exact",
2339
  "model_output.classification.iab_content.tier1.label": "Family and Relationships",
2340
  "model_output.classification.iab_content.tier2.label": "Parenting"
2341
  },
 
2345
  "model_output.classification.iab_content.tier2.label": "Parenting"
2346
  },
2347
  "id": "parenting-easy",
2348
+ "mismatches": [],
 
 
 
 
 
 
2349
  "notes": "Cross-vertical easy IAB mapping case for Family and Relationships > Parenting.",
2350
+ "pass": true,
2351
  "status": "must_fix",
2352
  "text": "tips for parenting a toddler"
2353
  },
2354
  {
2355
  "actual": {
2356
+ "model_output.classification.iab_content.mapping_mode": "exact",
2357
+ "model_output.classification.iab_content.tier1.label": "Hobbies & Interests",
2358
+ "model_output.classification.iab_content.tier2.label": "Content Production"
2359
  },
2360
  "expected": {
2361
  "model_output.classification.iab_content.mapping_mode": "exact",
 
2365
  "id": "parenting-medium",
2366
  "mismatches": [
2367
  {
2368
+ "actual": "Hobbies & Interests",
2369
  "expected": "Family and Relationships",
2370
  "path": "model_output.classification.iab_content.tier1.label"
2371
  },
2372
  {
2373
+ "actual": "Content Production",
 
 
 
 
 
2374
  "expected": "Parenting",
2375
  "path": "model_output.classification.iab_content.tier2.label"
2376
  }
 
2382
  },
2383
  {
2384
  "actual": {
2385
+ "model_output.classification.iab_content.mapping_mode": "exact",
2386
  "model_output.classification.iab_content.tier1.label": "Family and Relationships",
2387
  "model_output.classification.iab_content.tier2.label": "Parenting"
2388
  },
 
2392
  "model_output.classification.iab_content.tier2.label": "Parenting"
2393
  },
2394
  "id": "parenting-hard",
2395
+ "mismatches": [],
 
 
 
 
 
 
2396
  "notes": "Cross-vertical hard IAB mapping case for Family and Relationships > Parenting.",
2397
+ "pass": true,
2398
  "status": "must_fix",
2399
  "text": "need parenting advice for a child starting preschool"
2400
  },
2401
  {
2402
  "actual": {
2403
+ "model_output.classification.iab_content.mapping_mode": "exact",
2404
  "model_output.classification.iab_content.tier1.label": "Home & Garden",
2405
  "model_output.classification.iab_content.tier2.label": "Gardening"
2406
  },
 
2410
  "model_output.classification.iab_content.tier2.label": "Gardening"
2411
  },
2412
  "id": "gardening-easy",
2413
+ "mismatches": [],
 
 
 
 
 
 
2414
  "notes": "Cross-vertical easy IAB mapping case for Home & Garden > Gardening.",
2415
+ "pass": true,
2416
  "status": "must_fix",
2417
  "text": "best plants for a small balcony garden"
2418
  },
2419
  {
2420
  "actual": {
2421
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2422
+ "model_output.classification.iab_content.tier1.label": "Food & Drink",
2423
  "model_output.classification.iab_content.tier2.label": null
2424
  },
2425
  "expected": {
 
2430
  "id": "gardening-medium",
2431
  "mismatches": [
2432
  {
2433
+ "actual": "Food & Drink",
2434
  "expected": "Home & Garden",
2435
  "path": "model_output.classification.iab_content.tier1.label"
2436
  },
 
2452
  },
2453
  {
2454
  "actual": {
2455
+ "model_output.classification.iab_content.mapping_mode": "exact",
2456
  "model_output.classification.iab_content.tier1.label": "Home & Garden",
2457
+ "model_output.classification.iab_content.tier2.label": "Gardening"
2458
  },
2459
  "expected": {
2460
  "model_output.classification.iab_content.mapping_mode": "exact",
 
2462
  "model_output.classification.iab_content.tier2.label": "Gardening"
2463
  },
2464
  "id": "gardening-hard",
2465
+ "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
2466
  "notes": "Cross-vertical hard IAB mapping case for Home & Garden > Gardening.",
2467
+ "pass": true,
2468
  "status": "must_fix",
2469
  "text": "need gardening advice for a shady backyard, not interior decor ideas"
2470
  },
2471
  {
2472
  "actual": {
2473
+ "model_output.classification.iab_content.mapping_mode": "exact",
2474
+ "model_output.classification.iab_content.tier1.label": "Entertainment",
2475
+ "model_output.classification.iab_content.tier2.label": "Movies"
2476
  },
2477
  "expected": {
2478
  "model_output.classification.iab_content.mapping_mode": "exact",
 
2480
  "model_output.classification.iab_content.tier2.label": "Movies"
2481
  },
2482
  "id": "movies-easy",
2483
+ "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2484
  "notes": "Cross-vertical easy IAB mapping case for Entertainment > Movies.",
2485
+ "pass": true,
2486
  "status": "must_fix",
2487
  "text": "What movie should we watch tonight?"
2488
  },
2489
  {
2490
  "actual": {
2491
+ "model_output.classification.iab_content.mapping_mode": "exact",
2492
  "model_output.classification.iab_content.tier1.label": "Genres",
2493
  "model_output.classification.iab_content.tier2.label": "Horror"
2494
  },
 
2504
  "expected": "Entertainment",
2505
  "path": "model_output.classification.iab_content.tier1.label"
2506
  },
 
 
 
 
 
2507
  {
2508
  "actual": "Horror",
2509
  "expected": "Movies",
 
2517
  },
2518
  {
2519
  "actual": {
2520
+ "model_output.classification.iab_content.mapping_mode": "exact",
2521
  "model_output.classification.iab_content.tier1.label": "Entertainment",
2522
+ "model_output.classification.iab_content.tier2.label": "Movies"
2523
  },
2524
  "expected": {
2525
  "model_output.classification.iab_content.mapping_mode": "exact",
 
2527
  "model_output.classification.iab_content.tier2.label": "Movies"
2528
  },
2529
  "id": "movies-hard",
2530
+ "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
2531
  "notes": "Cross-vertical hard IAB mapping case for Entertainment > Movies.",
2532
+ "pass": true,
2533
  "status": "must_fix",
2534
  "text": "Looking for film recommendations, not TV shows or music"
2535
  }
artifacts/evaluation/latest/iab_quality_target_eval.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
  "by_status": {
3
  "must_fix": {
4
- "failed": 12,
5
- "passed": 0,
6
  "total": 12
7
  }
8
  },
9
- "cases_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/examples/iab_mapping_cases.json",
10
  "count": 12,
11
- "failed": 12,
12
- "passed": 0,
13
  "results": [
14
  {
15
  "actual": {
@@ -42,7 +42,7 @@
42
  },
43
  {
44
  "actual": {
45
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
46
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
47
  "model_output.classification.iab_content.tier2.label": "Computing",
48
  "model_output.classification.iab_content.tier3.label": "Laptops"
@@ -54,15 +54,9 @@
54
  "model_output.classification.iab_content.tier3.label": "Laptops"
55
  },
56
  "id": "laptop-buying-maps-to-laptops",
57
- "mismatches": [
58
- {
59
- "actual": "nearest_equivalent",
60
- "expected": "exact",
61
- "path": "model_output.classification.iab_content.mapping_mode"
62
- }
63
- ],
64
  "notes": "Laptop shopping should resolve into the laptops branch, not business sales.",
65
- "pass": false,
66
  "status": "must_fix",
67
  "text": "Which laptop to buy in 2026"
68
  },
@@ -70,7 +64,7 @@
70
  "actual": {
71
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
72
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
73
- "model_output.classification.iab_content.tier2.label": "Computing",
74
  "model_output.classification.iab_content.tier3.label": null
75
  },
76
  "expected": {
@@ -81,6 +75,11 @@
81
  },
82
  "id": "labtop-buying-maps-to-laptops",
83
  "mismatches": [
 
 
 
 
 
84
  {
85
  "actual": null,
86
  "expected": "Laptops",
@@ -102,7 +101,7 @@
102
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
103
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
104
  "model_output.classification.iab_content.tier2.label": "Computing",
105
- "model_output.classification.iab_content.tier3.label": "Software and Applications"
106
  },
107
  "expected": {
108
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -123,7 +122,7 @@
123
  "path": "model_output.classification.iab_content.tier2.label"
124
  },
125
  {
126
- "actual": "Software and Applications",
127
  "expected": "Sales",
128
  "path": "model_output.classification.iab_content.tier3.label"
129
  }
@@ -137,8 +136,8 @@
137
  "actual": {
138
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
139
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
140
- "model_output.classification.iab_content.tier2.label": "Computing",
141
- "model_output.classification.iab_content.tier3.label": "Software and Applications"
142
  },
143
  "expected": {
144
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -154,12 +153,12 @@
154
  "path": "model_output.classification.iab_content.tier1.label"
155
  },
156
  {
157
- "actual": "Computing",
158
  "expected": "Business",
159
  "path": "model_output.classification.iab_content.tier2.label"
160
  },
161
  {
162
- "actual": "Software and Applications",
163
  "expected": "Sales",
164
  "path": "model_output.classification.iab_content.tier3.label"
165
  },
@@ -177,8 +176,8 @@
177
  {
178
  "actual": {
179
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
180
- "model_output.classification.iab_content.tier1.label": "Hobbies & Interests",
181
- "model_output.classification.iab_content.tier2.label": "Content Production",
182
  "model_output.classification.iab_content.tier3.label": null
183
  },
184
  "expected": {
@@ -190,12 +189,12 @@
190
  "id": "marketing-tools-map-to-marketing",
191
  "mismatches": [
192
  {
193
- "actual": "Hobbies & Interests",
194
  "expected": "Business and Finance",
195
  "path": "model_output.classification.iab_content.tier1.label"
196
  },
197
  {
198
- "actual": "Content Production",
199
  "expected": "Business",
200
  "path": "model_output.classification.iab_content.tier2.label"
201
  },
@@ -218,8 +217,8 @@
218
  {
219
  "actual": {
220
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
221
- "model_output.classification.iab_content.tier1.label": "Technology & Computing",
222
- "model_output.classification.iab_content.tier2.label": "Computing"
223
  },
224
  "expected": {
225
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -229,7 +228,12 @@
229
  "id": "ml-explanation-maps-to-ai",
230
  "mismatches": [
231
  {
232
- "actual": "Computing",
 
 
 
 
 
233
  "expected": "Artificial Intelligence",
234
  "path": "model_output.classification.iab_content.tier2.label"
235
  },
@@ -249,7 +253,7 @@
249
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
250
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
251
  "model_output.classification.iab_content.tier2.label": "Computing",
252
- "model_output.classification.iab_content.tier3.label": "Internet"
253
  },
254
  "expected": {
255
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -270,7 +274,7 @@
270
  "path": "model_output.classification.iab_content.tier2.label"
271
  },
272
  {
273
- "actual": "Internet",
274
  "expected": "Business I.T.",
275
  "path": "model_output.classification.iab_content.tier3.label"
276
  }
@@ -282,7 +286,7 @@
282
  },
283
  {
284
  "actual": {
285
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
286
  "model_output.classification.iab_content.tier1.label": "Food & Drink",
287
  "model_output.classification.iab_content.tier2.label": "Dining Out"
288
  },
@@ -292,23 +296,17 @@
292
  "model_output.classification.iab_content.tier2.label": "Dining Out"
293
  },
294
  "id": "restaurant-booking-maps-to-dining-out",
295
- "mismatches": [
296
- {
297
- "actual": "nearest_equivalent",
298
- "expected": "exact",
299
- "path": "model_output.classification.iab_content.mapping_mode"
300
- }
301
- ],
302
  "notes": "Generic dining requests should not inherit the repo's business default.",
303
- "pass": false,
304
  "status": "must_fix",
305
  "text": "Book a table for 2 tonight"
306
  },
307
  {
308
  "actual": {
309
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
310
- "model_output.classification.iab_content.tier1.label": "Business and Finance",
311
- "model_output.classification.iab_content.tier2.label": "Business",
312
  "model_output.classification.iab_content.tier3.label": null
313
  },
314
  "expected": {
@@ -320,12 +318,12 @@
320
  "id": "trial-signup-maps-to-software",
321
  "mismatches": [
322
  {
323
- "actual": "Business and Finance",
324
  "expected": "Technology & Computing",
325
  "path": "model_output.classification.iab_content.tier1.label"
326
  },
327
  {
328
- "actual": "Business",
329
  "expected": "Computing",
330
  "path": "model_output.classification.iab_content.tier2.label"
331
  },
@@ -342,10 +340,10 @@
342
  },
343
  {
344
  "actual": {
345
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
346
- "model_output.classification.iab_content.tier1.label": "Technology & Computing",
347
- "model_output.classification.iab_content.tier2.label": "Computing",
348
- "model_output.classification.iab_content.tier3.label": "Software and Applications",
349
  "model_output.classification.iab_content.tier4.label": null
350
  },
351
  "expected": {
@@ -358,7 +356,17 @@
358
  "id": "communication-software-maps-to-tier4",
359
  "mismatches": [
360
  {
361
- "actual": "Software and Applications",
 
 
 
 
 
 
 
 
 
 
362
  "expected": "Computer Software and Applications",
363
  "path": "model_output.classification.iab_content.tier3.label"
364
  },
@@ -366,11 +374,6 @@
366
  "actual": null,
367
  "expected": "Communication",
368
  "path": "model_output.classification.iab_content.tier4.label"
369
- },
370
- {
371
- "actual": "nearest_equivalent",
372
- "expected": "exact",
373
- "path": "model_output.classification.iab_content.mapping_mode"
374
  }
375
  ],
376
  "notes": "Full taxonomy support should preserve the tier4 communication branch.",
@@ -380,9 +383,9 @@
380
  },
381
  {
382
  "actual": {
383
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
384
  "model_output.classification.iab_content.tier1.label": "Food & Drink",
385
- "model_output.classification.iab_content.tier2.label": null
386
  },
387
  "expected": {
388
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -390,20 +393,9 @@
390
  "model_output.classification.iab_content.tier2.label": "Alcoholic Beverages"
391
  },
392
  "id": "vodka-query-maps-to-alcoholic-beverages",
393
- "mismatches": [
394
- {
395
- "actual": null,
396
- "expected": "Alcoholic Beverages",
397
- "path": "model_output.classification.iab_content.tier2.label"
398
- },
399
- {
400
- "actual": "nearest_equivalent",
401
- "expected": "exact",
402
- "path": "model_output.classification.iab_content.mapping_mode"
403
- }
404
- ],
405
  "notes": "Food and beverage prompts should not fall through to the business default.",
406
- "pass": false,
407
  "status": "must_fix",
408
  "text": "what is best vodka drink should i try"
409
  }
 
1
  {
2
  "by_status": {
3
  "must_fix": {
4
+ "failed": 9,
5
+ "passed": 3,
6
  "total": 12
7
  }
8
  },
9
+ "cases_path": "/content/agentic-intent-classifier/examples/iab_mapping_cases.json",
10
  "count": 12,
11
+ "failed": 9,
12
+ "passed": 3,
13
  "results": [
14
  {
15
  "actual": {
 
42
  },
43
  {
44
  "actual": {
45
+ "model_output.classification.iab_content.mapping_mode": "exact",
46
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
47
  "model_output.classification.iab_content.tier2.label": "Computing",
48
  "model_output.classification.iab_content.tier3.label": "Laptops"
 
54
  "model_output.classification.iab_content.tier3.label": "Laptops"
55
  },
56
  "id": "laptop-buying-maps-to-laptops",
57
+ "mismatches": [],
 
 
 
 
 
 
58
  "notes": "Laptop shopping should resolve into the laptops branch, not business sales.",
59
+ "pass": true,
60
  "status": "must_fix",
61
  "text": "Which laptop to buy in 2026"
62
  },
 
64
  "actual": {
65
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
66
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
67
+ "model_output.classification.iab_content.tier2.label": null,
68
  "model_output.classification.iab_content.tier3.label": null
69
  },
70
  "expected": {
 
75
  },
76
  "id": "labtop-buying-maps-to-laptops",
77
  "mismatches": [
78
+ {
79
+ "actual": null,
80
+ "expected": "Computing",
81
+ "path": "model_output.classification.iab_content.tier2.label"
82
+ },
83
  {
84
  "actual": null,
85
  "expected": "Laptops",
 
101
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
102
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
103
  "model_output.classification.iab_content.tier2.label": "Computing",
104
+ "model_output.classification.iab_content.tier3.label": null
105
  },
106
  "expected": {
107
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
122
  "path": "model_output.classification.iab_content.tier2.label"
123
  },
124
  {
125
+ "actual": null,
126
  "expected": "Sales",
127
  "path": "model_output.classification.iab_content.tier3.label"
128
  }
 
136
  "actual": {
137
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
138
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
139
+ "model_output.classification.iab_content.tier2.label": null,
140
+ "model_output.classification.iab_content.tier3.label": null
141
  },
142
  "expected": {
143
  "model_output.classification.iab_content.mapping_mode": "exact",
 
153
  "path": "model_output.classification.iab_content.tier1.label"
154
  },
155
  {
156
+ "actual": null,
157
  "expected": "Business",
158
  "path": "model_output.classification.iab_content.tier2.label"
159
  },
160
  {
161
+ "actual": null,
162
  "expected": "Sales",
163
  "path": "model_output.classification.iab_content.tier3.label"
164
  },
 
176
  {
177
  "actual": {
178
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
179
+ "model_output.classification.iab_content.tier1.label": "Careers",
180
+ "model_output.classification.iab_content.tier2.label": null,
181
  "model_output.classification.iab_content.tier3.label": null
182
  },
183
  "expected": {
 
189
  "id": "marketing-tools-map-to-marketing",
190
  "mismatches": [
191
  {
192
+ "actual": "Careers",
193
  "expected": "Business and Finance",
194
  "path": "model_output.classification.iab_content.tier1.label"
195
  },
196
  {
197
+ "actual": null,
198
  "expected": "Business",
199
  "path": "model_output.classification.iab_content.tier2.label"
200
  },
 
217
  {
218
  "actual": {
219
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
220
+ "model_output.classification.iab_content.tier1.label": "Sports",
221
+ "model_output.classification.iab_content.tier2.label": null
222
  },
223
  "expected": {
224
  "model_output.classification.iab_content.mapping_mode": "exact",
 
228
  "id": "ml-explanation-maps-to-ai",
229
  "mismatches": [
230
  {
231
+ "actual": "Sports",
232
+ "expected": "Technology & Computing",
233
+ "path": "model_output.classification.iab_content.tier1.label"
234
+ },
235
+ {
236
+ "actual": null,
237
  "expected": "Artificial Intelligence",
238
  "path": "model_output.classification.iab_content.tier2.label"
239
  },
 
253
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
254
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
255
  "model_output.classification.iab_content.tier2.label": "Computing",
256
+ "model_output.classification.iab_content.tier3.label": null
257
  },
258
  "expected": {
259
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
274
  "path": "model_output.classification.iab_content.tier2.label"
275
  },
276
  {
277
+ "actual": null,
278
  "expected": "Business I.T.",
279
  "path": "model_output.classification.iab_content.tier3.label"
280
  }
 
286
  },
287
  {
288
  "actual": {
289
+ "model_output.classification.iab_content.mapping_mode": "exact",
290
  "model_output.classification.iab_content.tier1.label": "Food & Drink",
291
  "model_output.classification.iab_content.tier2.label": "Dining Out"
292
  },
 
296
  "model_output.classification.iab_content.tier2.label": "Dining Out"
297
  },
298
  "id": "restaurant-booking-maps-to-dining-out",
299
+ "mismatches": [],
 
 
 
 
 
 
300
  "notes": "Generic dining requests should not inherit the repo's business default.",
301
+ "pass": true,
302
  "status": "must_fix",
303
  "text": "Book a table for 2 tonight"
304
  },
305
  {
306
  "actual": {
307
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
308
+ "model_output.classification.iab_content.tier1.label": "Sports",
309
+ "model_output.classification.iab_content.tier2.label": null,
310
  "model_output.classification.iab_content.tier3.label": null
311
  },
312
  "expected": {
 
318
  "id": "trial-signup-maps-to-software",
319
  "mismatches": [
320
  {
321
+ "actual": "Sports",
322
  "expected": "Technology & Computing",
323
  "path": "model_output.classification.iab_content.tier1.label"
324
  },
325
  {
326
+ "actual": null,
327
  "expected": "Computing",
328
  "path": "model_output.classification.iab_content.tier2.label"
329
  },
 
340
  },
341
  {
342
  "actual": {
343
+ "model_output.classification.iab_content.mapping_mode": "exact",
344
+ "model_output.classification.iab_content.tier1.label": "Careers",
345
+ "model_output.classification.iab_content.tier2.label": "Remote Working",
346
+ "model_output.classification.iab_content.tier3.label": null,
347
  "model_output.classification.iab_content.tier4.label": null
348
  },
349
  "expected": {
 
356
  "id": "communication-software-maps-to-tier4",
357
  "mismatches": [
358
  {
359
+ "actual": "Careers",
360
+ "expected": "Technology & Computing",
361
+ "path": "model_output.classification.iab_content.tier1.label"
362
+ },
363
+ {
364
+ "actual": "Remote Working",
365
+ "expected": "Computing",
366
+ "path": "model_output.classification.iab_content.tier2.label"
367
+ },
368
+ {
369
+ "actual": null,
370
  "expected": "Computer Software and Applications",
371
  "path": "model_output.classification.iab_content.tier3.label"
372
  },
 
374
  "actual": null,
375
  "expected": "Communication",
376
  "path": "model_output.classification.iab_content.tier4.label"
 
 
 
 
 
377
  }
378
  ],
379
  "notes": "Full taxonomy support should preserve the tier4 communication branch.",
 
383
  },
384
  {
385
  "actual": {
386
+ "model_output.classification.iab_content.mapping_mode": "exact",
387
  "model_output.classification.iab_content.tier1.label": "Food & Drink",
388
+ "model_output.classification.iab_content.tier2.label": "Alcoholic Beverages"
389
  },
390
  "expected": {
391
  "model_output.classification.iab_content.mapping_mode": "exact",
 
393
  "model_output.classification.iab_content.tier2.label": "Alcoholic Beverages"
394
  },
395
  "id": "vodka-query-maps-to-alcoholic-beverages",
396
+ "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
397
  "notes": "Food and beverage prompts should not fall through to the business default.",
398
+ "pass": true,
399
  "status": "must_fix",
400
  "text": "what is best vodka drink should i try"
401
  }
artifacts/evaluation/latest/intent_subtype_difficulty_benchmark_confusion_matrix.csv CHANGED
@@ -1,19 +1,19 @@
1
  ,education,product_discovery,comparison,evaluation,deal_seeking,provider_selection,signup,purchase,booking,download,contact_sales,task_execution,onboarding_setup,troubleshooting,account_help,billing_help,follow_up,emotional_reflection
2
  education,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3
- product_discovery,0,13,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0
4
- comparison,2,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5
  evaluation,1,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6
- deal_seeking,0,0,0,0,14,1,0,0,0,0,0,0,0,0,0,0,0,0
7
- provider_selection,0,0,1,0,0,15,0,0,0,0,0,0,0,0,0,0,0,0
8
- signup,0,0,0,0,0,0,16,0,0,0,0,0,0,0,0,0,0,0
9
- purchase,0,0,0,0,0,0,0,14,0,0,0,0,1,0,0,0,0,0
10
- booking,0,0,0,0,0,0,1,0,13,0,0,1,0,0,0,0,0,0
11
- download,0,0,0,0,0,0,0,0,0,14,0,1,0,0,0,0,0,0
12
- contact_sales,0,0,0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0
13
- task_execution,0,0,0,0,0,0,1,0,0,0,0,17,0,0,0,0,0,0
14
- onboarding_setup,0,0,0,0,0,0,0,0,0,0,0,1,16,0,0,0,0,0
15
- troubleshooting,0,0,0,0,0,0,0,0,0,0,0,0,0,14,0,0,0,1
16
- account_help,0,0,0,0,0,0,2,0,0,0,0,0,0,0,12,1,0,0
17
- billing_help,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,0,0
18
- follow_up,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,0
19
  emotional_reflection,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15
 
1
  ,education,product_discovery,comparison,evaluation,deal_seeking,provider_selection,signup,purchase,booking,download,contact_sales,task_execution,onboarding_setup,troubleshooting,account_help,billing_help,follow_up,emotional_reflection
2
  education,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3
+ product_discovery,0,13,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0
4
+ comparison,1,0,11,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0
5
  evaluation,1,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6
+ deal_seeking,0,1,1,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0
7
+ provider_selection,0,0,0,0,0,16,0,0,0,0,0,0,0,0,0,0,0,0
8
+ signup,0,0,0,0,0,0,14,0,0,1,0,0,0,0,1,0,0,0
9
+ purchase,0,0,0,0,0,0,1,13,0,0,0,1,0,0,0,0,0,0
10
+ booking,0,0,0,0,0,0,5,0,8,0,1,1,0,0,0,0,0,0
11
+ download,0,0,0,0,0,0,0,0,0,13,0,1,0,0,1,0,0,0
12
+ contact_sales,0,0,0,1,0,0,0,0,0,0,14,0,0,0,0,0,0,0
13
+ task_execution,0,0,0,0,0,0,0,0,0,0,0,17,1,0,0,0,0,0
14
+ onboarding_setup,0,0,0,0,0,0,0,0,0,0,0,1,15,1,0,0,0,0
15
+ troubleshooting,0,0,0,0,0,0,0,0,0,0,0,0,0,15,0,0,0,0
16
+ account_help,0,0,0,0,0,0,0,0,0,0,0,0,0,1,12,2,0,0
17
+ billing_help,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,12,0,0
18
+ follow_up,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,13,0
19
  emotional_reflection,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15
artifacts/evaluation/latest/intent_subtype_difficulty_benchmark_report.json CHANGED
@@ -1,159 +1,159 @@
1
  {
2
- "accepted_accuracy": 0.9386,
3
- "accepted_coverage": 1.0,
4
- "accuracy": 0.9386,
5
- "confusion_matrix_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/artifacts/evaluation/latest/intent_subtype_difficulty_benchmark_confusion_matrix.csv",
6
  "count": 277,
7
- "dataset_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/data/subtype_benchmark.jsonl",
8
  "difficulty_breakdown": {
9
  "easy": {
10
- "accepted_accuracy": 0.9565,
11
  "accepted_coverage": 1.0,
12
- "accuracy": 0.9565,
13
  "count": 92,
14
  "fallback_rate": 0.0,
15
- "macro_f1": 0.9579
16
  },
17
  "hard": {
18
- "accepted_accuracy": 0.8901,
19
- "accepted_coverage": 1.0,
20
- "accuracy": 0.8901,
21
  "count": 91,
22
- "fallback_rate": 0.0,
23
- "macro_f1": 0.8913
24
  },
25
  "medium": {
26
- "accepted_accuracy": 0.9681,
27
- "accepted_coverage": 1.0,
28
- "accuracy": 0.9681,
29
  "count": 94,
30
- "fallback_rate": 0.0,
31
- "macro_f1": 0.9671
32
  }
33
  },
34
- "fallback_rate": 0.0,
35
  "head": "intent_subtype",
36
- "macro_f1": 0.9401,
37
  "per_class_metrics": {
38
  "account_help": {
39
- "f1-score": 0.8888888888888888,
40
- "precision": 1.0,
41
  "recall": 0.8,
42
  "support": 15.0
43
  },
44
- "accuracy": 0.9386281588447654,
45
  "billing_help": {
46
- "f1-score": 0.967741935483871,
47
- "precision": 0.9375,
48
- "recall": 1.0,
49
  "support": 15.0
50
  },
51
  "booking": {
52
- "f1-score": 0.9285714285714286,
53
  "precision": 1.0,
54
- "recall": 0.8666666666666667,
55
  "support": 15.0
56
  },
57
  "comparison": {
58
- "f1-score": 0.896551724137931,
59
- "precision": 0.9285714285714286,
60
- "recall": 0.8666666666666667,
61
  "support": 15.0
62
  },
63
  "contact_sales": {
64
- "f1-score": 1.0,
65
- "precision": 1.0,
66
- "recall": 1.0,
67
- "support": 15.0
68
- },
69
- "deal_seeking": {
70
  "f1-score": 0.9333333333333333,
71
  "precision": 0.9333333333333333,
72
  "recall": 0.9333333333333333,
73
  "support": 15.0
74
  },
75
- "download": {
76
- "f1-score": 0.9655172413793104,
77
  "precision": 1.0,
78
- "recall": 0.9333333333333333,
 
 
 
 
 
 
79
  "support": 15.0
80
  },
81
  "education": {
82
- "f1-score": 0.9090909090909091,
83
- "precision": 0.8333333333333334,
84
  "recall": 1.0,
85
  "support": 15.0
86
  },
87
  "emotional_reflection": {
88
- "f1-score": 0.967741935483871,
89
- "precision": 0.9375,
90
  "recall": 1.0,
91
  "support": 15.0
92
  },
93
  "evaluation": {
94
- "f1-score": 0.9655172413793104,
95
- "precision": 1.0,
96
  "recall": 0.9333333333333333,
97
  "support": 15.0
98
  },
99
  "follow_up": {
100
- "f1-score": 1.0,
101
  "precision": 1.0,
102
- "recall": 1.0,
103
  "support": 15.0
104
  },
105
  "macro avg": {
106
- "f1-score": 0.9401067100194944,
107
- "precision": 0.9476910208527856,
108
- "recall": 0.9383215323166303,
109
  "support": 277.0
110
  },
111
  "onboarding_setup": {
112
- "f1-score": 0.9411764705882353,
113
- "precision": 0.9411764705882353,
114
- "recall": 0.9411764705882353,
115
  "support": 17.0
116
  },
117
  "product_discovery": {
118
- "f1-score": 0.9285714285714286,
119
- "precision": 1.0,
120
  "recall": 0.8666666666666667,
121
  "support": 15.0
122
  },
123
  "provider_selection": {
124
- "f1-score": 0.9375,
125
- "precision": 0.9375,
126
- "recall": 0.9375,
127
  "support": 16.0
128
  },
129
  "purchase": {
130
- "f1-score": 0.9655172413793104,
131
  "precision": 1.0,
132
- "recall": 0.9333333333333333,
133
  "support": 15.0
134
  },
135
  "signup": {
136
- "f1-score": 0.8888888888888888,
137
- "precision": 0.8,
138
- "recall": 1.0,
139
  "support": 16.0
140
  },
141
  "task_execution": {
142
- "f1-score": 0.8717948717948718,
143
- "precision": 0.8095238095238095,
144
  "recall": 0.9444444444444444,
145
  "support": 18.0
146
  },
147
  "troubleshooting": {
148
- "f1-score": 0.9655172413793104,
149
- "precision": 1.0,
150
- "recall": 0.9333333333333333,
151
  "support": 15.0
152
  },
153
  "weighted avg": {
154
- "f1-score": 0.9391802821325396,
155
- "precision": 0.9455776173285199,
156
- "recall": 0.9386281588447654,
157
  "support": 277.0
158
  }
159
  },
 
1
  {
2
+ "accepted_accuracy": 0.8967,
3
+ "accepted_coverage": 0.9783,
4
+ "accuracy": 0.8773,
5
+ "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/intent_subtype_difficulty_benchmark_confusion_matrix.csv",
6
  "count": 277,
7
+ "dataset_path": "/content/agentic-intent-classifier/data/subtype_benchmark.jsonl",
8
  "difficulty_breakdown": {
9
  "easy": {
10
+ "accepted_accuracy": 0.913,
11
  "accepted_coverage": 1.0,
12
+ "accuracy": 0.913,
13
  "count": 92,
14
  "fallback_rate": 0.0,
15
+ "macro_f1": 0.9111
16
  },
17
  "hard": {
18
+ "accepted_accuracy": 0.814,
19
+ "accepted_coverage": 0.9451,
20
+ "accuracy": 0.7692,
21
  "count": 91,
22
+ "fallback_rate": 0.0549,
23
+ "macro_f1": 0.7704
24
  },
25
  "medium": {
26
+ "accepted_accuracy": 0.957,
27
+ "accepted_coverage": 0.9894,
28
+ "accuracy": 0.9468,
29
  "count": 94,
30
+ "fallback_rate": 0.0106,
31
+ "macro_f1": 0.9453
32
  }
33
  },
34
+ "fallback_rate": 0.0217,
35
  "head": "intent_subtype",
36
+ "macro_f1": 0.8767,
37
  "per_class_metrics": {
38
  "account_help": {
39
+ "f1-score": 0.7741935483870968,
40
+ "precision": 0.75,
41
  "recall": 0.8,
42
  "support": 15.0
43
  },
44
+ "accuracy": 0.8772563176895307,
45
  "billing_help": {
46
+ "f1-score": 0.8,
47
+ "precision": 0.8,
48
+ "recall": 0.8,
49
  "support": 15.0
50
  },
51
  "booking": {
52
+ "f1-score": 0.6956521739130435,
53
  "precision": 1.0,
54
+ "recall": 0.5333333333333333,
55
  "support": 15.0
56
  },
57
  "comparison": {
58
+ "f1-score": 0.8148148148148148,
59
+ "precision": 0.9166666666666666,
60
+ "recall": 0.7333333333333333,
61
  "support": 15.0
62
  },
63
  "contact_sales": {
 
 
 
 
 
 
64
  "f1-score": 0.9333333333333333,
65
  "precision": 0.9333333333333333,
66
  "recall": 0.9333333333333333,
67
  "support": 15.0
68
  },
69
+ "deal_seeking": {
70
+ "f1-score": 0.9285714285714286,
71
  "precision": 1.0,
72
+ "recall": 0.8666666666666667,
73
+ "support": 15.0
74
+ },
75
+ "download": {
76
+ "f1-score": 0.896551724137931,
77
+ "precision": 0.9285714285714286,
78
+ "recall": 0.8666666666666667,
79
  "support": 15.0
80
  },
81
  "education": {
82
+ "f1-score": 0.9375,
83
+ "precision": 0.8823529411764706,
84
  "recall": 1.0,
85
  "support": 15.0
86
  },
87
  "emotional_reflection": {
88
+ "f1-score": 1.0,
89
+ "precision": 1.0,
90
  "recall": 1.0,
91
  "support": 15.0
92
  },
93
  "evaluation": {
94
+ "f1-score": 0.8484848484848485,
95
+ "precision": 0.7777777777777778,
96
  "recall": 0.9333333333333333,
97
  "support": 15.0
98
  },
99
  "follow_up": {
100
+ "f1-score": 0.9285714285714286,
101
  "precision": 1.0,
102
+ "recall": 0.8666666666666667,
103
  "support": 15.0
104
  },
105
  "macro avg": {
106
+ "f1-score": 0.8767381318695205,
107
+ "precision": 0.893520983060569,
108
+ "recall": 0.8760257806826435,
109
  "support": 277.0
110
  },
111
  "onboarding_setup": {
112
+ "f1-score": 0.9090909090909091,
113
+ "precision": 0.9375,
114
+ "recall": 0.8823529411764706,
115
  "support": 17.0
116
  },
117
  "product_discovery": {
118
+ "f1-score": 0.896551724137931,
119
+ "precision": 0.9285714285714286,
120
  "recall": 0.8666666666666667,
121
  "support": 15.0
122
  },
123
  "provider_selection": {
124
+ "f1-score": 1.0,
125
+ "precision": 1.0,
126
+ "recall": 1.0,
127
  "support": 16.0
128
  },
129
  "purchase": {
130
+ "f1-score": 0.9285714285714286,
131
  "precision": 1.0,
132
+ "recall": 0.8666666666666667,
133
  "support": 15.0
134
  },
135
  "signup": {
136
+ "f1-score": 0.7777777777777778,
137
+ "precision": 0.7,
138
+ "recall": 0.875,
139
  "support": 16.0
140
  },
141
  "task_execution": {
142
+ "f1-score": 0.8292682926829268,
143
+ "precision": 0.7391304347826086,
144
  "recall": 0.9444444444444444,
145
  "support": 18.0
146
  },
147
  "troubleshooting": {
148
+ "f1-score": 0.8823529411764706,
149
+ "precision": 0.7894736842105263,
150
+ "recall": 1.0,
151
  "support": 15.0
152
  },
153
  "weighted avg": {
154
+ "f1-score": 0.8765453432446891,
155
+ "precision": 0.8918521903635431,
156
+ "recall": 0.8772563176895307,
157
  "support": 277.0
158
  }
159
  },
artifacts/evaluation/latest/intent_subtype_extended_cases_confusion_matrix.csv CHANGED
@@ -1,8 +1,8 @@
1
  ,education,product_discovery,comparison,evaluation,deal_seeking,provider_selection,signup,purchase,booking,download,contact_sales,task_execution,onboarding_setup,troubleshooting,account_help,billing_help,follow_up,emotional_reflection
2
  education,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3
  product_discovery,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4
- comparison,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
5
- evaluation,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0
6
  deal_seeking,0,0,0,0,9,0,0,0,0,0,0,0,0,0,0,0,0,0
7
  provider_selection,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0
8
  signup,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
@@ -15,5 +15,5 @@ onboarding_setup,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0
15
  troubleshooting,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
16
  account_help,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,0,0,0
17
  billing_help,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
18
- follow_up,0,0,0,0,3,0,0,0,0,0,0,0,0,0,1,0,8,0
19
  emotional_reflection,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
 
1
  ,education,product_discovery,comparison,evaluation,deal_seeking,provider_selection,signup,purchase,booking,download,contact_sales,task_execution,onboarding_setup,troubleshooting,account_help,billing_help,follow_up,emotional_reflection
2
  education,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3
  product_discovery,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4
+ comparison,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5
+ evaluation,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
6
  deal_seeking,0,0,0,0,9,0,0,0,0,0,0,0,0,0,0,0,0,0
7
  provider_selection,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0
8
  signup,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
 
15
  troubleshooting,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
16
  account_help,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,0,0,0
17
  billing_help,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
18
+ follow_up,1,0,0,0,2,0,0,0,0,0,0,1,0,0,0,0,8,0
19
  emotional_reflection,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
artifacts/evaluation/latest/intent_subtype_extended_cases_report.json CHANGED
@@ -2,16 +2,16 @@
2
  "accepted_accuracy": 0.8491,
3
  "accepted_coverage": 1.0,
4
  "accuracy": 0.8491,
5
- "confusion_matrix_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/artifacts/evaluation/latest/intent_subtype_extended_cases_confusion_matrix.csv",
6
  "count": 53,
7
- "dataset_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/data/subtype/extended_cases.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "intent_subtype",
10
- "macro_f1": 0.8146,
11
  "per_class_metrics": {
12
  "account_help": {
13
- "f1-score": 0.6666666666666666,
14
- "precision": 0.6666666666666666,
15
  "recall": 0.6666666666666666,
16
  "support": 3.0
17
  },
@@ -29,9 +29,9 @@
29
  "support": 0.0
30
  },
31
  "comparison": {
32
- "f1-score": 0.6666666666666666,
33
  "precision": 1.0,
34
- "recall": 0.5,
35
  "support": 2.0
36
  },
37
  "contact_sales": {
@@ -41,8 +41,8 @@
41
  "support": 0.0
42
  },
43
  "deal_seeking": {
44
- "f1-score": 0.8181818181818182,
45
- "precision": 0.6923076923076923,
46
  "recall": 1.0,
47
  "support": 9.0
48
  },
@@ -53,8 +53,8 @@
53
  "support": 0.0
54
  },
55
  "education": {
56
- "f1-score": 0.9333333333333333,
57
- "precision": 0.875,
58
  "recall": 1.0,
59
  "support": 7.0
60
  },
@@ -65,9 +65,9 @@
65
  "support": 0.0
66
  },
67
  "evaluation": {
68
- "f1-score": 0.5,
69
- "precision": 1.0,
70
- "recall": 0.3333333333333333,
71
  "support": 3.0
72
  },
73
  "follow_up": {
@@ -77,9 +77,9 @@
77
  "support": 12.0
78
  },
79
  "macro avg": {
80
- "f1-score": 0.4978114478114478,
81
- "precision": 0.531517094017094,
82
- "recall": 0.5092592592592592,
83
  "support": 53.0
84
  },
85
  "onboarding_setup": {
@@ -89,8 +89,8 @@
89
  "support": 4.0
90
  },
91
  "product_discovery": {
92
- "f1-score": 1.0,
93
- "precision": 1.0,
94
  "recall": 1.0,
95
  "support": 6.0
96
  },
@@ -113,8 +113,8 @@
113
  "support": 0.0
114
  },
115
  "task_execution": {
116
- "f1-score": 1.0,
117
- "precision": 1.0,
118
  "recall": 1.0,
119
  "support": 1.0
120
  },
@@ -125,8 +125,8 @@
125
  "support": 1.0
126
  },
127
  "weighted avg": {
128
- "f1-score": 0.8404230989136648,
129
- "precision": 0.887215771649734,
130
  "recall": 0.8490566037735849,
131
  "support": 53.0
132
  }
 
2
  "accepted_accuracy": 0.8491,
3
  "accepted_coverage": 1.0,
4
  "accuracy": 0.8491,
5
+ "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/intent_subtype_extended_cases_confusion_matrix.csv",
6
  "count": 53,
7
+ "dataset_path": "/content/agentic-intent-classifier/data/subtype/extended_cases.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "intent_subtype",
10
+ "macro_f1": 0.7764,
11
  "per_class_metrics": {
12
  "account_help": {
13
+ "f1-score": 0.8,
14
+ "precision": 1.0,
15
  "recall": 0.6666666666666666,
16
  "support": 3.0
17
  },
 
29
  "support": 0.0
30
  },
31
  "comparison": {
32
+ "f1-score": 1.0,
33
  "precision": 1.0,
34
+ "recall": 1.0,
35
  "support": 2.0
36
  },
37
  "contact_sales": {
 
41
  "support": 0.0
42
  },
43
  "deal_seeking": {
44
+ "f1-score": 0.9,
45
+ "precision": 0.8181818181818182,
46
  "recall": 1.0,
47
  "support": 9.0
48
  },
 
53
  "support": 0.0
54
  },
55
  "education": {
56
+ "f1-score": 0.875,
57
+ "precision": 0.7777777777777778,
58
  "recall": 1.0,
59
  "support": 7.0
60
  },
 
65
  "support": 0.0
66
  },
67
  "evaluation": {
68
+ "f1-score": 0.0,
69
+ "precision": 0.0,
70
+ "recall": 0.0,
71
  "support": 3.0
72
  },
73
  "follow_up": {
 
77
  "support": 12.0
78
  },
79
  "macro avg": {
80
+ "f1-score": 0.474472286972287,
81
+ "precision": 0.46035754369087706,
82
+ "recall": 0.5185185185185186,
83
  "support": 53.0
84
  },
85
  "onboarding_setup": {
 
89
  "support": 4.0
90
  },
91
  "product_discovery": {
92
+ "f1-score": 0.9230769230769231,
93
+ "precision": 0.8571428571428571,
94
  "recall": 1.0,
95
  "support": 6.0
96
  },
 
113
  "support": 0.0
114
  },
115
  "task_execution": {
116
+ "f1-score": 0.6666666666666666,
117
+ "precision": 0.5,
118
  "recall": 1.0,
119
  "support": 1.0
120
  },
 
125
  "support": 1.0
126
  },
127
  "weighted avg": {
128
+ "f1-score": 0.823438668249989,
129
+ "precision": 0.8324076342944268,
130
  "recall": 0.8490566037735849,
131
  "support": 53.0
132
  }
artifacts/evaluation/latest/intent_subtype_hard_cases_confusion_matrix.csv CHANGED
@@ -2,8 +2,8 @@
2
  education,29,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3
  product_discovery,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4
  comparison,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5
- evaluation,0,2,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6
- deal_seeking,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0
7
  provider_selection,0,0,0,1,0,9,0,0,0,0,0,0,0,0,0,0,0,0
8
  signup,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0
9
  purchase,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0
 
2
  education,29,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3
  product_discovery,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4
  comparison,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5
+ evaluation,2,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6
+ deal_seeking,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0
7
  provider_selection,0,0,0,1,0,9,0,0,0,0,0,0,0,0,0,0,0,0
8
  signup,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0
9
  purchase,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0
artifacts/evaluation/latest/intent_subtype_hard_cases_report.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "accepted_accuracy": 0.9468,
3
  "accepted_coverage": 1.0,
4
- "accuracy": 0.9468,
5
- "confusion_matrix_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/artifacts/evaluation/latest/intent_subtype_hard_cases_confusion_matrix.csv",
6
  "count": 94,
7
- "dataset_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/data/subtype/hard_cases.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "intent_subtype",
10
- "macro_f1": 0.9191,
11
  "per_class_metrics": {
12
  "account_help": {
13
  "f1-score": 0.8,
@@ -15,7 +15,7 @@
15
  "recall": 1.0,
16
  "support": 2.0
17
  },
18
- "accuracy": 0.9468085106382979,
19
  "billing_help": {
20
  "f1-score": 1.0,
21
  "precision": 1.0,
@@ -41,9 +41,9 @@
41
  "support": 0.0
42
  },
43
  "deal_seeking": {
44
- "f1-score": 1.0,
45
  "precision": 1.0,
46
- "recall": 1.0,
47
  "support": 3.0
48
  },
49
  "download": {
@@ -53,8 +53,8 @@
53
  "support": 0.0
54
  },
55
  "education": {
56
- "f1-score": 1.0,
57
- "precision": 1.0,
58
  "recall": 1.0,
59
  "support": 29.0
60
  },
@@ -65,9 +65,9 @@
65
  "support": 0.0
66
  },
67
  "evaluation": {
68
- "f1-score": 0.7272727272727273,
69
- "precision": 0.8,
70
- "recall": 0.6666666666666666,
71
  "support": 6.0
72
  },
73
  "follow_up": {
@@ -77,9 +77,9 @@
77
  "support": 12.0
78
  },
79
  "macro avg": {
80
- "f1-score": 0.7659288023895194,
81
- "precision": 0.7648148148148147,
82
- "recall": 0.786111111111111,
83
  "support": 94.0
84
  },
85
  "onboarding_setup": {
@@ -89,14 +89,14 @@
89
  "support": 6.0
90
  },
91
  "product_discovery": {
92
- "f1-score": 0.8888888888888888,
93
- "precision": 0.8,
94
  "recall": 1.0,
95
  "support": 8.0
96
  },
97
  "provider_selection": {
98
- "f1-score": 0.9473684210526315,
99
- "precision": 1.0,
100
  "recall": 0.9,
101
  "support": 10.0
102
  },
@@ -125,9 +125,9 @@
125
  "support": 3.0
126
  },
127
  "weighted avg": {
128
- "f1-score": 0.9478016938458051,
129
- "precision": 0.9578014184397163,
130
- "recall": 0.9468085106382979,
131
  "support": 94.0
132
  }
133
  },
 
1
  {
2
+ "accepted_accuracy": 0.8936,
3
  "accepted_coverage": 1.0,
4
+ "accuracy": 0.8936,
5
+ "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/intent_subtype_hard_cases_confusion_matrix.csv",
6
  "count": 94,
7
+ "dataset_path": "/content/agentic-intent-classifier/data/subtype/hard_cases.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "intent_subtype",
10
+ "macro_f1": 0.846,
11
  "per_class_metrics": {
12
  "account_help": {
13
  "f1-score": 0.8,
 
15
  "recall": 1.0,
16
  "support": 2.0
17
  },
18
+ "accuracy": 0.8936170212765957,
19
  "billing_help": {
20
  "f1-score": 1.0,
21
  "precision": 1.0,
 
41
  "support": 0.0
42
  },
43
  "deal_seeking": {
44
+ "f1-score": 0.8,
45
  "precision": 1.0,
46
+ "recall": 0.6666666666666666,
47
  "support": 3.0
48
  },
49
  "download": {
 
53
  "support": 0.0
54
  },
55
  "education": {
56
+ "f1-score": 0.9666666666666667,
57
+ "precision": 0.9354838709677419,
58
  "recall": 1.0,
59
  "support": 29.0
60
  },
 
65
  "support": 0.0
66
  },
67
  "evaluation": {
68
+ "f1-score": 0.0,
69
+ "precision": 0.0,
70
+ "recall": 0.0,
71
  "support": 6.0
72
  },
73
  "follow_up": {
 
77
  "support": 12.0
78
  },
79
  "macro avg": {
80
+ "f1-score": 0.7049919484702094,
81
+ "precision": 0.7038231780167263,
82
+ "recall": 0.7305555555555556,
83
  "support": 94.0
84
  },
85
  "onboarding_setup": {
 
89
  "support": 6.0
90
  },
91
  "product_discovery": {
92
+ "f1-score": 0.8,
93
+ "precision": 0.6666666666666666,
94
  "recall": 1.0,
95
  "support": 8.0
96
  },
97
  "provider_selection": {
98
+ "f1-score": 0.9,
99
+ "precision": 0.9,
100
  "recall": 0.9,
101
  "support": 10.0
102
  },
 
125
  "support": 3.0
126
  },
127
  "weighted avg": {
128
+ "f1-score": 0.8721091581868641,
129
+ "precision": 0.8648478609013955,
130
+ "recall": 0.8936170212765957,
131
  "support": 94.0
132
  }
133
  },
artifacts/evaluation/latest/intent_subtype_test_confusion_matrix.csv CHANGED
@@ -1,8 +1,8 @@
1
  ,education,product_discovery,comparison,evaluation,deal_seeking,provider_selection,signup,purchase,booking,download,contact_sales,task_execution,onboarding_setup,troubleshooting,account_help,billing_help,follow_up,emotional_reflection
2
  education,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3
- product_discovery,0,7,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4
  comparison,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5
- evaluation,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6
  deal_seeking,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0
7
  provider_selection,0,0,0,1,0,5,0,0,0,0,0,0,0,0,0,0,0,0
8
  signup,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0
@@ -12,8 +12,8 @@ download,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
12
  contact_sales,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
13
  task_execution,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0
14
  onboarding_setup,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0
15
- troubleshooting,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0
16
  account_help,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0
17
  billing_help,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
18
- follow_up,0,0,0,0,2,0,0,0,0,0,0,0,0,1,0,0,8,0
19
  emotional_reflection,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5
 
1
  ,education,product_discovery,comparison,evaluation,deal_seeking,provider_selection,signup,purchase,booking,download,contact_sales,task_execution,onboarding_setup,troubleshooting,account_help,billing_help,follow_up,emotional_reflection
2
  education,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3
+ product_discovery,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4
  comparison,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5
+ evaluation,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6
  deal_seeking,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0
7
  provider_selection,0,0,0,1,0,5,0,0,0,0,0,0,0,0,0,0,0,0
8
  signup,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0
 
12
  contact_sales,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
13
  task_execution,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0
14
  onboarding_setup,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0
15
+ troubleshooting,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0
16
  account_help,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0
17
  billing_help,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
18
+ follow_up,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,8,0
19
  emotional_reflection,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5
artifacts/evaluation/latest/intent_subtype_test_report.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "accepted_accuracy": 0.9,
3
  "accepted_coverage": 1.0,
4
- "accuracy": 0.9,
5
- "confusion_matrix_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/artifacts/evaluation/latest/intent_subtype_test_confusion_matrix.csv",
6
  "count": 70,
7
- "dataset_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/data/subtype/test.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "intent_subtype",
10
- "macro_f1": 0.863,
11
  "per_class_metrics": {
12
  "account_help": {
13
  "f1-score": 1.0,
@@ -15,7 +15,7 @@
15
  "recall": 1.0,
16
  "support": 2.0
17
  },
18
- "accuracy": 0.9,
19
  "billing_help": {
20
  "f1-score": 0.0,
21
  "precision": 0.0,
@@ -29,8 +29,8 @@
29
  "support": 3.0
30
  },
31
  "comparison": {
32
- "f1-score": 1.0,
33
- "precision": 1.0,
34
  "recall": 1.0,
35
  "support": 3.0
36
  },
@@ -41,8 +41,8 @@
41
  "support": 0.0
42
  },
43
  "deal_seeking": {
44
- "f1-score": 0.6666666666666666,
45
- "precision": 0.5,
46
  "recall": 1.0,
47
  "support": 2.0
48
  },
@@ -53,8 +53,8 @@
53
  "support": 0.0
54
  },
55
  "education": {
56
- "f1-score": 1.0,
57
- "precision": 1.0,
58
  "recall": 1.0,
59
  "support": 14.0
60
  },
@@ -65,8 +65,8 @@
65
  "support": 5.0
66
  },
67
  "evaluation": {
68
- "f1-score": 0.4,
69
- "precision": 0.3333333333333333,
70
  "recall": 0.5,
71
  "support": 2.0
72
  },
@@ -77,21 +77,21 @@
77
  "support": 11.0
78
  },
79
  "macro avg": {
80
- "f1-score": 0.6712084293224644,
81
- "precision": 0.6671296296296296,
82
- "recall": 0.6908670033670034,
83
  "support": 70.0
84
  },
85
  "onboarding_setup": {
86
- "f1-score": 0.8888888888888888,
87
- "precision": 0.8,
88
  "recall": 1.0,
89
  "support": 4.0
90
  },
91
  "product_discovery": {
92
- "f1-score": 0.875,
93
- "precision": 0.875,
94
- "recall": 0.875,
95
  "support": 8.0
96
  },
97
  "provider_selection": {
@@ -113,21 +113,21 @@
113
  "support": 2.0
114
  },
115
  "task_execution": {
116
- "f1-score": 1.0,
117
- "precision": 1.0,
118
  "recall": 1.0,
119
  "support": 6.0
120
  },
121
  "troubleshooting": {
122
- "f1-score": 0.5,
123
- "precision": 0.5,
124
  "recall": 0.5,
125
  "support": 2.0
126
  },
127
  "weighted avg": {
128
- "f1-score": 0.9058084605453025,
129
- "precision": 0.9266666666666667,
130
- "recall": 0.9,
131
  "support": 70.0
132
  }
133
  },
 
1
  {
2
+ "accepted_accuracy": 0.9143,
3
  "accepted_coverage": 1.0,
4
+ "accuracy": 0.9143,
5
+ "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/intent_subtype_test_confusion_matrix.csv",
6
  "count": 70,
7
+ "dataset_path": "/content/agentic-intent-classifier/data/subtype/test.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "intent_subtype",
10
+ "macro_f1": 0.8855,
11
  "per_class_metrics": {
12
  "account_help": {
13
  "f1-score": 1.0,
 
15
  "recall": 1.0,
16
  "support": 2.0
17
  },
18
+ "accuracy": 0.9142857142857143,
19
  "billing_help": {
20
  "f1-score": 0.0,
21
  "precision": 0.0,
 
29
  "support": 3.0
30
  },
31
  "comparison": {
32
+ "f1-score": 0.8571428571428571,
33
+ "precision": 0.75,
34
  "recall": 1.0,
35
  "support": 3.0
36
  },
 
41
  "support": 0.0
42
  },
43
  "deal_seeking": {
44
+ "f1-score": 0.8,
45
+ "precision": 0.6666666666666666,
46
  "recall": 1.0,
47
  "support": 2.0
48
  },
 
53
  "support": 0.0
54
  },
55
  "education": {
56
+ "f1-score": 0.9655172413793104,
57
+ "precision": 0.9333333333333333,
58
  "recall": 1.0,
59
  "support": 14.0
60
  },
 
65
  "support": 5.0
66
  },
67
  "evaluation": {
68
+ "f1-score": 0.5,
69
+ "precision": 0.5,
70
  "recall": 0.5,
71
  "support": 2.0
72
  },
 
77
  "support": 11.0
78
  },
79
  "macro avg": {
80
+ "f1-score": 0.6887592108100274,
81
+ "precision": 0.7,
82
+ "recall": 0.6978114478114478,
83
  "support": 70.0
84
  },
85
  "onboarding_setup": {
86
+ "f1-score": 1.0,
87
+ "precision": 1.0,
88
  "recall": 1.0,
89
  "support": 4.0
90
  },
91
  "product_discovery": {
92
+ "f1-score": 1.0,
93
+ "precision": 1.0,
94
+ "recall": 1.0,
95
  "support": 8.0
96
  },
97
  "provider_selection": {
 
113
  "support": 2.0
114
  },
115
  "task_execution": {
116
+ "f1-score": 0.8571428571428571,
117
+ "precision": 0.75,
118
  "recall": 1.0,
119
  "support": 6.0
120
  },
121
  "troubleshooting": {
122
+ "f1-score": 0.6666666666666666,
123
+ "precision": 1.0,
124
  "recall": 0.5,
125
  "support": 2.0
126
  },
127
  "weighted avg": {
128
+ "f1-score": 0.9126080539458813,
129
+ "precision": 0.9307142857142858,
130
+ "recall": 0.9142857142857143,
131
  "support": 70.0
132
  }
133
  },
artifacts/evaluation/latest/intent_subtype_train_confusion_matrix.csv CHANGED
@@ -1,19 +1,19 @@
1
  ,education,product_discovery,comparison,evaluation,deal_seeking,provider_selection,signup,purchase,booking,download,contact_sales,task_execution,onboarding_setup,troubleshooting,account_help,billing_help,follow_up,emotional_reflection
2
  education,52,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3
- product_discovery,0,31,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4
  comparison,0,0,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5
- evaluation,1,2,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6
- deal_seeking,0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,0,0
7
- provider_selection,0,0,0,0,0,24,0,0,0,0,0,0,0,0,0,0,1,0
8
  signup,0,0,0,0,0,0,16,0,0,0,0,0,0,0,0,0,0,0
9
  purchase,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0
10
- booking,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0
11
  download,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0
12
- contact_sales,0,0,0,0,0,0,2,0,0,0,7,0,0,0,0,0,0,0
13
  task_execution,0,0,0,0,0,0,0,0,0,0,0,19,0,0,0,0,0,0
14
- onboarding_setup,0,0,0,0,0,0,0,0,0,0,0,0,17,0,0,0,0,0
15
- troubleshooting,0,0,0,0,0,0,0,0,0,0,0,0,0,12,1,0,0,0
16
  account_help,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0
17
- billing_help,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0
18
- follow_up,0,0,0,0,0,0,0,0,0,0,0,3,0,1,0,0,32,0
19
  emotional_reflection,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,20
 
1
  ,education,product_discovery,comparison,evaluation,deal_seeking,provider_selection,signup,purchase,booking,download,contact_sales,task_execution,onboarding_setup,troubleshooting,account_help,billing_help,follow_up,emotional_reflection
2
  education,52,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3
+ product_discovery,0,30,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
4
  comparison,0,0,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5
+ evaluation,3,6,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6
+ deal_seeking,0,0,0,0,10,1,0,0,0,0,0,0,0,0,0,0,0,0
7
+ provider_selection,0,0,0,0,0,23,0,0,0,0,0,0,0,0,0,0,2,0
8
  signup,0,0,0,0,0,0,16,0,0,0,0,0,0,0,0,0,0,0
9
  purchase,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0
10
+ booking,0,0,0,0,0,0,2,0,3,0,0,0,0,0,0,0,0,0
11
  download,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0
12
+ contact_sales,0,0,0,0,0,0,2,0,1,0,6,0,0,0,0,0,0,0
13
  task_execution,0,0,0,0,0,0,0,0,0,0,0,19,0,0,0,0,0,0
14
+ onboarding_setup,0,0,0,0,0,0,0,0,1,1,0,0,15,0,0,0,0,0
15
+ troubleshooting,0,0,0,0,0,0,0,0,0,0,0,0,0,10,1,0,2,0
16
  account_help,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0
17
+ billing_help,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,4,0,0
18
+ follow_up,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,31,0
19
  emotional_reflection,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,20
artifacts/evaluation/latest/intent_subtype_train_report.json CHANGED
@@ -1,31 +1,31 @@
1
  {
2
- "accepted_accuracy": 0.9649,
3
  "accepted_coverage": 1.0,
4
- "accuracy": 0.9649,
5
- "confusion_matrix_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/artifacts/evaluation/latest/intent_subtype_train_confusion_matrix.csv",
6
  "count": 313,
7
- "dataset_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/data/subtype/train.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "intent_subtype",
10
- "macro_f1": 0.9649,
11
  "per_class_metrics": {
12
  "account_help": {
13
- "f1-score": 0.9333333333333333,
14
- "precision": 0.875,
15
  "recall": 1.0,
16
  "support": 7.0
17
  },
18
- "accuracy": 0.9648562300319489,
19
  "billing_help": {
20
- "f1-score": 1.0,
21
  "precision": 1.0,
22
- "recall": 1.0,
23
  "support": 6.0
24
  },
25
  "booking": {
26
- "f1-score": 1.0,
27
- "precision": 1.0,
28
- "recall": 1.0,
29
  "support": 5.0
30
  },
31
  "comparison": {
@@ -35,26 +35,26 @@
35
  "support": 15.0
36
  },
37
  "contact_sales": {
38
- "f1-score": 0.875,
39
  "precision": 1.0,
40
- "recall": 0.7777777777777778,
41
  "support": 9.0
42
  },
43
  "deal_seeking": {
44
- "f1-score": 1.0,
45
  "precision": 1.0,
46
- "recall": 1.0,
47
  "support": 11.0
48
  },
49
  "download": {
50
- "f1-score": 1.0,
51
- "precision": 1.0,
52
  "recall": 1.0,
53
  "support": 8.0
54
  },
55
  "education": {
56
- "f1-score": 0.9904761904761905,
57
- "precision": 0.9811320754716981,
58
  "recall": 1.0,
59
  "support": 52.0
60
  },
@@ -65,39 +65,39 @@
65
  "support": 20.0
66
  },
67
  "evaluation": {
68
- "f1-score": 0.9032258064516129,
69
  "precision": 1.0,
70
- "recall": 0.8235294117647058,
71
  "support": 17.0
72
  },
73
  "follow_up": {
74
- "f1-score": 0.927536231884058,
75
- "precision": 0.9696969696969697,
76
- "recall": 0.8888888888888888,
77
  "support": 36.0
78
  },
79
  "macro avg": {
80
- "f1-score": 0.9649442256020961,
81
- "precision": 0.9689347311202658,
82
- "recall": 0.9651818334171275,
83
  "support": 313.0
84
  },
85
  "onboarding_setup": {
86
- "f1-score": 1.0,
87
  "precision": 1.0,
88
- "recall": 1.0,
89
  "support": 17.0
90
  },
91
  "product_discovery": {
92
- "f1-score": 0.96875,
93
- "precision": 0.9393939393939394,
94
- "recall": 1.0,
95
  "support": 31.0
96
  },
97
  "provider_selection": {
98
- "f1-score": 0.9795918367346939,
99
- "precision": 1.0,
100
- "recall": 0.96,
101
  "support": 25.0
102
  },
103
  "purchase": {
@@ -107,27 +107,27 @@
107
  "support": 6.0
108
  },
109
  "signup": {
110
- "f1-score": 0.9411764705882353,
111
- "precision": 0.8888888888888888,
112
  "recall": 1.0,
113
  "support": 16.0
114
  },
115
  "task_execution": {
116
- "f1-score": 0.926829268292683,
117
- "precision": 0.8636363636363636,
118
  "recall": 1.0,
119
  "support": 19.0
120
  },
121
  "troubleshooting": {
122
- "f1-score": 0.9230769230769231,
123
- "precision": 0.9230769230769231,
124
- "recall": 0.9230769230769231,
125
  "support": 13.0
126
  },
127
  "weighted avg": {
128
- "f1-score": 0.9643733669039578,
129
- "precision": 0.967429661617075,
130
- "recall": 0.9648562300319489,
131
  "support": 313.0
132
  }
133
  },
 
1
  {
2
+ "accepted_accuracy": 0.9042,
3
  "accepted_coverage": 1.0,
4
+ "accuracy": 0.9042,
5
+ "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/intent_subtype_train_confusion_matrix.csv",
6
  "count": 313,
7
+ "dataset_path": "/content/agentic-intent-classifier/data/subtype/train.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "intent_subtype",
10
+ "macro_f1": 0.8789,
11
  "per_class_metrics": {
12
  "account_help": {
13
+ "f1-score": 0.875,
14
+ "precision": 0.7777777777777778,
15
  "recall": 1.0,
16
  "support": 7.0
17
  },
18
+ "accuracy": 0.9041533546325878,
19
  "billing_help": {
20
+ "f1-score": 0.8,
21
  "precision": 1.0,
22
+ "recall": 0.6666666666666666,
23
  "support": 6.0
24
  },
25
  "booking": {
26
+ "f1-score": 0.6,
27
+ "precision": 0.6,
28
+ "recall": 0.6,
29
  "support": 5.0
30
  },
31
  "comparison": {
 
35
  "support": 15.0
36
  },
37
  "contact_sales": {
38
+ "f1-score": 0.8,
39
  "precision": 1.0,
40
+ "recall": 0.6666666666666666,
41
  "support": 9.0
42
  },
43
  "deal_seeking": {
44
+ "f1-score": 0.9523809523809523,
45
  "precision": 1.0,
46
+ "recall": 0.9090909090909091,
47
  "support": 11.0
48
  },
49
  "download": {
50
+ "f1-score": 0.9411764705882353,
51
+ "precision": 0.8888888888888888,
52
  "recall": 1.0,
53
  "support": 8.0
54
  },
55
  "education": {
56
+ "f1-score": 0.9719626168224299,
57
+ "precision": 0.9454545454545454,
58
  "recall": 1.0,
59
  "support": 52.0
60
  },
 
65
  "support": 20.0
66
  },
67
  "evaluation": {
68
+ "f1-score": 0.64,
69
  "precision": 1.0,
70
+ "recall": 0.47058823529411764,
71
  "support": 17.0
72
  },
73
  "follow_up": {
74
+ "f1-score": 0.8611111111111112,
75
+ "precision": 0.8611111111111112,
76
+ "recall": 0.8611111111111112,
77
  "support": 36.0
78
  },
79
  "macro avg": {
80
+ "f1-score": 0.8788540112011829,
81
+ "precision": 0.9092031425364758,
82
+ "recall": 0.8729694019289211,
83
  "support": 313.0
84
  },
85
  "onboarding_setup": {
86
+ "f1-score": 0.9375,
87
  "precision": 1.0,
88
+ "recall": 0.8823529411764706,
89
  "support": 17.0
90
  },
91
  "product_discovery": {
92
+ "f1-score": 0.8955223880597015,
93
+ "precision": 0.8333333333333334,
94
+ "recall": 0.967741935483871,
95
  "support": 31.0
96
  },
97
  "provider_selection": {
98
+ "f1-score": 0.9387755102040817,
99
+ "precision": 0.9583333333333334,
100
+ "recall": 0.92,
101
  "support": 25.0
102
  },
103
  "purchase": {
 
107
  "support": 6.0
108
  },
109
  "signup": {
110
+ "f1-score": 0.8888888888888888,
111
+ "precision": 0.8,
112
  "recall": 1.0,
113
  "support": 16.0
114
  },
115
  "task_execution": {
116
+ "f1-score": 0.8837209302325582,
117
+ "precision": 0.7916666666666666,
118
  "recall": 1.0,
119
  "support": 19.0
120
  },
121
  "troubleshooting": {
122
+ "f1-score": 0.8333333333333334,
123
+ "precision": 0.9090909090909091,
124
+ "recall": 0.7692307692307693,
125
  "support": 13.0
126
  },
127
  "weighted avg": {
128
+ "f1-score": 0.8995750585641842,
129
+ "precision": 0.9142834091715881,
130
+ "recall": 0.9041533546325878,
131
  "support": 313.0
132
  }
133
  },
artifacts/evaluation/latest/intent_subtype_val_confusion_matrix.csv CHANGED
@@ -1,12 +1,12 @@
1
  ,education,product_discovery,comparison,evaluation,deal_seeking,provider_selection,signup,purchase,booking,download,contact_sales,task_execution,onboarding_setup,troubleshooting,account_help,billing_help,follow_up,emotional_reflection
2
  education,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3
  product_discovery,0,10,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
4
- comparison,0,0,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0
5
  evaluation,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6
  deal_seeking,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0
7
  provider_selection,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0
8
  signup,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0
9
- purchase,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0
10
  booking,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0
11
  download,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
12
  contact_sales,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
@@ -15,5 +15,5 @@ onboarding_setup,0,1,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0
15
  troubleshooting,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
16
  account_help,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0
17
  billing_help,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
18
- follow_up,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,10,0
19
  emotional_reflection,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5
 
1
  ,education,product_discovery,comparison,evaluation,deal_seeking,provider_selection,signup,purchase,booking,download,contact_sales,task_execution,onboarding_setup,troubleshooting,account_help,billing_help,follow_up,emotional_reflection
2
  education,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3
  product_discovery,0,10,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
4
+ comparison,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5
  evaluation,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6
  deal_seeking,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0
7
  provider_selection,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0
8
  signup,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0
9
+ purchase,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0
10
  booking,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0
11
  download,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
12
  contact_sales,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
 
15
  troubleshooting,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
16
  account_help,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0
17
  billing_help,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
18
+ follow_up,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,9,0
19
  emotional_reflection,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5
artifacts/evaluation/latest/intent_subtype_val_report.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "accepted_accuracy": 0.875,
3
- "accepted_coverage": 1.0,
4
  "accuracy": 0.875,
5
- "confusion_matrix_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/artifacts/evaluation/latest/intent_subtype_val_confusion_matrix.csv",
6
  "count": 80,
7
- "dataset_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/data/subtype/val.jsonl",
8
- "fallback_rate": 0.0,
9
  "head": "intent_subtype",
10
- "macro_f1": 0.725,
11
  "per_class_metrics": {
12
  "account_help": {
13
  "f1-score": 0.5,
@@ -29,9 +29,9 @@
29
  "support": 3.0
30
  },
31
  "comparison": {
32
- "f1-score": 0.4,
33
  "precision": 1.0,
34
- "recall": 0.25,
35
  "support": 4.0
36
  },
37
  "contact_sales": {
@@ -41,8 +41,8 @@
41
  "support": 0.0
42
  },
43
  "deal_seeking": {
44
- "f1-score": 0.5714285714285714,
45
- "precision": 0.4,
46
  "recall": 1.0,
47
  "support": 2.0
48
  },
@@ -71,20 +71,20 @@
71
  "support": 2.0
72
  },
73
  "follow_up": {
74
- "f1-score": 0.9523809523809523,
75
  "precision": 1.0,
76
- "recall": 0.9090909090909091,
77
  "support": 11.0
78
  },
79
  "macro avg": {
80
- "f1-score": 0.6444203944203944,
81
- "precision": 0.6542087542087542,
82
- "recall": 0.687121212121212,
83
  "support": 80.0
84
  },
85
  "onboarding_setup": {
86
- "f1-score": 0.8,
87
- "precision": 0.8,
88
  "recall": 0.8,
89
  "support": 5.0
90
  },
@@ -113,8 +113,8 @@
113
  "support": 2.0
114
  },
115
  "task_execution": {
116
- "f1-score": 1.0,
117
- "precision": 1.0,
118
  "recall": 1.0,
119
  "support": 8.0
120
  },
@@ -125,8 +125,8 @@
125
  "support": 1.0
126
  },
127
  "weighted avg": {
128
- "f1-score": 0.8644047619047619,
129
- "precision": 0.8891666666666665,
130
  "recall": 0.875,
131
  "support": 80.0
132
  }
 
1
  {
2
+ "accepted_accuracy": 0.8734,
3
+ "accepted_coverage": 0.9875,
4
  "accuracy": 0.875,
5
+ "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/intent_subtype_val_confusion_matrix.csv",
6
  "count": 80,
7
+ "dataset_path": "/content/agentic-intent-classifier/data/subtype/val.jsonl",
8
+ "fallback_rate": 0.0125,
9
  "head": "intent_subtype",
10
+ "macro_f1": 0.7429,
11
  "per_class_metrics": {
12
  "account_help": {
13
  "f1-score": 0.5,
 
29
  "support": 3.0
30
  },
31
  "comparison": {
32
+ "f1-score": 0.6666666666666666,
33
  "precision": 1.0,
34
+ "recall": 0.5,
35
  "support": 4.0
36
  },
37
  "contact_sales": {
 
41
  "support": 0.0
42
  },
43
  "deal_seeking": {
44
+ "f1-score": 0.6666666666666666,
45
+ "precision": 0.5,
46
  "recall": 1.0,
47
  "support": 2.0
48
  },
 
71
  "support": 2.0
72
  },
73
  "follow_up": {
74
+ "f1-score": 0.9,
75
  "precision": 1.0,
76
+ "recall": 0.8181818181818182,
77
  "support": 11.0
78
  },
79
  "macro avg": {
80
+ "f1-score": 0.660381593714927,
81
+ "precision": 0.6597643097643098,
82
+ "recall": 0.695959595959596,
83
  "support": 80.0
84
  },
85
  "onboarding_setup": {
86
+ "f1-score": 0.8888888888888888,
87
+ "precision": 1.0,
88
  "recall": 0.8,
89
  "support": 5.0
90
  },
 
113
  "support": 2.0
114
  },
115
  "task_execution": {
116
+ "f1-score": 0.8888888888888888,
117
+ "precision": 0.8,
118
  "recall": 1.0,
119
  "support": 8.0
120
  },
 
125
  "support": 1.0
126
  },
127
  "weighted avg": {
128
+ "f1-score": 0.8673611111111112,
129
+ "precision": 0.8841666666666667,
130
  "recall": 0.875,
131
  "support": 80.0
132
  }
artifacts/evaluation/latest/intent_type_difficulty_benchmark_report.json CHANGED
@@ -2,9 +2,9 @@
2
  "accepted_accuracy": 0.9867,
3
  "accepted_coverage": 1.0,
4
  "accuracy": 0.9867,
5
- "confusion_matrix_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/artifacts/evaluation/latest/intent_type_difficulty_benchmark_confusion_matrix.csv",
6
  "count": 150,
7
- "dataset_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/data/intent_type_benchmark.jsonl",
8
  "difficulty_breakdown": {
9
  "easy": {
10
  "accepted_accuracy": 1.0,
 
2
  "accepted_accuracy": 0.9867,
3
  "accepted_coverage": 1.0,
4
  "accuracy": 0.9867,
5
+ "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/intent_type_difficulty_benchmark_confusion_matrix.csv",
6
  "count": 150,
7
+ "dataset_path": "/content/agentic-intent-classifier/data/intent_type_benchmark.jsonl",
8
  "difficulty_breakdown": {
9
  "easy": {
10
  "accepted_accuracy": 1.0,
artifacts/evaluation/latest/intent_type_hard_cases_report.json CHANGED
@@ -2,9 +2,9 @@
2
  "accepted_accuracy": 1.0,
3
  "accepted_coverage": 1.0,
4
  "accuracy": 1.0,
5
- "confusion_matrix_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/artifacts/evaluation/latest/intent_type_hard_cases_confusion_matrix.csv",
6
  "count": 61,
7
- "dataset_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/data/hard_cases.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "intent_type",
10
  "macro_f1": 1.0,
 
2
  "accepted_accuracy": 1.0,
3
  "accepted_coverage": 1.0,
4
  "accuracy": 1.0,
5
+ "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/intent_type_hard_cases_confusion_matrix.csv",
6
  "count": 61,
7
+ "dataset_path": "/content/agentic-intent-classifier/data/hard_cases.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "intent_type",
10
  "macro_f1": 1.0,
artifacts/evaluation/latest/intent_type_test_confusion_matrix.csv CHANGED
@@ -1,7 +1,7 @@
1
  ,informational,exploratory,commercial,transactional,support,personal_reflection,creative_generation,chit_chat,ambiguous,prohibited
2
  informational,8,0,0,0,0,0,0,0,0,0
3
  exploratory,0,1,0,0,0,0,0,0,0,0
4
- commercial,1,0,9,0,0,0,0,0,0,0
5
  transactional,0,0,0,8,0,0,0,0,0,0
6
  support,0,0,0,0,2,0,0,0,0,1
7
  personal_reflection,0,0,0,0,0,5,0,0,0,0
 
1
  ,informational,exploratory,commercial,transactional,support,personal_reflection,creative_generation,chit_chat,ambiguous,prohibited
2
  informational,8,0,0,0,0,0,0,0,0,0
3
  exploratory,0,1,0,0,0,0,0,0,0,0
4
+ commercial,0,0,10,0,0,0,0,0,0,0
5
  transactional,0,0,0,8,0,0,0,0,0,0
6
  support,0,0,0,0,2,0,0,0,0,1
7
  personal_reflection,0,0,0,0,0,5,0,0,0,0
artifacts/evaluation/latest/intent_type_test_report.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "accepted_accuracy": 0.9149,
3
  "accepted_coverage": 1.0,
4
- "accuracy": 0.9149,
5
- "confusion_matrix_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/artifacts/evaluation/latest/intent_type_test_confusion_matrix.csv",
6
  "count": 47,
7
- "dataset_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/data/test.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "intent_type",
10
- "macro_f1": 0.9131,
11
  "per_class_metrics": {
12
- "accuracy": 0.9148936170212766,
13
  "ambiguous": {
14
  "f1-score": 0.875,
15
  "precision": 1.0,
@@ -23,9 +23,9 @@
23
  "support": 1.0
24
  },
25
  "commercial": {
26
- "f1-score": 0.9,
27
- "precision": 0.9,
28
- "recall": 0.9,
29
  "support": 10.0
30
  },
31
  "creative_generation": {
@@ -41,15 +41,15 @@
41
  "support": 1.0
42
  },
43
  "informational": {
44
- "f1-score": 0.8888888888888888,
45
- "precision": 0.8,
46
  "recall": 1.0,
47
  "support": 8.0
48
  },
49
  "macro avg": {
50
- "f1-score": 0.9130555555555555,
51
- "precision": 0.9199999999999999,
52
- "recall": 0.9344444444444445,
53
  "support": 47.0
54
  },
55
  "personal_reflection": {
@@ -77,9 +77,9 @@
77
  "support": 8.0
78
  },
79
  "weighted avg": {
80
- "f1-score": 0.916016548463357,
81
- "precision": 0.9340425531914893,
82
- "recall": 0.9148936170212766,
83
  "support": 47.0
84
  }
85
  },
 
1
  {
2
+ "accepted_accuracy": 0.9362,
3
  "accepted_coverage": 1.0,
4
+ "accuracy": 0.9362,
5
+ "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/intent_type_test_confusion_matrix.csv",
6
  "count": 47,
7
+ "dataset_path": "/content/agentic-intent-classifier/data/test.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "intent_type",
10
+ "macro_f1": 0.9235,
11
  "per_class_metrics": {
12
+ "accuracy": 0.9361702127659575,
13
  "ambiguous": {
14
  "f1-score": 0.875,
15
  "precision": 1.0,
 
23
  "support": 1.0
24
  },
25
  "commercial": {
26
+ "f1-score": 0.9523809523809523,
27
+ "precision": 0.9090909090909091,
28
+ "recall": 1.0,
29
  "support": 10.0
30
  },
31
  "creative_generation": {
 
41
  "support": 1.0
42
  },
43
  "informational": {
44
+ "f1-score": 0.9411764705882353,
45
+ "precision": 0.8888888888888888,
46
  "recall": 1.0,
47
  "support": 8.0
48
  },
49
  "macro avg": {
50
+ "f1-score": 0.9235224089635853,
51
+ "precision": 0.9297979797979797,
52
+ "recall": 0.9444444444444444,
53
  "support": 47.0
54
  },
55
  "personal_reflection": {
 
77
  "support": 8.0
78
  },
79
  "weighted avg": {
80
+ "f1-score": 0.9360614458549377,
81
+ "precision": 0.9511068128089405,
82
+ "recall": 0.9361702127659575,
83
  "support": 47.0
84
  }
85
  },
artifacts/evaluation/latest/intent_type_third_wave_cases_confusion_matrix.csv CHANGED
@@ -1,11 +1,11 @@
1
  ,informational,exploratory,commercial,transactional,support,personal_reflection,creative_generation,chit_chat,ambiguous,prohibited
2
  informational,0,0,0,0,0,0,0,0,0,0
3
  exploratory,0,1,0,0,0,0,0,0,0,0
4
- commercial,1,0,11,0,0,0,0,0,0,0
5
  transactional,0,0,0,0,0,0,0,0,0,0
6
  support,0,0,0,0,0,0,0,0,0,0
7
  personal_reflection,0,0,0,0,0,0,0,0,0,0
8
  creative_generation,0,0,0,0,0,0,1,0,0,0
9
  chit_chat,0,0,0,0,0,0,0,1,0,0
10
- ambiguous,1,0,1,0,0,0,0,0,8,0
11
  prohibited,0,0,0,0,0,0,0,0,0,1
 
1
  ,informational,exploratory,commercial,transactional,support,personal_reflection,creative_generation,chit_chat,ambiguous,prohibited
2
  informational,0,0,0,0,0,0,0,0,0,0
3
  exploratory,0,1,0,0,0,0,0,0,0,0
4
+ commercial,0,0,12,0,0,0,0,0,0,0
5
  transactional,0,0,0,0,0,0,0,0,0,0
6
  support,0,0,0,0,0,0,0,0,0,0
7
  personal_reflection,0,0,0,0,0,0,0,0,0,0
8
  creative_generation,0,0,0,0,0,0,1,0,0,0
9
  chit_chat,0,0,0,0,0,0,0,1,0,0
10
+ ambiguous,1,0,2,0,0,0,0,0,7,0
11
  prohibited,0,0,0,0,0,0,0,0,0,1
artifacts/evaluation/latest/intent_type_third_wave_cases_report.json CHANGED
@@ -2,18 +2,18 @@
2
  "accepted_accuracy": 0.8846,
3
  "accepted_coverage": 1.0,
4
  "accuracy": 0.8846,
5
- "confusion_matrix_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/artifacts/evaluation/latest/intent_type_third_wave_cases_confusion_matrix.csv",
6
  "count": 26,
7
- "dataset_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/data/third_wave_cases.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "intent_type",
10
- "macro_f1": 0.8294,
11
  "per_class_metrics": {
12
  "accuracy": 0.8846153846153846,
13
  "ambiguous": {
14
- "f1-score": 0.8888888888888888,
15
  "precision": 1.0,
16
- "recall": 0.8,
17
  "support": 10.0
18
  },
19
  "chit_chat": {
@@ -23,9 +23,9 @@
23
  "support": 1.0
24
  },
25
  "commercial": {
26
- "f1-score": 0.9166666666666666,
27
- "precision": 0.9166666666666666,
28
- "recall": 0.9166666666666666,
29
  "support": 12.0
30
  },
31
  "creative_generation": {
@@ -47,9 +47,9 @@
47
  "support": 0.0
48
  },
49
  "macro avg": {
50
- "f1-score": 0.5805555555555555,
51
- "precision": 0.5916666666666666,
52
- "recall": 0.5716666666666667,
53
  "support": 26.0
54
  },
55
  "personal_reflection": {
@@ -77,8 +77,8 @@
77
  "support": 0.0
78
  },
79
  "weighted avg": {
80
- "f1-score": 0.9188034188034189,
81
- "precision": 0.9615384615384616,
82
  "recall": 0.8846153846153846,
83
  "support": 26.0
84
  }
 
2
  "accepted_accuracy": 0.8846,
3
  "accepted_coverage": 1.0,
4
  "accuracy": 0.8846,
5
+ "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/intent_type_third_wave_cases_confusion_matrix.csv",
6
  "count": 26,
7
+ "dataset_path": "/content/agentic-intent-classifier/data/third_wave_cases.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "intent_type",
10
+ "macro_f1": 0.8209,
11
  "per_class_metrics": {
12
  "accuracy": 0.8846153846153846,
13
  "ambiguous": {
14
+ "f1-score": 0.8235294117647058,
15
  "precision": 1.0,
16
+ "recall": 0.7,
17
  "support": 10.0
18
  },
19
  "chit_chat": {
 
23
  "support": 1.0
24
  },
25
  "commercial": {
26
+ "f1-score": 0.9230769230769231,
27
+ "precision": 0.8571428571428571,
28
+ "recall": 1.0,
29
  "support": 12.0
30
  },
31
  "creative_generation": {
 
47
  "support": 0.0
48
  },
49
  "macro avg": {
50
+ "f1-score": 0.5746606334841629,
51
+ "precision": 0.5857142857142857,
52
+ "recall": 0.5700000000000001,
53
  "support": 26.0
54
  },
55
  "personal_reflection": {
 
77
  "support": 0.0
78
  },
79
  "weighted avg": {
80
+ "f1-score": 0.8966237382526975,
81
+ "precision": 0.9340659340659341,
82
  "recall": 0.8846153846153846,
83
  "support": 26.0
84
  }
artifacts/evaluation/latest/intent_type_train_report.json CHANGED
@@ -1,11 +1,11 @@
1
  {
2
  "accepted_accuracy": 1.0,
3
- "accepted_coverage": 1.0,
4
  "accuracy": 1.0,
5
- "confusion_matrix_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/artifacts/evaluation/latest/intent_type_train_confusion_matrix.csv",
6
  "count": 183,
7
- "dataset_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/data/train.jsonl",
8
- "fallback_rate": 0.0,
9
  "head": "intent_type",
10
  "macro_f1": 1.0,
11
  "per_class_metrics": {
 
1
  {
2
  "accepted_accuracy": 1.0,
3
+ "accepted_coverage": 0.9945,
4
  "accuracy": 1.0,
5
+ "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/intent_type_train_confusion_matrix.csv",
6
  "count": 183,
7
+ "dataset_path": "/content/agentic-intent-classifier/data/train.jsonl",
8
+ "fallback_rate": 0.0055,
9
  "head": "intent_type",
10
  "macro_f1": 1.0,
11
  "per_class_metrics": {
artifacts/evaluation/latest/intent_type_val_confusion_matrix.csv CHANGED
@@ -1,11 +1,11 @@
1
  ,informational,exploratory,commercial,transactional,support,personal_reflection,creative_generation,chit_chat,ambiguous,prohibited
2
  informational,8,0,0,0,0,0,0,0,0,0
3
  exploratory,0,1,0,0,0,0,0,0,0,0
4
- commercial,0,0,10,0,0,0,0,0,0,0
5
  transactional,0,0,0,7,0,0,1,0,0,0
6
- support,0,0,0,0,2,0,0,0,0,1
7
  personal_reflection,0,0,0,0,0,5,0,0,0,0
8
  creative_generation,0,0,0,0,0,0,1,0,0,0
9
  chit_chat,0,0,0,0,0,0,0,1,0,0
10
- ambiguous,0,0,0,0,0,0,0,0,9,0
11
  prohibited,0,0,0,0,0,0,0,0,0,1
 
1
  ,informational,exploratory,commercial,transactional,support,personal_reflection,creative_generation,chit_chat,ambiguous,prohibited
2
  informational,8,0,0,0,0,0,0,0,0,0
3
  exploratory,0,1,0,0,0,0,0,0,0,0
4
+ commercial,0,1,9,0,0,0,0,0,0,0
5
  transactional,0,0,0,7,0,0,1,0,0,0
6
+ support,0,0,0,0,3,0,0,0,0,0
7
  personal_reflection,0,0,0,0,0,5,0,0,0,0
8
  creative_generation,0,0,0,0,0,0,1,0,0,0
9
  chit_chat,0,0,0,0,0,0,0,1,0,0
10
+ ambiguous,0,0,1,0,0,0,0,0,8,0
11
  prohibited,0,0,0,0,0,0,0,0,0,1