manikumargouni committed on
Commit
37d98fb
·
verified ·
1 Parent(s): ba9485c

Upload folder using huggingface_hub

Browse files
Files changed (45) hide show
  1. COLAB_SETUP.md +10 -0
  2. HF_MODEL_CARD.md +63 -1
  3. README.md +462 -182
  4. artifacts/calibration/decision_phase.json +9 -9
  5. artifacts/calibration/iab_content.json +14 -14
  6. artifacts/calibration/intent_subtype.json +13 -13
  7. artifacts/calibration/intent_type.json +9 -9
  8. artifacts/evaluation/latest/combined_demo_benchmark.json +175 -159
  9. artifacts/evaluation/latest/decision_phase_difficulty_benchmark_confusion_matrix.csv +3 -3
  10. artifacts/evaluation/latest/decision_phase_difficulty_benchmark_report.json +22 -22
  11. artifacts/evaluation/latest/decision_phase_final_wave_cases_confusion_matrix.csv +2 -2
  12. artifacts/evaluation/latest/decision_phase_final_wave_cases_report.json +13 -13
  13. artifacts/evaluation/latest/decision_phase_test_confusion_matrix.csv +1 -1
  14. artifacts/evaluation/latest/decision_phase_test_report.json +14 -14
  15. artifacts/evaluation/latest/decision_phase_train_confusion_matrix.csv +3 -3
  16. artifacts/evaluation/latest/decision_phase_train_report.json +18 -18
  17. artifacts/evaluation/latest/decision_phase_val_confusion_matrix.csv +2 -2
  18. artifacts/evaluation/latest/decision_phase_val_report.json +14 -14
  19. artifacts/evaluation/latest/iab_behavior_lock_regression.json +53 -23
  20. artifacts/evaluation/latest/iab_content_cross_vertical_benchmark_report.json +52 -52
  21. artifacts/evaluation/latest/iab_content_difficulty_benchmark_report.json +58 -58
  22. artifacts/evaluation/latest/iab_content_extended_cases_report.json +20 -20
  23. artifacts/evaluation/latest/iab_content_hard_cases_report.json +7 -7
  24. artifacts/evaluation/latest/iab_content_test_report.json +29 -29
  25. artifacts/evaluation/latest/iab_content_train_report.json +29 -29
  26. artifacts/evaluation/latest/iab_content_val_report.json +29 -29
  27. artifacts/evaluation/latest/iab_cross_vertical_behavior_lock_regression.json +337 -240
  28. artifacts/evaluation/latest/iab_cross_vertical_quality_target_eval.json +217 -466
  29. artifacts/evaluation/latest/iab_quality_target_eval.json +36 -41
  30. artifacts/evaluation/latest/intent_subtype_difficulty_benchmark_confusion_matrix.csv +15 -15
  31. artifacts/evaluation/latest/intent_subtype_difficulty_benchmark_report.json +60 -60
  32. artifacts/evaluation/latest/intent_subtype_extended_cases_confusion_matrix.csv +1 -1
  33. artifacts/evaluation/latest/intent_subtype_extended_cases_report.json +14 -14
  34. artifacts/evaluation/latest/intent_subtype_hard_cases_confusion_matrix.csv +2 -2
  35. artifacts/evaluation/latest/intent_subtype_hard_cases_report.json +17 -17
  36. artifacts/evaluation/latest/intent_subtype_test_confusion_matrix.csv +3 -3
  37. artifacts/evaluation/latest/intent_subtype_test_report.json +23 -23
  38. artifacts/evaluation/latest/intent_subtype_train_confusion_matrix.csv +10 -10
  39. artifacts/evaluation/latest/intent_subtype_train_report.json +43 -43
  40. artifacts/evaluation/latest/intent_subtype_val_confusion_matrix.csv +2 -2
  41. artifacts/evaluation/latest/intent_subtype_val_report.json +22 -22
  42. artifacts/evaluation/latest/intent_type_hard_cases_report.json +2 -2
  43. artifacts/evaluation/latest/summary.json +0 -0
  44. training/run_full_training_pipeline.py +20 -1
  45. training/upload_to_hf.py +35 -1
COLAB_SETUP.md CHANGED
@@ -30,6 +30,16 @@ print(torch.cuda.is_available(), torch.cuda.get_device_name(0) if torch.cuda.is_
30
  %pip install -q -r requirements.txt
31
  ```
32
 
 
 
 
 
 
 
 
 
 
 
33
  If `requirements.txt` is missing, install manually:
34
 
35
  ```python
 
30
  %pip install -q -r requirements.txt
31
  ```
32
 
33
+ If you see Torch version conflicts like:
34
+
35
+ - `torchvision ... requires torch==2.10.0, but you have torch 2.11.0`
36
+
37
+ Pin matching versions (then restart the runtime):
38
+
39
+ ```python
40
+ %pip install -q -U torch==2.10.0 torchvision==0.25.0 torchaudio==2.10.0
41
+ ```
42
+
43
  If `requirements.txt` is missing, install manually:
44
 
45
  ```python
HF_MODEL_CARD.md CHANGED
@@ -43,7 +43,47 @@ Combines multitask intent modeling, supervised IAB content classification, and p
43
 
44
  ## Deployment Options
45
 
46
- ### 1. `transformers.pipeline()` — one line anywhere
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
  ```python
49
  from transformers import pipeline
@@ -142,6 +182,28 @@ clf = AdmeshIntentPipeline.from_pretrained("admesh/agentic-intent-classifier")
142
 
143
  ---
144
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
  ## Example Output
146
 
147
  ```json
 
43
 
44
  ## Deployment Options
45
 
46
+ ### 0. Colab / Kaggle Quickstart (copy/paste)
47
+
48
+ ```python
49
+ !pip -q install -U pip
50
+ !pip -q install -U "torch==2.10.0" "torchvision==0.25.0" "torchaudio==2.10.0"
51
+ !pip -q install -U "transformers>=4.36.0" "huggingface_hub>=0.20.0" "safetensors>=0.4.0"
52
+ ```
53
+
54
+ Restart the runtime after installs (**Runtime → Restart runtime**) so the new Torch version is actually used.
55
+
56
+ ```python
57
+ from transformers import pipeline
58
+
59
+ clf = pipeline(
60
+ "admesh-intent",
61
+ model="admesh/agentic-intent-classifier",
62
+ trust_remote_code=True, # required (custom pipeline + multi-model bundle)
63
+ )
64
+
65
+ out = clf("Which laptop should I buy for college?")
66
+ print(out["meta"])
67
+ print(out["model_output"]["classification"]["intent"])
68
+ ```
69
+
70
+ ---
71
+
72
+ ## Latency / inference timing (quick check)
73
+
74
+ The first call includes model/code loading. Warm up once, then measure:
75
+
76
+ ```python
77
+ import time
78
+ q = "Which laptop should I buy for college?"
79
+
80
+ _ = clf("warm up")
81
+ t0 = time.perf_counter()
82
+ out = clf(q)
83
+ print(f"latency_ms={(time.perf_counter() - t0) * 1000:.1f}")
84
+ ```
85
+
86
+ ### 1. `transformers.pipeline()` — anywhere (Python)
87
 
88
  ```python
89
  from transformers import pipeline
 
182
 
183
  ---
184
 
185
+ ## Troubleshooting (avoid environment errors)
186
+
187
+ ### `No module named 'combined_inference'` (or similar)
188
+
189
+ This means the Hub repo root is missing required Python files. Ensure these exist at the **root of the model repo** (same level as `pipeline.py`):
190
+
191
+ - `pipeline.py`, `config.json`, `config.py`
192
+ - `combined_inference.py`, `schemas.py`
193
+ - `model_runtime.py`, `multitask_runtime.py`, `multitask_model.py`
194
+ - `inference_intent_type.py`, `inference_subtype.py`, `inference_decision_phase.py`, `inference_iab_classifier.py`
195
+ - `iab_classifier.py`, `iab_taxonomy.py`
196
+
197
+ ### `does not appear to have a file named model.safetensors`
198
+
199
+ Transformers requires a standard checkpoint at the repo root for `pipeline()` to initialize. This repo includes a **small dummy** `model.safetensors` + tokenizer files at the root for compatibility; the *real* production weights live in:
200
+
201
+ - `multitask_intent_model_output/`
202
+ - `iab_classifier_model_output/`
203
+ - `artifacts/calibration/`
204
+
205
+ ---
206
+
207
  ## Example Output
208
 
209
  ```json
README.md CHANGED
@@ -1,57 +1,10 @@
1
- ---
2
- language:
3
- - en
4
- library_name: transformers
5
- pipeline_tag: text-classification
6
- base_model: distilbert-base-uncased
7
- metrics:
8
- - accuracy
9
- - f1
10
- tags:
11
- - intent-classification
12
- - multitask
13
- - iab
14
- - conversational-ai
15
- - adtech
16
- - calibrated-confidence
17
- license: apache-2.0
18
- ---
19
-
20
- # admesh/agentic-intent-classifier
21
-
22
- Production-ready intent + IAB classifier bundle for conversational traffic.
23
-
24
- Combines multitask intent modeling, supervised IAB content classification, and per-head confidence calibration to support safe monetization decisions in real time.
25
-
26
- ## Links
27
-
28
- - Hugging Face: https://huggingface.co/admesh/agentic-intent-classifier
29
- - GitHub: https://github.com/GouniManikumar12/agentic-intent-classifier
30
 
31
- ## What It Predicts
32
 
33
- | Field | Description |
34
- |---|---|
35
- | `intent.type` | `commercial`, `informational`, `navigational`, `transactional`, … |
36
- | `intent.subtype` | `product_discovery`, `comparison`, `how_to`, … |
37
- | `intent.decision_phase` | `awareness`, `consideration`, `decision`, … |
38
- | `iab_content` | IAB Content Taxonomy 3.0 tier1 / tier2 / tier3 labels |
39
- | `component_confidence` | Per-head calibrated confidence with threshold flags |
40
- | `system_decision` | Monetization eligibility, opportunity type, policy |
41
-
42
- ---
43
-
44
- ## Deployment Options
45
-
46
- ### 0. Colab / Kaggle Quickstart (copy/paste)
47
-
48
- ```python
49
- !pip -q install -U pip
50
- !pip -q install -U "torch==2.10.0" "torchvision==0.25.0" "torchaudio==2.10.0"
51
- !pip -q install -U "transformers>=4.36.0" "huggingface_hub>=0.20.0" "safetensors>=0.4.0"
52
- ```
53
 
54
- Restart the runtime after installs (**Runtime → Restart runtime**) so the new Torch version is actually used.
55
 
56
  ```python
57
  from transformers import pipeline
@@ -59,212 +12,539 @@ from transformers import pipeline
59
  clf = pipeline(
60
  "admesh-intent",
61
  model="admesh/agentic-intent-classifier",
62
- trust_remote_code=True, # required (custom pipeline + multi-model bundle)
63
  )
64
 
65
  out = clf("Which laptop should I buy for college?")
66
- print(out["meta"])
67
  print(out["model_output"]["classification"]["intent"])
 
 
68
  ```
69
 
70
- ---
 
 
71
 
72
- ## Latency / inference timing (quick check)
73
 
74
- The first call includes model/code loading. Warm up once, then measure:
75
 
76
  ```python
77
  import time
 
 
 
78
  q = "Which laptop should I buy for college?"
79
 
80
  _ = clf("warm up")
81
  t0 = time.perf_counter()
82
  out = clf(q)
83
- print(f"latency_ms={(time.perf_counter() - t0) * 1000:.1f}")
 
 
 
84
  ```
85
 
86
- ### 1. `transformers.pipeline()` — anywhere (Python)
87
 
88
  ```python
89
- from transformers import pipeline
90
 
91
- clf = pipeline(
92
- "admesh-intent",
93
- model="admesh/agentic-intent-classifier",
94
- trust_remote_code=True,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  )
 
 
 
 
 
96
 
 
 
97
  result = clf("Which laptop should I buy for college?")
 
 
 
 
 
 
 
 
 
 
98
  ```
99
 
100
- Batch and custom thresholds:
101
 
102
  ```python
103
- # batch
104
  results = clf([
105
  "Best running shoes under $100",
106
- "How does TCP work?",
107
  "Buy noise-cancelling headphones",
108
  ])
109
 
110
- # custom confidence thresholds
111
  result = clf(
112
- "Buy headphones",
113
  threshold_overrides={"intent_type": 0.6, "intent_subtype": 0.35},
114
  )
115
  ```
116
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  ---
118
 
119
- ### 2. HF Inference Endpoints (managed, deploy to AWS / Azure / GCP)
120
 
121
- 1. Go to https://ui.endpoints.huggingface.co
122
- 2. **New Endpoint** → select `admesh/agentic-intent-classifier`
123
- 3. Framework: **PyTorch** — Task: **Text Classification**
124
- 4. Enable **"Load with trust_remote_code"**
125
- 5. Deploy
 
 
126
 
127
- The endpoint serves the same `pipeline()` interface above via REST:
128
 
129
  ```bash
130
- curl https://<your-endpoint>.endpoints.huggingface.cloud \
131
- -H "Authorization: Bearer $HF_TOKEN" \
132
- -H "Content-Type: application/json" \
133
- -d '{"inputs": "Which laptop should I buy for college?"}'
134
  ```
135
 
136
- ---
137
 
138
- ### 3. HF Spaces (Gradio / Streamlit demo)
 
 
 
139
 
140
- ```python
141
- # app.py for a Gradio Space
142
- import gradio as gr
143
- from transformers import pipeline
144
 
145
- clf = pipeline(
146
- "admesh-intent",
147
- model="admesh/agentic-intent-classifier",
148
- trust_remote_code=True,
149
- )
150
 
151
- def classify(text):
152
- return clf(text)
153
 
154
- gr.Interface(fn=classify, inputs="text", outputs="json").launch()
 
 
 
155
  ```
156
 
157
- ---
158
 
159
- ### 4. Local / notebook via `snapshot_download`
 
 
 
160
 
161
- ```python
162
- import sys
163
- from huggingface_hub import snapshot_download
164
 
165
- local_dir = snapshot_download(
166
- repo_id="admesh/agentic-intent-classifier",
167
- repo_type="model",
168
- )
169
- sys.path.insert(0, local_dir)
170
 
171
- from pipeline import AdmeshIntentPipeline
172
- clf = AdmeshIntentPipeline()
173
- result = clf("I need a CRM for a 5-person startup")
 
 
 
 
174
  ```
175
 
176
- Or the one-liner factory:
177
 
178
- ```python
179
- from pipeline import AdmeshIntentPipeline
180
- clf = AdmeshIntentPipeline.from_pretrained("admesh/agentic-intent-classifier")
 
 
 
 
 
 
 
 
 
 
181
  ```
182
 
183
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
 
185
- ## Troubleshooting (avoid environment errors)
186
 
187
- ### `No module named 'combined_inference'` (or similar)
 
 
 
188
 
189
- This means the Hub repo root is missing required Python files. Ensure these exist at the **root of the model repo** (same level as `pipeline.py`):
 
 
 
 
 
190
 
191
- - `pipeline.py`, `config.json`, `config.py`
192
- - `combined_inference.py`, `schemas.py`
193
- - `model_runtime.py`, `multitask_runtime.py`, `multitask_model.py`
194
- - `inference_intent_type.py`, `inference_subtype.py`, `inference_decision_phase.py`, `inference_iab_classifier.py`
195
- - `iab_classifier.py`, `iab_taxonomy.py`
196
 
197
- ### `does not appear to have a file named model.safetensors`
 
 
 
 
 
 
 
 
 
 
198
 
199
- Transformers requires a standard checkpoint at the repo root for `pipeline()` to initialize. This repo includes a **small dummy** `model.safetensors` + tokenizer files at the root for compatibility; the *real* production weights live in:
200
 
201
- - `multitask_intent_model_output/`
202
- - `iab_classifier_model_output/`
203
  - `artifacts/calibration/`
 
204
 
205
- ---
206
 
207
- ## Example Output
208
-
209
- ```json
210
- {
211
- "model_output": {
212
- "classification": {
213
- "iab_content": {
214
- "taxonomy": "IAB Content Taxonomy",
215
- "taxonomy_version": "3.0",
216
- "tier1": {"id": "552", "label": "Style & Fashion"},
217
- "tier2": {"id": "579", "label": "Men's Fashion"},
218
- "mapping_mode": "exact",
219
- "mapping_confidence": 0.73
220
- },
221
- "intent": {
222
- "type": "commercial",
223
- "subtype": "product_discovery",
224
- "decision_phase": "consideration",
225
- "confidence": 0.9549,
226
- "commercial_score": 0.656
227
- }
228
- }
229
- },
230
- "system_decision": {
231
- "policy": {
232
- "monetization_eligibility": "allowed_with_caution",
233
- "eligibility_reason": "commercial_discovery_signal_present"
234
- },
235
- "opportunity": {"type": "soft_recommendation", "strength": "medium"}
236
- },
237
- "meta": {
238
- "system_version": "0.6.0-phase4",
239
- "calibration_enabled": true,
240
- "iab_mapping_is_placeholder": false
241
- }
242
- }
243
- ```
244
-
245
- ## Reproducible Revision
246
 
247
- ```python
248
- from huggingface_hub import snapshot_download
249
- local_dir = snapshot_download(
250
- repo_id="admesh/agentic-intent-classifier",
251
- repo_type="model",
252
- revision="0584798f8efee6beccd778b0afa06782ab5add60",
253
- )
 
 
 
 
 
254
  ```
255
 
256
- ## Included Artifacts
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
257
 
258
- | Path | Contents |
259
- |---|---|
260
- | `multitask_intent_model_output/` | DistilBERT multitask weights + tokenizer |
261
- | `iab_classifier_model_output/` | IAB content classifier weights + tokenizer |
262
- | `artifacts/calibration/` | Per-head temperature + threshold JSONs |
263
- | `pipeline.py` | `AdmeshIntentPipeline` (transformers.Pipeline subclass) |
264
- | `combined_inference.py` | Core inference logic |
265
 
266
- ## Notes
267
 
268
- - `trust_remote_code=True` is required because this model uses a custom multi-head architecture that does not map to a single standard `AutoModel` checkpoint.
269
- - `meta.iab_mapping_is_placeholder: true` means IAB artifacts were missing or skipped; train and calibrate IAB for full production accuracy.
270
- - For long-running servers, instantiate once and reuse — models are cached in memory after the first call.
 
 
1
+ # Agentic Intent Classifier
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
+ `agentic-intent-classifier` is a multi-head query classification stack for conversational traffic.
4
 
5
+ ## Quickstart (recommended): run from Hugging Face Hub
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
+ This is the easiest way for developers to test the full production stack (multitask intent + IAB + calibration) without training locally.
8
 
9
  ```python
10
  from transformers import pipeline
 
12
  clf = pipeline(
13
  "admesh-intent",
14
  model="admesh/agentic-intent-classifier",
15
+ trust_remote_code=True,
16
  )
17
 
18
  out = clf("Which laptop should I buy for college?")
 
19
  print(out["model_output"]["classification"]["intent"])
20
+ print(out["model_output"]["classification"]["iab_content"])
21
+ print(out["meta"])
22
  ```
23
 
24
+ If you’re running in Colab/Kaggle and see Torch version conflicts, follow `COLAB_SETUP.md`.
25
+
26
+ ## Latency / inference timing (developer quick check)
27
 
28
+ The first call includes model/code loading; measure latency after a warm-up call.
29
 
30
+ Single query:
31
 
32
  ```python
33
  import time
34
+ from transformers import pipeline
35
+
36
+ clf = pipeline("admesh-intent", model="admesh/agentic-intent-classifier", trust_remote_code=True)
37
  q = "Which laptop should I buy for college?"
38
 
39
  _ = clf("warm up")
40
  t0 = time.perf_counter()
41
  out = clf(q)
42
+ dt_ms = (time.perf_counter() - t0) * 1000
43
+
44
+ print(f"latency_ms={dt_ms:.1f}")
45
+ print(out["model_output"]["classification"]["intent"])
46
  ```
47
 
48
+ Warm p50 / p95 over 20 runs:
49
 
50
  ```python
51
+ import time, statistics
52
 
53
+ times = []
54
+ for _ in range(20):
55
+ t0 = time.perf_counter()
56
+ _ = clf(q)
57
+ times.append((time.perf_counter() - t0) * 1000)
58
+
59
+ times_sorted = sorted(times)
60
+ print(f"p50={statistics.median(times):.1f}ms p95={times_sorted[int(0.95*len(times))-1]:.1f}ms mean={statistics.mean(times):.1f}ms")
61
+ ```
62
+
63
+ The classifier currently produces:
64
+
65
+ - `intent.type`
66
+ - `intent.subtype`
67
+ - `intent.decision_phase`
68
+ - `iab_content`
69
+ - calibrated confidence per head
70
+ - combined fallback / policy / opportunity decisions
71
+
72
+ The repo is beyond the original v0.1 baseline. It now includes:
73
+
74
+ - shared config and label ownership
75
+ - reusable model runtime
76
+ - calibrated confidence and threshold gating
77
+ - combined inference with fallback/policy logic
78
+ - request/response validation in the demo API
79
+ - repeatable evaluation and regression suites
80
+ - full-TSV IAB taxonomy retrieval support through tier4
81
+ - a local embedding index for taxonomy-node retrieval over IAB content paths
82
+ - a separate synthetic full-intent-taxonomy augmentation dataset for non-IAB heads
83
+ - a dedicated intent-type difficulty dataset and held-out benchmark with `easy`, `medium`, and `hard` cases
84
+ - a dedicated decision-phase difficulty dataset and held-out benchmark with `easy`, `medium`, and `hard` cases
85
+
86
+ Generated model weights are intentionally not committed.
87
+
88
+ ## Current Taxonomy
89
+
90
+ ### `intent.type`
91
+
92
+ - `informational`
93
+ - `exploratory`
94
+ - `commercial`
95
+ - `transactional`
96
+ - `support`
97
+ - `personal_reflection`
98
+ - `creative_generation`
99
+ - `chit_chat`
100
+ - `ambiguous`
101
+ - `prohibited`
102
+
103
+ ### `intent.decision_phase`
104
+
105
+ - `awareness`
106
+ - `research`
107
+ - `consideration`
108
+ - `decision`
109
+ - `action`
110
+ - `post_purchase`
111
+ - `support`
112
+
113
+ ### `intent.subtype`
114
+
115
+ - `education`
116
+ - `product_discovery`
117
+ - `comparison`
118
+ - `evaluation`
119
+ - `deal_seeking`
120
+ - `provider_selection`
121
+ - `signup`
122
+ - `purchase`
123
+ - `booking`
124
+ - `download`
125
+ - `contact_sales`
126
+ - `task_execution`
127
+ - `onboarding_setup`
128
+ - `troubleshooting`
129
+ - `account_help`
130
+ - `billing_help`
131
+ - `follow_up`
132
+ - `emotional_reflection`
133
+
134
+ ### `iab_content`
135
+
136
+ - candidates are derived from every row in [data/iab-content/Content Taxonomy 3.0.tsv](data/iab-content/Content%20Taxonomy%203.0.tsv)
137
+ - retrieval output supports `tier1`, `tier2`, `tier3`, and optional `tier4`
138
+
139
+ ## What The System Does
140
+
141
+ - runs three classifier heads:
142
+ - `intent_type`
143
+ - `intent_subtype`
144
+ - `decision_phase`
145
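+ - resolves `iab_content` through the compact supervised IAB classifier; a local embedding index over taxonomy nodes (with generic label/path reranking) remains available as an optional shadow retrieval baseline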
146
+ - applies calibration artifacts when present
147
+ - computes `commercial_score`
148
+ - applies fallback when confidence is too weak or policy-safe blocking is required
149
+ - emits a schema-validated combined envelope
150
+
151
+ ## What The System Does Not Do
152
+
153
+ - it is not a multi-turn memory system
154
+ - it is not a production-optimized low-latency serving path
155
+ - it is not yet trained on large real-traffic human-labeled intent data
156
+ - combined decision logic is still heuristic, even though it is materially stronger than the original baseline
157
+
158
+ ## Project Layout
159
+
160
+ - [config.py](config.py): labels, thresholds, artifact paths, model paths
161
+ - [model_runtime.py](model_runtime.py): shared calibrated inference runtime
162
+ - [combined_inference.py](combined_inference.py): composed system response
163
+ - [inference_intent_type.py](inference_intent_type.py): direct `intent_type` inference entrypoint
164
+ - [inference_iab_classifier.py](inference_iab_classifier.py): direct supervised `iab_content` inference entrypoint
165
+ - [schemas.py](schemas.py): request/response validation
166
+ - [demo_api.py](demo_api.py): local validated API
167
+ - [iab_taxonomy.py](iab_taxonomy.py): full taxonomy parser/index
168
+ - [iab_classifier.py](iab_classifier.py): supervised IAB runtime with taxonomy-aware parent fallback
169
+ - [iab_retrieval.py](iab_retrieval.py): optional shadow retrieval baseline
170
+ - [training/build_full_intent_taxonomy_dataset.py](training/build_full_intent_taxonomy_dataset.py): separate synthetic intent augmentation dataset
171
+ - [training/build_intent_type_difficulty_dataset.py](training/build_intent_type_difficulty_dataset.py): extra `intent_type` augmentation plus held-out difficulty benchmark
172
+ - [training/build_decision_phase_difficulty_dataset.py](training/build_decision_phase_difficulty_dataset.py): extra `decision_phase` augmentation plus held-out difficulty benchmark
173
+ - [training/build_subtype_difficulty_dataset.py](training/build_subtype_difficulty_dataset.py): extra `intent_subtype` augmentation plus held-out difficulty benchmark
174
+ - [training/build_subtype_dataset.py](training/build_subtype_dataset.py): subtype dataset generation from existing corpora
175
+ - [training/train_iab.py](training/train_iab.py): train the supervised IAB classifier head
176
+ - [training/build_iab_taxonomy_embeddings.py](training/build_iab_taxonomy_embeddings.py): build local IAB node embedding artifacts
177
+ - [training/run_full_training_pipeline.py](training/run_full_training_pipeline.py): full multi-head training/calibration/eval pipeline
178
+ - [evaluation/run_evaluation.py](evaluation/run_evaluation.py): repeatable benchmark runner
179
+ - [evaluation/run_regression_suite.py](evaluation/run_regression_suite.py): known-failure regression runner
180
+ - [evaluation/run_iab_mapping_suite.py](evaluation/run_iab_mapping_suite.py): IAB behavior-lock regression runner
181
+ - [evaluation/run_iab_quality_suite.py](evaluation/run_iab_quality_suite.py): curated IAB quality-target runner
182
+ - [known_limitations.md](known_limitations.md): current gaps and caveats
183
+
184
+ ## Quickstart: Run From Hugging Face
185
+
186
+ Download the trained bundle and run inference in a few lines of Python — no local training required.
187
+
188
+ ```python
189
+ import sys
190
+ from huggingface_hub import snapshot_download
191
+
192
+ # Download the full bundle (models + calibration + code)
193
+ local_dir = snapshot_download(
194
+ repo_id="admesh/agentic-intent-classifier",
195
+ repo_type="model",
196
  )
197
+ sys.path.insert(0, local_dir)
198
+
199
+ # Import and instantiate
200
+ from pipeline import AdmeshIntentPipeline
201
+ clf = AdmeshIntentPipeline()
202
 
203
+ # Classify
204
+ import json
205
  result = clf("Which laptop should I buy for college?")
206
+ print(json.dumps(result, indent=2))
207
+ ```
208
+
209
+ Or use the one-liner factory method:
210
+
211
+ ```python
212
+ from pipeline import AdmeshIntentPipeline # after sys.path.insert above
213
+
214
+ clf = AdmeshIntentPipeline.from_pretrained("admesh/agentic-intent-classifier")
215
+ result = clf("I need a CRM for a 5-person startup")
216
  ```
217
 
218
+ Batch mode and custom thresholds are also supported:
219
 
220
  ```python
221
+ # Batch
222
  results = clf([
223
  "Best running shoes under $100",
224
+ "How does gradient descent work?",
225
  "Buy noise-cancelling headphones",
226
  ])
227
 
228
+ # Custom confidence thresholds
229
  result = clf(
230
+ "Buy noise-cancelling headphones",
231
  threshold_overrides={"intent_type": 0.6, "intent_subtype": 0.35},
232
  )
233
  ```
234
 
235
+ Verify artifacts and run a smoke test from the CLI:
236
+
237
+ ```bash
238
+ cd "<local_dir>"
239
+ python3 training/pipeline_verify.py
240
+ python3 combined_inference.py "Which CRM should I buy for a 3-person startup?"
241
+ ```
242
+
243
+ Pin a specific revision for reproducibility:
244
+
245
+ ```python
246
+ local_dir = snapshot_download(
247
+ repo_id="admesh/agentic-intent-classifier",
248
+ repo_type="model",
249
+ revision="0584798f8efee6beccd778b0afa06782ab5add60",
250
+ )
251
+ ```
252
+
253
  ---
254
 
255
+ ## Setup (for local training)
256
 
257
+ ```bash
258
+ python3 -m venv .venv
259
+ source .venv/bin/activate
260
+ pip install -r agentic-intent-classifier/requirements.txt
261
+ ```
262
+
263
+ ## Inference (local training path)
264
 
265
+ Run one query locally:
266
 
267
  ```bash
268
+ cd agentic-intent-classifier
269
+ python3 training/train_iab.py
270
+ python3 training/calibrate_confidence.py --head iab_content
271
+ python3 combined_inference.py "Which CRM should I buy for a 3-person startup?"
272
  ```
273
 
274
+ Run only the `intent_type` head:
275
 
276
+ ```bash
277
+ cd agentic-intent-classifier
278
+ python3 inference_intent_type.py "best shoes under 100"
279
+ ```
280
 
281
+ Run the demo API:
 
 
 
282
 
283
+ ```bash
284
+ cd agentic-intent-classifier
285
+ python3 demo_api.py
286
+ ```
 
287
 
288
+ Example request:
 
289
 
290
+ ```bash
291
+ curl -sS -X POST http://127.0.0.1:8008/classify \
292
+ -H 'Content-Type: application/json' \
293
+ -d '{"text":"I cannot log into my account"}'
294
  ```
295
 
296
+ Infra endpoints:
297
 
298
+ ```bash
299
+ curl -sS http://127.0.0.1:8008/health
300
+ curl -sS http://127.0.0.1:8008/version
301
+ ```
302
 
303
+ Train only the IAB classifier head:
 
 
304
 
305
+ ```bash
306
+ cd agentic-intent-classifier
307
+ python3 training/train_iab.py
308
+ python3 training/calibrate_confidence.py --head iab_content
309
+ ```
310
 
311
+ The online `iab_content` path now uses the compact supervised classifier. Retrieval is still available as an optional shadow baseline.
312
+
313
+ Build the optional retrieval shadow index:
314
+
315
+ ```bash
316
+ cd agentic-intent-classifier
317
+ python3 training/build_iab_taxonomy_embeddings.py
318
  ```
319
 
320
+ By default the shadow retrieval path uses `Alibaba-NLP/gte-Qwen2-1.5B-instruct`. The retrieval runtime applies the model's query-side instruction format and last-token pooling, matching the Hugging Face usage guidance. If you want to point retrieval at a different embedding model, set `IAB_RETRIEVAL_MODEL_NAME_OVERRIDE` before building the index.
321
 
322
+ Open-source users can swap in their own embedding model, but the contract is:
323
+
324
+ - query embeddings and taxonomy-node embeddings must be produced by the same model and model revision
325
+ - after changing models, you must rebuild `artifacts/iab/taxonomy_embeddings.pt`
326
+ - the repository only tests and supports the default model path out of the box
327
+ - not every Hugging Face embedding model is drop-in compatible with this runtime; some require custom pooling, query instructions, or `trust_remote_code`
328
+
329
+ Example override:
330
+
331
+ ```bash
332
+ cd agentic-intent-classifier
333
+ export IAB_RETRIEVAL_MODEL_NAME_OVERRIDE=mixedbread-ai/mxbai-embed-large-v1
334
+ python3 training/build_iab_taxonomy_embeddings.py
335
  ```
336
 
337
+ This writes:
338
+
339
+ - `artifacts/iab/taxonomy_nodes.json`
340
+ - `artifacts/iab/taxonomy_embeddings.pt`
341
+
342
+ ## Training
343
+
344
+ ### Full local pipeline
345
+
346
+ ```bash
347
+ cd agentic-intent-classifier
348
+ python3 training/run_full_training_pipeline.py
349
+ ```
350
+
351
+ This pipeline now does:
352
+
353
+ 1. build separate full-intent-taxonomy augmentation data
354
+ 2. build separate `intent_type` difficulty augmentation + benchmark
355
+ 3. train `intent_type`
356
+ 4. build subtype corpus
357
+ 5. build separate `intent_subtype` difficulty augmentation + benchmark
358
+ 6. train `intent_subtype`
359
+ 7. build separate `decision_phase` difficulty augmentation + benchmark
360
+ 8. train `decision_phase`
361
+ 9. train `iab_content`
362
+ 10. calibrate all classifier heads, including `iab_content`
363
+ 11. run regression/evaluation unless `--skip-full-eval` is used
364
+
365
+ ### Build datasets individually
366
+
367
+ Separate full-intent augmentation:
368
+
369
+ ```bash
370
+ cd agentic-intent-classifier
371
+ python3 training/build_full_intent_taxonomy_dataset.py
372
+ ```
373
+
374
+ Intent-type difficulty augmentation and benchmark:
375
+
376
+ ```bash
377
+ cd agentic-intent-classifier
378
+ python3 training/build_intent_type_difficulty_dataset.py
379
+ ```
380
+
381
+ Decision-phase difficulty augmentation and benchmark:
382
+
383
+ ```bash
384
+ cd agentic-intent-classifier
385
+ python3 training/build_decision_phase_difficulty_dataset.py
386
+ ```
387
+
388
+ Subtype difficulty augmentation and benchmark:
389
+
390
+ ```bash
391
+ cd agentic-intent-classifier
392
+ python3 training/build_subtype_difficulty_dataset.py
393
+ ```
394
+
395
+ Subtype dataset:
396
+
397
+ ```bash
398
+ cd agentic-intent-classifier
399
+ python3 training/build_subtype_dataset.py
400
+ ```
401
+
402
+ IAB embedding index:
403
+
404
+ ```bash
405
+ cd agentic-intent-classifier
406
+ python3 training/build_iab_taxonomy_embeddings.py
407
+ ```
408
+
409
+ ### Train heads individually
410
+
411
+ ```bash
412
+ cd agentic-intent-classifier
413
+ python3 training/train.py
414
+ python3 training/train_subtype.py
415
+ python3 training/train_decision_phase.py
416
+ ```
417
+
418
+ ### Calibration
419
+
420
+ ```bash
421
+ cd agentic-intent-classifier
422
+ python3 training/calibrate_confidence.py --head intent_type
423
+ python3 training/calibrate_confidence.py --head intent_subtype
424
+ python3 training/calibrate_confidence.py --head decision_phase
425
+ ```
426
+
427
+ ## Evaluation
428
+
429
+ Full evaluation:
430
+
431
+ ```bash
432
+ cd agentic-intent-classifier
433
+ python3 evaluation/run_evaluation.py
434
+ ```
435
 
436
+ Known-failure regression:
437
 
438
+ ```bash
439
+ cd agentic-intent-classifier
440
+ python3 evaluation/run_regression_suite.py
441
+ ```
442
 
443
+ IAB behavior-lock regression:
444
+
445
+ ```bash
446
+ cd agentic-intent-classifier
447
+ python3 evaluation/run_iab_mapping_suite.py
448
+ ```
449
 
450
+ IAB quality-target evaluation:
 
 
 
 
451
 
452
+ ```bash
453
+ cd agentic-intent-classifier
454
+ python3 evaluation/run_iab_quality_suite.py
455
+ ```
456
+
457
+ Threshold sweeps:
458
+
459
+ ```bash
460
+ cd agentic-intent-classifier
461
+ python3 evaluation/sweep_intent_threshold.py
462
+ ```
463
 
464
+ Artifacts are written to:
465
 
 
 
466
  - `artifacts/calibration/`
467
+ - `artifacts/evaluation/latest/`
468
 
469
+ ## Google Colab
470
 
471
+ Use Colab for the full retraining pass if local memory is limited.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
472
 
473
+ Clone once:
474
+
475
+ ```bash
476
+ %cd /content
477
+ !git clone https://github.com/GouniManikumar12/agentic-intent-classifier.git
478
+ %cd /content/agentic-intent-classifier
479
+ ```
480
+
481
+ If the repo is already cloned and you want the latest code, pull manually:
482
+
483
+ ```bash
484
+ !git pull origin main
485
  ```
486
 
487
+ Full pipeline:
488
+
489
+ ```bash
490
+ !python training/run_full_training_pipeline.py
491
+ ```
492
+
493
+ If full evaluation is too heavy for the current Colab runtime:
494
+
495
+ ```bash
496
+ !python training/run_full_training_pipeline.py \
497
+ --iab-embedding-batch-size 32 \
498
+ --skip-full-eval
499
+ ```
500
+
501
+ Then run eval separately after training:
502
+
503
+ ```bash
504
+ !python evaluation/run_regression_suite.py
505
+ !python evaluation/run_iab_mapping_suite.py
506
+ !python evaluation/run_iab_quality_suite.py
507
+ !python evaluation/run_evaluation.py
508
+ ```
509
+
510
+ ## Current Saved Metrics
511
+
512
+ Generate fresh metrics with:
513
+
514
+ ```bash
515
+ cd agentic-intent-classifier
516
+ python3 evaluation/run_evaluation.py
517
+ ```
518
+
519
+ Do not treat any checked-in summary as canonical unless it was regenerated after the current code and artifacts were built. The IAB path is now retrieval-based, so older saved reports from the deleted hierarchy stack are not meaningful.
520
+
521
+ ## Latency Note
522
+
523
+ `combined_inference.py` is a debugging/offline path, not a production latency path.
524
+
525
+ Current production reality:
526
+
527
+ - per-request CLI execution is not a sub-50ms architecture
528
+ - production serving should use a long-lived API process with preloaded models
529
+ - if sub-50ms becomes a hard requirement, the serving path will need:
530
+ - persistently loaded models
531
+ - runtime optimization
532
+ - likely fewer model passes or a shared multi-head model
533
+
534
+ ## Current Status
535
+
536
+ Current repo status:
537
 
538
+ - full 10-class `intent.type` taxonomy is wired
539
+ - subtype and phase heads are present
540
+ - difficulty benchmarks are wired for `intent_type`, `intent_subtype`, and `decision_phase`
541
+ - full-TSV IAB taxonomy retrieval is wired through tier4
542
+ - separate full-intent augmentation dataset is in place
543
+ - evaluation/runtime memory handling is improved for large IAB splits
 
544
 
545
+ The main remaining gap is no longer basic infrastructure; it is real-world robustness, especially for:
546
 
547
+ - `decision_phase`
548
+ - `intent_subtype`
549
+ - confidence quality on borderline commercial queries
550
+ - real-traffic supervision beyond synthetic data
artifacts/calibration/decision_phase.json CHANGED
@@ -1,20 +1,20 @@
1
  {
2
  "calibrated": true,
3
  "confidence_threshold": 0.22,
4
- "generated_at": "2026-03-25T16:15:10.949430+00:00",
5
  "head": "decision_phase",
6
  "metrics": {
7
  "calibrated_accuracy": 0.8621,
8
- "calibrated_expected_calibration_error": 0.0672,
9
- "calibrated_negative_log_likelihood": 0.4798,
10
- "mean_calibrated_confidence": 0.868,
11
- "mean_raw_confidence": 0.868,
12
  "raw_accuracy": 0.8621,
13
- "raw_expected_calibration_error": 0.0672,
14
- "raw_negative_log_likelihood": 0.4798
15
  },
16
  "minimum_threshold_floor": 0.22,
17
- "optimized_temperature_candidate": 1.000144,
18
  "selected_threshold_before_floor": {
19
  "accepted_accuracy": 0.8621,
20
  "coverage": 1.0,
@@ -22,7 +22,7 @@
22
  },
23
  "selection_split": "val",
24
  "selection_target_precision": 0.75,
25
- "temperature": 1.000144,
26
  "temperature_scaling_applied": true,
27
  "threshold_summary": {
28
  "accepted_accuracy": 0.8621,
 
1
  {
2
  "calibrated": true,
3
  "confidence_threshold": 0.22,
4
+ "generated_at": "2026-03-25T19:06:15.261600+00:00",
5
  "head": "decision_phase",
6
  "metrics": {
7
  "calibrated_accuracy": 0.8621,
8
+ "calibrated_expected_calibration_error": 0.047,
9
+ "calibrated_negative_log_likelihood": 0.5014,
10
+ "mean_calibrated_confidence": 0.8653,
11
+ "mean_raw_confidence": 0.8672,
12
  "raw_accuracy": 0.8621,
13
+ "raw_expected_calibration_error": 0.0325,
14
+ "raw_negative_log_likelihood": 0.5015
15
  },
16
  "minimum_threshold_floor": 0.22,
17
+ "optimized_temperature_candidate": 1.007321,
18
  "selected_threshold_before_floor": {
19
  "accepted_accuracy": 0.8621,
20
  "coverage": 1.0,
 
22
  },
23
  "selection_split": "val",
24
  "selection_target_precision": 0.75,
25
+ "temperature": 1.007321,
26
  "temperature_scaling_applied": true,
27
  "threshold_summary": {
28
  "accepted_accuracy": 0.8621,
artifacts/calibration/iab_content.json CHANGED
@@ -1,32 +1,32 @@
1
  {
2
  "calibrated": true,
3
  "confidence_threshold": 0.12,
4
- "generated_at": "2026-03-25T16:17:01.813766+00:00",
5
  "head": "iab_content",
6
  "metrics": {
7
- "calibrated_accuracy": 0.9308,
8
- "calibrated_expected_calibration_error": 0.2692,
9
- "calibrated_negative_log_likelihood": 0.5893,
10
- "mean_calibrated_confidence": 0.6617,
11
- "mean_raw_confidence": 0.1809,
12
- "raw_accuracy": 0.9308,
13
- "raw_expected_calibration_error": 0.75,
14
- "raw_negative_log_likelihood": 1.9134
15
  },
16
  "minimum_threshold_floor": 0.12,
17
- "optimized_temperature_candidate": 0.55869,
18
  "selected_threshold_before_floor": {
19
- "accepted_accuracy": 0.9308,
20
  "coverage": 1.0,
21
  "threshold": 0.0
22
  },
23
  "selection_split": "val",
24
  "selection_target_precision": 0.7,
25
- "temperature": 0.55869,
26
  "temperature_scaling_applied": true,
27
  "threshold_summary": {
28
- "accepted_accuracy": 0.9436,
29
- "coverage": 0.9775,
30
  "threshold": 0.12
31
  }
32
  }
 
1
  {
2
  "calibrated": true,
3
  "confidence_threshold": 0.12,
4
+ "generated_at": "2026-03-25T19:08:04.708996+00:00",
5
  "head": "iab_content",
6
  "metrics": {
7
+ "calibrated_accuracy": 0.9442,
8
+ "calibrated_expected_calibration_error": 0.0283,
9
+ "calibrated_negative_log_likelihood": 0.2113,
10
+ "mean_calibrated_confidence": 0.9159,
11
+ "mean_raw_confidence": 0.1856,
12
+ "raw_accuracy": 0.9442,
13
+ "raw_expected_calibration_error": 0.7587,
14
+ "raw_negative_log_likelihood": 1.8642
15
  },
16
  "minimum_threshold_floor": 0.12,
17
+ "optimized_temperature_candidate": 0.26014,
18
  "selected_threshold_before_floor": {
19
+ "accepted_accuracy": 0.9442,
20
  "coverage": 1.0,
21
  "threshold": 0.0
22
  },
23
  "selection_split": "val",
24
  "selection_target_precision": 0.7,
25
+ "temperature": 0.26014,
26
  "temperature_scaling_applied": true,
27
  "threshold_summary": {
28
+ "accepted_accuracy": 0.9442,
29
+ "coverage": 1.0,
30
  "threshold": 0.12
31
  }
32
  }
artifacts/calibration/intent_subtype.json CHANGED
@@ -1,31 +1,31 @@
1
  {
2
  "calibrated": true,
3
  "confidence_threshold": 0.25,
4
- "generated_at": "2026-03-25T16:15:00.986765+00:00",
5
  "head": "intent_subtype",
6
  "metrics": {
7
- "calibrated_accuracy": 0.875,
8
- "calibrated_expected_calibration_error": 0.0667,
9
- "calibrated_negative_log_likelihood": 0.3811,
10
- "mean_calibrated_confidence": 0.8307,
11
- "mean_raw_confidence": 0.7584,
12
- "raw_accuracy": 0.875,
13
- "raw_expected_calibration_error": 0.1314,
14
- "raw_negative_log_likelihood": 0.4541
15
  },
16
  "minimum_threshold_floor": 0.25,
17
- "optimized_temperature_candidate": 0.824082,
18
  "selected_threshold_before_floor": {
19
- "accepted_accuracy": 0.875,
20
  "coverage": 1.0,
21
  "threshold": 0.0
22
  },
23
  "selection_split": "val",
24
  "selection_target_precision": 0.75,
25
- "temperature": 0.824082,
26
  "temperature_scaling_applied": true,
27
  "threshold_summary": {
28
- "accepted_accuracy": 0.8734,
29
  "coverage": 0.9875,
30
  "threshold": 0.25
31
  }
 
1
  {
2
  "calibrated": true,
3
  "confidence_threshold": 0.25,
4
+ "generated_at": "2026-03-25T19:06:04.625853+00:00",
5
  "head": "intent_subtype",
6
  "metrics": {
7
+ "calibrated_accuracy": 0.85,
8
+ "calibrated_expected_calibration_error": 0.0782,
9
+ "calibrated_negative_log_likelihood": 0.451,
10
+ "mean_calibrated_confidence": 0.826,
11
+ "mean_raw_confidence": 0.775,
12
+ "raw_accuracy": 0.85,
13
+ "raw_expected_calibration_error": 0.124,
14
+ "raw_negative_log_likelihood": 0.4945
15
  },
16
  "minimum_threshold_floor": 0.25,
17
+ "optimized_temperature_candidate": 0.868223,
18
  "selected_threshold_before_floor": {
19
+ "accepted_accuracy": 0.85,
20
  "coverage": 1.0,
21
  "threshold": 0.0
22
  },
23
  "selection_split": "val",
24
  "selection_target_precision": 0.75,
25
+ "temperature": 0.868223,
26
  "temperature_scaling_applied": true,
27
  "threshold_summary": {
28
+ "accepted_accuracy": 0.8608,
29
  "coverage": 0.9875,
30
  "threshold": 0.25
31
  }
artifacts/calibration/intent_type.json CHANGED
@@ -1,20 +1,20 @@
1
  {
2
  "calibrated": true,
3
  "confidence_threshold": 0.4,
4
- "generated_at": "2026-03-25T16:14:49.053223+00:00",
5
  "head": "intent_type",
6
  "metrics": {
7
  "calibrated_accuracy": 0.9362,
8
- "calibrated_expected_calibration_error": 0.0715,
9
- "calibrated_negative_log_likelihood": 0.2384,
10
- "mean_calibrated_confidence": 0.917,
11
- "mean_raw_confidence": 0.8891,
12
  "raw_accuracy": 0.9362,
13
- "raw_expected_calibration_error": 0.0807,
14
- "raw_negative_log_likelihood": 0.257
15
  },
16
  "minimum_threshold_floor": 0.4,
17
- "optimized_temperature_candidate": 0.901567,
18
  "selected_threshold_before_floor": {
19
  "accepted_accuracy": 0.9362,
20
  "coverage": 1.0,
@@ -22,7 +22,7 @@
22
  },
23
  "selection_split": "val",
24
  "selection_target_precision": 0.8,
25
- "temperature": 0.901567,
26
  "temperature_scaling_applied": true,
27
  "threshold_summary": {
28
  "accepted_accuracy": 0.9362,
 
1
  {
2
  "calibrated": true,
3
  "confidence_threshold": 0.4,
4
+ "generated_at": "2026-03-25T19:05:52.623864+00:00",
5
  "head": "intent_type",
6
  "metrics": {
7
  "calibrated_accuracy": 0.9362,
8
+ "calibrated_expected_calibration_error": 0.0845,
9
+ "calibrated_negative_log_likelihood": 0.2091,
10
+ "mean_calibrated_confidence": 0.9221,
11
+ "mean_raw_confidence": 0.8936,
12
  "raw_accuracy": 0.9362,
13
+ "raw_expected_calibration_error": 0.0771,
14
+ "raw_negative_log_likelihood": 0.2295
15
  },
16
  "minimum_threshold_floor": 0.4,
17
+ "optimized_temperature_candidate": 0.895563,
18
  "selected_threshold_before_floor": {
19
  "accepted_accuracy": 0.9362,
20
  "coverage": 1.0,
 
22
  },
23
  "selection_split": "val",
24
  "selection_target_precision": 0.8,
25
+ "temperature": 0.895563,
26
  "temperature_scaling_applied": true,
27
  "threshold_summary": {
28
  "accepted_accuracy": 0.9362,
artifacts/evaluation/latest/combined_demo_benchmark.json CHANGED
@@ -11,8 +11,8 @@
11
  "model_output": {
12
  "classification": {
13
  "iab_content": {
14
- "mapping_confidence": 0.676,
15
- "mapping_mode": "nearest_equivalent",
16
  "taxonomy": "IAB Content Taxonomy",
17
  "taxonomy_version": "3.0",
18
  "tier1": {
@@ -22,6 +22,10 @@
22
  "tier2": {
23
  "id": "599",
24
  "label": "Computing"
 
 
 
 
25
  }
26
  },
27
  "intent": {
@@ -29,31 +33,31 @@
29
  "component_confidence": {
30
  "decision_phase": {
31
  "calibrated": true,
32
- "confidence": 0.961,
33
  "confidence_threshold": 0.22,
34
  "label": "awareness",
35
  "meets_threshold": true,
36
- "raw_confidence": 0.9611
37
  },
38
  "intent_subtype": {
39
  "calibrated": true,
40
- "confidence": 0.9853,
41
  "confidence_threshold": 0.25,
42
  "label": "education",
43
  "meets_threshold": true,
44
- "raw_confidence": 0.9516
45
  },
46
  "intent_type": {
47
  "calibrated": true,
48
- "confidence": 0.9807,
49
  "confidence_threshold": 0.4,
50
  "label": "informational",
51
  "meets_threshold": true,
52
- "raw_confidence": 0.9655
53
  },
54
  "overall_strategy": "min_required_component_confidence"
55
  },
56
- "confidence": 0.961,
57
  "decision_phase": "awareness",
58
  "subtype": "education",
59
  "summary": "Classified as informational intent with subtype education in the awareness phase.",
@@ -98,8 +102,8 @@
98
  "model_output": {
99
  "classification": {
100
  "iab_content": {
101
- "mapping_confidence": 0.4448,
102
- "mapping_mode": "nearest_equivalent",
103
  "taxonomy": "IAB Content Taxonomy",
104
  "taxonomy_version": "3.0",
105
  "tier1": {
@@ -112,31 +116,31 @@
112
  "component_confidence": {
113
  "decision_phase": {
114
  "calibrated": true,
115
- "confidence": 0.9381,
116
  "confidence_threshold": 0.22,
117
  "label": "awareness",
118
  "meets_threshold": true,
119
- "raw_confidence": 0.9381
120
  },
121
  "intent_subtype": {
122
  "calibrated": true,
123
- "confidence": 0.9753,
124
  "confidence_threshold": 0.25,
125
  "label": "education",
126
  "meets_threshold": true,
127
- "raw_confidence": 0.9275
128
  },
129
  "intent_type": {
130
  "calibrated": true,
131
- "confidence": 0.9768,
132
  "confidence_threshold": 0.4,
133
  "label": "informational",
134
  "meets_threshold": true,
135
- "raw_confidence": 0.9597
136
  },
137
  "overall_strategy": "min_required_component_confidence"
138
  },
139
- "confidence": 0.9381,
140
  "decision_phase": "awareness",
141
  "subtype": "education",
142
  "summary": "Classified as informational intent with subtype education in the awareness phase.",
@@ -181,13 +185,17 @@
181
  "model_output": {
182
  "classification": {
183
  "iab_content": {
184
- "mapping_confidence": 0.7819,
185
- "mapping_mode": "nearest_equivalent",
186
  "taxonomy": "IAB Content Taxonomy",
187
  "taxonomy_version": "3.0",
188
  "tier1": {
189
  "id": "483",
190
  "label": "Sports"
 
 
 
 
191
  }
192
  },
193
  "intent": {
@@ -195,31 +203,31 @@
195
  "component_confidence": {
196
  "decision_phase": {
197
  "calibrated": true,
198
- "confidence": 0.9691,
199
  "confidence_threshold": 0.22,
200
  "label": "consideration",
201
  "meets_threshold": true,
202
- "raw_confidence": 0.9691
203
  },
204
  "intent_subtype": {
205
  "calibrated": true,
206
- "confidence": 0.563,
207
  "confidence_threshold": 0.25,
208
  "label": "comparison",
209
  "meets_threshold": true,
210
- "raw_confidence": 0.4806
211
  },
212
  "intent_type": {
213
  "calibrated": true,
214
- "confidence": 0.9869,
215
  "confidence_threshold": 0.4,
216
  "label": "commercial",
217
  "meets_threshold": true,
218
- "raw_confidence": 0.9756
219
  },
220
  "overall_strategy": "min_required_component_confidence"
221
  },
222
- "confidence": 0.563,
223
  "decision_phase": "consideration",
224
  "subtype": "comparison",
225
  "summary": "Classified as commercial intent with subtype comparison in the consideration phase.",
@@ -264,13 +272,17 @@
264
  "model_output": {
265
  "classification": {
266
  "iab_content": {
267
- "mapping_confidence": 0.3576,
268
- "mapping_mode": "nearest_equivalent",
269
  "taxonomy": "IAB Content Taxonomy",
270
  "taxonomy_version": "3.0",
271
  "tier1": {
272
  "id": "596",
273
  "label": "Technology & Computing"
 
 
 
 
274
  }
275
  },
276
  "intent": {
@@ -278,31 +290,31 @@
278
  "component_confidence": {
279
  "decision_phase": {
280
  "calibrated": true,
281
- "confidence": 0.9283,
282
  "confidence_threshold": 0.22,
283
  "label": "consideration",
284
  "meets_threshold": true,
285
- "raw_confidence": 0.9284
286
  },
287
  "intent_subtype": {
288
  "calibrated": true,
289
- "confidence": 0.9727,
290
  "confidence_threshold": 0.25,
291
  "label": "comparison",
292
  "meets_threshold": true,
293
- "raw_confidence": 0.9236
294
  },
295
  "intent_type": {
296
  "calibrated": true,
297
- "confidence": 0.9734,
298
  "confidence_threshold": 0.4,
299
  "label": "commercial",
300
  "meets_threshold": true,
301
- "raw_confidence": 0.954
302
  },
303
  "overall_strategy": "min_required_component_confidence"
304
  },
305
- "confidence": 0.9283,
306
  "decision_phase": "consideration",
307
  "subtype": "comparison",
308
  "summary": "Classified as commercial intent with subtype comparison in the consideration phase.",
@@ -347,17 +359,13 @@
347
  "model_output": {
348
  "classification": {
349
  "iab_content": {
350
- "mapping_confidence": 0.3542,
351
- "mapping_mode": "nearest_equivalent",
352
  "taxonomy": "IAB Content Taxonomy",
353
  "taxonomy_version": "3.0",
354
  "tier1": {
355
- "id": "52",
356
- "label": "Business and Finance"
357
- },
358
- "tier2": {
359
- "id": "53",
360
- "label": "Business"
361
  }
362
  },
363
  "intent": {
@@ -365,31 +373,31 @@
365
  "component_confidence": {
366
  "decision_phase": {
367
  "calibrated": true,
368
- "confidence": 0.8012,
369
  "confidence_threshold": 0.22,
370
  "label": "decision",
371
  "meets_threshold": true,
372
- "raw_confidence": 0.8012
373
  },
374
  "intent_subtype": {
375
  "calibrated": true,
376
- "confidence": 0.9028,
377
  "confidence_threshold": 0.25,
378
  "label": "provider_selection",
379
  "meets_threshold": true,
380
- "raw_confidence": 0.8041
381
  },
382
  "intent_type": {
383
  "calibrated": true,
384
- "confidence": 0.9759,
385
  "confidence_threshold": 0.4,
386
  "label": "commercial",
387
  "meets_threshold": true,
388
- "raw_confidence": 0.9582
389
  },
390
  "overall_strategy": "min_required_component_confidence"
391
  },
392
- "confidence": 0.8012,
393
  "decision_phase": "decision",
394
  "subtype": "provider_selection",
395
  "summary": "Classified as commercial intent with subtype provider_selection in the decision phase.",
@@ -434,13 +442,17 @@
434
  "model_output": {
435
  "classification": {
436
  "iab_content": {
437
- "mapping_confidence": 0.2259,
438
- "mapping_mode": "nearest_equivalent",
439
  "taxonomy": "IAB Content Taxonomy",
440
  "taxonomy_version": "3.0",
441
  "tier1": {
442
- "id": "483",
443
- "label": "Sports"
 
 
 
 
444
  }
445
  },
446
  "intent": {
@@ -448,31 +460,31 @@
448
  "component_confidence": {
449
  "decision_phase": {
450
  "calibrated": true,
451
- "confidence": 0.9176,
452
  "confidence_threshold": 0.22,
453
  "label": "action",
454
  "meets_threshold": true,
455
- "raw_confidence": 0.9176
456
  },
457
  "intent_subtype": {
458
  "calibrated": true,
459
- "confidence": 0.9675,
460
  "confidence_threshold": 0.25,
461
  "label": "signup",
462
  "meets_threshold": true,
463
- "raw_confidence": 0.9135
464
  },
465
  "intent_type": {
466
  "calibrated": true,
467
- "confidence": 0.9416,
468
  "confidence_threshold": 0.4,
469
  "label": "transactional",
470
  "meets_threshold": true,
471
- "raw_confidence": 0.909
472
  },
473
  "overall_strategy": "min_required_component_confidence"
474
  },
475
- "confidence": 0.9176,
476
  "decision_phase": "action",
477
  "subtype": "signup",
478
  "summary": "Classified as transactional intent with subtype signup in the action phase.",
@@ -517,7 +529,7 @@
517
  "model_output": {
518
  "classification": {
519
  "iab_content": {
520
- "mapping_confidence": 0.5455,
521
  "mapping_mode": "exact",
522
  "taxonomy": "IAB Content Taxonomy",
523
  "taxonomy_version": "3.0",
@@ -535,31 +547,31 @@
535
  "component_confidence": {
536
  "decision_phase": {
537
  "calibrated": true,
538
- "confidence": 0.9628,
539
  "confidence_threshold": 0.22,
540
  "label": "action",
541
  "meets_threshold": true,
542
- "raw_confidence": 0.9628
543
  },
544
  "intent_subtype": {
545
  "calibrated": true,
546
- "confidence": 0.7841,
547
  "confidence_threshold": 0.25,
548
  "label": "booking",
549
  "meets_threshold": true,
550
- "raw_confidence": 0.6676
551
  },
552
  "intent_type": {
553
  "calibrated": true,
554
- "confidence": 0.9761,
555
  "confidence_threshold": 0.4,
556
  "label": "transactional",
557
  "meets_threshold": true,
558
- "raw_confidence": 0.9583
559
  },
560
  "overall_strategy": "min_required_component_confidence"
561
  },
562
- "confidence": 0.7841,
563
  "decision_phase": "action",
564
  "subtype": "booking",
565
  "summary": "Classified as transactional intent with subtype booking in the action phase.",
@@ -604,8 +616,8 @@
604
  "model_output": {
605
  "classification": {
606
  "iab_content": {
607
- "mapping_confidence": 0.3927,
608
- "mapping_mode": "nearest_equivalent",
609
  "taxonomy": "IAB Content Taxonomy",
610
  "taxonomy_version": "3.0",
611
  "tier1": {
@@ -618,31 +630,31 @@
618
  "component_confidence": {
619
  "decision_phase": {
620
  "calibrated": true,
621
- "confidence": 0.9539,
622
  "confidence_threshold": 0.22,
623
  "label": "post_purchase",
624
  "meets_threshold": true,
625
- "raw_confidence": 0.9539
626
  },
627
  "intent_subtype": {
628
  "calibrated": true,
629
- "confidence": 0.9652,
630
  "confidence_threshold": 0.25,
631
  "label": "onboarding_setup",
632
  "meets_threshold": true,
633
- "raw_confidence": 0.9053
634
  },
635
  "intent_type": {
636
  "calibrated": true,
637
- "confidence": 0.7786,
638
  "confidence_threshold": 0.4,
639
  "label": "transactional",
640
  "meets_threshold": true,
641
- "raw_confidence": 0.7173
642
  },
643
  "overall_strategy": "min_required_component_confidence"
644
  },
645
- "confidence": 0.7786,
646
  "decision_phase": "post_purchase",
647
  "subtype": "onboarding_setup",
648
  "summary": "Classified as transactional intent with subtype onboarding_setup in the post_purchase phase.",
@@ -687,13 +699,21 @@
687
  "model_output": {
688
  "classification": {
689
  "iab_content": {
690
- "mapping_confidence": 0.2935,
691
- "mapping_mode": "nearest_equivalent",
692
  "taxonomy": "IAB Content Taxonomy",
693
  "taxonomy_version": "3.0",
694
  "tier1": {
695
  "id": "52",
696
  "label": "Business and Finance"
 
 
 
 
 
 
 
 
697
  }
698
  },
699
  "intent": {
@@ -701,31 +721,31 @@
701
  "component_confidence": {
702
  "decision_phase": {
703
  "calibrated": true,
704
- "confidence": 0.9528,
705
  "confidence_threshold": 0.22,
706
  "label": "support",
707
  "meets_threshold": true,
708
- "raw_confidence": 0.9528
709
  },
710
  "intent_subtype": {
711
  "calibrated": true,
712
- "confidence": 0.894,
713
  "confidence_threshold": 0.25,
714
  "label": "account_help",
715
  "meets_threshold": true,
716
- "raw_confidence": 0.8027
717
  },
718
  "intent_type": {
719
  "calibrated": true,
720
- "confidence": 0.9636,
721
  "confidence_threshold": 0.4,
722
  "label": "support",
723
  "meets_threshold": true,
724
- "raw_confidence": 0.9403
725
  },
726
  "overall_strategy": "min_required_component_confidence"
727
  },
728
- "confidence": 0.894,
729
  "decision_phase": "support",
730
  "subtype": "account_help",
731
  "summary": "Classified as support intent with subtype account_help in the support phase.",
@@ -776,13 +796,13 @@
776
  "model_output": {
777
  "classification": {
778
  "iab_content": {
779
- "mapping_confidence": 0.1521,
780
- "mapping_mode": "nearest_equivalent",
781
  "taxonomy": "IAB Content Taxonomy",
782
  "taxonomy_version": "3.0",
783
  "tier1": {
784
- "id": "v9i3On",
785
- "label": "Sensitive Topics"
786
  }
787
  },
788
  "intent": {
@@ -790,31 +810,31 @@
790
  "component_confidence": {
791
  "decision_phase": {
792
  "calibrated": true,
793
- "confidence": 0.843,
794
  "confidence_threshold": 0.22,
795
  "label": "awareness",
796
  "meets_threshold": true,
797
- "raw_confidence": 0.843
798
  },
799
  "intent_subtype": {
800
  "calibrated": true,
801
- "confidence": 0.9678,
802
  "confidence_threshold": 0.25,
803
  "label": "emotional_reflection",
804
  "meets_threshold": true,
805
- "raw_confidence": 0.9123
806
  },
807
  "intent_type": {
808
  "calibrated": true,
809
- "confidence": 0.929,
810
  "confidence_threshold": 0.4,
811
  "label": "personal_reflection",
812
  "meets_threshold": true,
813
- "raw_confidence": 0.8937
814
  },
815
  "overall_strategy": "min_required_component_confidence"
816
  },
817
- "confidence": 0.843,
818
  "decision_phase": "awareness",
819
  "subtype": "emotional_reflection",
820
  "summary": "Classified as personal_reflection intent with subtype emotional_reflection in the awareness phase.",
@@ -865,8 +885,8 @@
865
  "model_output": {
866
  "classification": {
867
  "iab_content": {
868
- "mapping_confidence": 0.0846,
869
- "mapping_mode": "nearest_equivalent",
870
  "taxonomy": "IAB Content Taxonomy",
871
  "taxonomy_version": "3.0",
872
  "tier1": {
@@ -879,31 +899,31 @@
879
  "component_confidence": {
880
  "decision_phase": {
881
  "calibrated": true,
882
- "confidence": 0.8336,
883
  "confidence_threshold": 0.22,
884
  "label": "research",
885
  "meets_threshold": true,
886
- "raw_confidence": 0.8336
887
  },
888
  "intent_subtype": {
889
  "calibrated": true,
890
- "confidence": 0.9685,
891
  "confidence_threshold": 0.25,
892
  "label": "follow_up",
893
  "meets_threshold": true,
894
- "raw_confidence": 0.9121
895
  },
896
  "intent_type": {
897
  "calibrated": true,
898
- "confidence": 0.9583,
899
  "confidence_threshold": 0.4,
900
  "label": "ambiguous",
901
  "meets_threshold": true,
902
- "raw_confidence": 0.9339
903
  },
904
  "overall_strategy": "min_required_component_confidence"
905
  },
906
- "confidence": 0.8336,
907
  "decision_phase": "research",
908
  "subtype": "follow_up",
909
  "summary": "Classified as ambiguous intent with subtype follow_up in the research phase.",
@@ -954,13 +974,17 @@
954
  "model_output": {
955
  "classification": {
956
  "iab_content": {
957
- "mapping_confidence": 0.2223,
958
- "mapping_mode": "nearest_equivalent",
959
  "taxonomy": "IAB Content Taxonomy",
960
  "taxonomy_version": "3.0",
961
  "tier1": {
962
  "id": "391",
963
  "label": "Personal Finance"
 
 
 
 
964
  }
965
  },
966
  "intent": {
@@ -968,31 +992,31 @@
968
  "component_confidence": {
969
  "decision_phase": {
970
  "calibrated": true,
971
- "confidence": 0.9337,
972
  "confidence_threshold": 0.22,
973
  "label": "research",
974
  "meets_threshold": true,
975
- "raw_confidence": 0.9337
976
  },
977
  "intent_subtype": {
978
  "calibrated": true,
979
- "confidence": 0.9493,
980
  "confidence_threshold": 0.25,
981
  "label": "follow_up",
982
  "meets_threshold": true,
983
- "raw_confidence": 0.875
984
  },
985
  "intent_type": {
986
  "calibrated": true,
987
- "confidence": 0.9454,
988
  "confidence_threshold": 0.4,
989
  "label": "ambiguous",
990
  "meets_threshold": true,
991
- "raw_confidence": 0.9149
992
  },
993
  "overall_strategy": "min_required_component_confidence"
994
  },
995
- "confidence": 0.9337,
996
  "decision_phase": "research",
997
  "subtype": "follow_up",
998
  "summary": "Classified as ambiguous intent with subtype follow_up in the research phase.",
@@ -1043,17 +1067,13 @@
1043
  "model_output": {
1044
  "classification": {
1045
  "iab_content": {
1046
- "mapping_confidence": 0.1233,
1047
- "mapping_mode": "exact",
1048
  "taxonomy": "IAB Content Taxonomy",
1049
  "taxonomy_version": "3.0",
1050
  "tier1": {
1051
- "id": "239",
1052
- "label": "Hobbies & Interests"
1053
- },
1054
- "tier2": {
1055
- "id": "264",
1056
- "label": "Content Production"
1057
  }
1058
  },
1059
  "intent": {
@@ -1061,31 +1081,31 @@
1061
  "component_confidence": {
1062
  "decision_phase": {
1063
  "calibrated": true,
1064
- "confidence": 0.9632,
1065
  "confidence_threshold": 0.22,
1066
  "label": "action",
1067
  "meets_threshold": true,
1068
- "raw_confidence": 0.9632
1069
  },
1070
  "intent_subtype": {
1071
  "calibrated": true,
1072
- "confidence": 0.9349,
1073
  "confidence_threshold": 0.25,
1074
  "label": "signup",
1075
  "meets_threshold": true,
1076
- "raw_confidence": 0.8563
1077
  },
1078
  "intent_type": {
1079
  "calibrated": true,
1080
- "confidence": 0.9378,
1081
  "confidence_threshold": 0.4,
1082
  "label": "transactional",
1083
  "meets_threshold": true,
1084
- "raw_confidence": 0.9047
1085
  },
1086
  "overall_strategy": "min_required_component_confidence"
1087
  },
1088
- "confidence": 0.9349,
1089
  "decision_phase": "action",
1090
  "subtype": "signup",
1091
  "summary": "Classified as transactional intent with subtype signup in the action phase.",
@@ -1130,13 +1150,17 @@
1130
  "model_output": {
1131
  "classification": {
1132
  "iab_content": {
1133
- "mapping_confidence": 0.2185,
1134
- "mapping_mode": "nearest_equivalent",
1135
  "taxonomy": "IAB Content Taxonomy",
1136
  "taxonomy_version": "3.0",
1137
  "tier1": {
1138
- "id": "52",
1139
- "label": "Business and Finance"
 
 
 
 
1140
  }
1141
  },
1142
  "intent": {
@@ -1144,31 +1168,31 @@
1144
  "component_confidence": {
1145
  "decision_phase": {
1146
  "calibrated": true,
1147
- "confidence": 0.9572,
1148
  "confidence_threshold": 0.22,
1149
  "label": "consideration",
1150
  "meets_threshold": true,
1151
- "raw_confidence": 0.9572
1152
  },
1153
  "intent_subtype": {
1154
  "calibrated": true,
1155
- "confidence": 0.9432,
1156
  "confidence_threshold": 0.25,
1157
  "label": "comparison",
1158
  "meets_threshold": true,
1159
- "raw_confidence": 0.8708
1160
  },
1161
  "intent_type": {
1162
  "calibrated": true,
1163
- "confidence": 0.9622,
1164
  "confidence_threshold": 0.4,
1165
  "label": "commercial",
1166
  "meets_threshold": true,
1167
- "raw_confidence": 0.9374
1168
  },
1169
  "overall_strategy": "min_required_component_confidence"
1170
  },
1171
- "confidence": 0.9432,
1172
  "decision_phase": "consideration",
1173
  "subtype": "comparison",
1174
  "summary": "Classified as commercial intent with subtype comparison in the consideration phase.",
@@ -1213,21 +1237,13 @@
1213
  "model_output": {
1214
  "classification": {
1215
  "iab_content": {
1216
- "mapping_confidence": 0.1513,
1217
  "mapping_mode": "exact",
1218
  "taxonomy": "IAB Content Taxonomy",
1219
  "taxonomy_version": "3.0",
1220
  "tier1": {
1221
  "id": "596",
1222
  "label": "Technology & Computing"
1223
- },
1224
- "tier2": {
1225
- "id": "599",
1226
- "label": "Computing"
1227
- },
1228
- "tier3": {
1229
- "id": "618",
1230
- "label": "Information and Network Security"
1231
  }
1232
  },
1233
  "intent": {
@@ -1235,31 +1251,31 @@
1235
  "component_confidence": {
1236
  "decision_phase": {
1237
  "calibrated": true,
1238
- "confidence": 0.9609,
1239
  "confidence_threshold": 0.22,
1240
  "label": "awareness",
1241
  "meets_threshold": true,
1242
- "raw_confidence": 0.961
1243
  },
1244
  "intent_subtype": {
1245
  "calibrated": true,
1246
- "confidence": 0.9867,
1247
  "confidence_threshold": 0.25,
1248
  "label": "education",
1249
  "meets_threshold": true,
1250
- "raw_confidence": 0.9556
1251
  },
1252
  "intent_type": {
1253
  "calibrated": true,
1254
- "confidence": 0.975,
1255
  "confidence_threshold": 0.4,
1256
  "label": "informational",
1257
  "meets_threshold": true,
1258
- "raw_confidence": 0.9567
1259
  },
1260
  "overall_strategy": "min_required_component_confidence"
1261
  },
1262
- "confidence": 0.9609,
1263
  "decision_phase": "awareness",
1264
  "subtype": "education",
1265
  "summary": "Classified as informational intent with subtype education in the awareness phase.",
 
11
  "model_output": {
12
  "classification": {
13
  "iab_content": {
14
+ "mapping_confidence": 0.5429,
15
+ "mapping_mode": "exact",
16
  "taxonomy": "IAB Content Taxonomy",
17
  "taxonomy_version": "3.0",
18
  "tier1": {
 
22
  "tier2": {
23
  "id": "599",
24
  "label": "Computing"
25
+ },
26
+ "tier3": {
27
+ "id": "602",
28
+ "label": "Software and Applications"
29
  }
30
  },
31
  "intent": {
 
33
  "component_confidence": {
34
  "decision_phase": {
35
  "calibrated": true,
36
+ "confidence": 0.962,
37
  "confidence_threshold": 0.22,
38
  "label": "awareness",
39
  "meets_threshold": true,
40
+ "raw_confidence": 0.9633
41
  },
42
  "intent_subtype": {
43
  "calibrated": true,
44
+ "confidence": 0.9805,
45
  "confidence_threshold": 0.25,
46
  "label": "education",
47
  "meets_threshold": true,
48
+ "raw_confidence": 0.9549
49
  },
50
  "intent_type": {
51
  "calibrated": true,
52
+ "confidence": 0.9817,
53
  "confidence_threshold": 0.4,
54
  "label": "informational",
55
  "meets_threshold": true,
56
+ "raw_confidence": 0.9658
57
  },
58
  "overall_strategy": "min_required_component_confidence"
59
  },
60
+ "confidence": 0.962,
61
  "decision_phase": "awareness",
62
  "subtype": "education",
63
  "summary": "Classified as informational intent with subtype education in the awareness phase.",
 
102
  "model_output": {
103
  "classification": {
104
  "iab_content": {
105
+ "mapping_confidence": 0.4784,
106
+ "mapping_mode": "exact",
107
  "taxonomy": "IAB Content Taxonomy",
108
  "taxonomy_version": "3.0",
109
  "tier1": {
 
116
  "component_confidence": {
117
  "decision_phase": {
118
  "calibrated": true,
119
+ "confidence": 0.9277,
120
  "confidence_threshold": 0.22,
121
  "label": "awareness",
122
  "meets_threshold": true,
123
+ "raw_confidence": 0.9297
124
  },
125
  "intent_subtype": {
126
  "calibrated": true,
127
+ "confidence": 0.9749,
128
  "confidence_threshold": 0.25,
129
  "label": "education",
130
  "meets_threshold": true,
131
+ "raw_confidence": 0.9445
132
  },
133
  "intent_type": {
134
  "calibrated": true,
135
+ "confidence": 0.9797,
136
  "confidence_threshold": 0.4,
137
  "label": "informational",
138
  "meets_threshold": true,
139
+ "raw_confidence": 0.9626
140
  },
141
  "overall_strategy": "min_required_component_confidence"
142
  },
143
+ "confidence": 0.9277,
144
  "decision_phase": "awareness",
145
  "subtype": "education",
146
  "summary": "Classified as informational intent with subtype education in the awareness phase.",
 
185
  "model_output": {
186
  "classification": {
187
  "iab_content": {
188
+ "mapping_confidence": 0.2179,
189
+ "mapping_mode": "exact",
190
  "taxonomy": "IAB Content Taxonomy",
191
  "taxonomy_version": "3.0",
192
  "tier1": {
193
  "id": "483",
194
  "label": "Sports"
195
+ },
196
+ "tier2": {
197
+ "id": "496",
198
+ "label": "Equine Sports"
199
  }
200
  },
201
  "intent": {
 
203
  "component_confidence": {
204
  "decision_phase": {
205
  "calibrated": true,
206
+ "confidence": 0.9444,
207
  "confidence_threshold": 0.22,
208
  "label": "consideration",
209
  "meets_threshold": true,
210
+ "raw_confidence": 0.9461
211
  },
212
  "intent_subtype": {
213
  "calibrated": true,
214
+ "confidence": 0.4804,
215
  "confidence_threshold": 0.25,
216
  "label": "comparison",
217
  "meets_threshold": true,
218
+ "raw_confidence": 0.4327
219
  },
220
  "intent_type": {
221
  "calibrated": true,
222
+ "confidence": 0.981,
223
  "confidence_threshold": 0.4,
224
  "label": "commercial",
225
  "meets_threshold": true,
226
+ "raw_confidence": 0.9653
227
  },
228
  "overall_strategy": "min_required_component_confidence"
229
  },
230
+ "confidence": 0.4804,
231
  "decision_phase": "consideration",
232
  "subtype": "comparison",
233
  "summary": "Classified as commercial intent with subtype comparison in the consideration phase.",
 
272
  "model_output": {
273
  "classification": {
274
  "iab_content": {
275
+ "mapping_confidence": 0.3122,
276
+ "mapping_mode": "exact",
277
  "taxonomy": "IAB Content Taxonomy",
278
  "taxonomy_version": "3.0",
279
  "tier1": {
280
  "id": "596",
281
  "label": "Technology & Computing"
282
+ },
283
+ "tier2": {
284
+ "id": "638",
285
+ "label": "Robotics"
286
  }
287
  },
288
  "intent": {
 
290
  "component_confidence": {
291
  "decision_phase": {
292
  "calibrated": true,
293
+ "confidence": 0.8858,
294
  "confidence_threshold": 0.22,
295
  "label": "consideration",
296
  "meets_threshold": true,
297
+ "raw_confidence": 0.8885
298
  },
299
  "intent_subtype": {
300
  "calibrated": true,
301
+ "confidence": 0.9538,
302
  "confidence_threshold": 0.25,
303
  "label": "comparison",
304
  "meets_threshold": true,
305
+ "raw_confidence": 0.9083
306
  },
307
  "intent_type": {
308
  "calibrated": true,
309
+ "confidence": 0.9676,
310
  "confidence_threshold": 0.4,
311
  "label": "commercial",
312
  "meets_threshold": true,
313
+ "raw_confidence": 0.9435
314
  },
315
  "overall_strategy": "min_required_component_confidence"
316
  },
317
+ "confidence": 0.8858,
318
  "decision_phase": "consideration",
319
  "subtype": "comparison",
320
  "summary": "Classified as commercial intent with subtype comparison in the consideration phase.",
 
359
  "model_output": {
360
  "classification": {
361
  "iab_content": {
362
+ "mapping_confidence": 0.5309,
363
+ "mapping_mode": "exact",
364
  "taxonomy": "IAB Content Taxonomy",
365
  "taxonomy_version": "3.0",
366
  "tier1": {
367
+ "id": "596",
368
+ "label": "Technology & Computing"
 
 
 
 
369
  }
370
  },
371
  "intent": {
 
373
  "component_confidence": {
374
  "decision_phase": {
375
  "calibrated": true,
376
+ "confidence": 0.6077,
377
  "confidence_threshold": 0.22,
378
  "label": "decision",
379
  "meets_threshold": true,
380
+ "raw_confidence": 0.6097
381
  },
382
  "intent_subtype": {
383
  "calibrated": true,
384
+ "confidence": 0.7801,
385
  "confidence_threshold": 0.25,
386
  "label": "provider_selection",
387
  "meets_threshold": true,
388
+ "raw_confidence": 0.6968
389
  },
390
  "intent_type": {
391
  "calibrated": true,
392
+ "confidence": 0.9843,
393
  "confidence_threshold": 0.4,
394
  "label": "commercial",
395
  "meets_threshold": true,
396
+ "raw_confidence": 0.9703
397
  },
398
  "overall_strategy": "min_required_component_confidence"
399
  },
400
+ "confidence": 0.6077,
401
  "decision_phase": "decision",
402
  "subtype": "provider_selection",
403
  "summary": "Classified as commercial intent with subtype provider_selection in the decision phase.",
 
442
  "model_output": {
443
  "classification": {
444
  "iab_content": {
445
+ "mapping_confidence": 0.2299,
446
+ "mapping_mode": "exact",
447
  "taxonomy": "IAB Content Taxonomy",
448
  "taxonomy_version": "3.0",
449
  "tier1": {
450
+ "id": "v9i3On",
451
+ "label": "Sensitive Topics"
452
+ },
453
+ "tier2": {
454
+ "id": "XtODT3",
455
+ "label": "Crime & Harmful Acts to Individuals, Society & Human Right Violations"
456
  }
457
  },
458
  "intent": {
 
460
  "component_confidence": {
461
  "decision_phase": {
462
  "calibrated": true,
463
+ "confidence": 0.9662,
464
  "confidence_threshold": 0.22,
465
  "label": "action",
466
  "meets_threshold": true,
467
+ "raw_confidence": 0.9674
468
  },
469
  "intent_subtype": {
470
  "calibrated": true,
471
+ "confidence": 0.9473,
472
  "confidence_threshold": 0.25,
473
  "label": "signup",
474
  "meets_threshold": true,
475
+ "raw_confidence": 0.8993
476
  },
477
  "intent_type": {
478
  "calibrated": true,
479
+ "confidence": 0.9788,
480
  "confidence_threshold": 0.4,
481
  "label": "transactional",
482
  "meets_threshold": true,
483
+ "raw_confidence": 0.9614
484
  },
485
  "overall_strategy": "min_required_component_confidence"
486
  },
487
+ "confidence": 0.9473,
488
  "decision_phase": "action",
489
  "subtype": "signup",
490
  "summary": "Classified as transactional intent with subtype signup in the action phase.",
 
529
  "model_output": {
530
  "classification": {
531
  "iab_content": {
532
+ "mapping_confidence": 0.8304,
533
  "mapping_mode": "exact",
534
  "taxonomy": "IAB Content Taxonomy",
535
  "taxonomy_version": "3.0",
 
547
  "component_confidence": {
548
  "decision_phase": {
549
  "calibrated": true,
550
+ "confidence": 0.9595,
551
  "confidence_threshold": 0.22,
552
  "label": "action",
553
  "meets_threshold": true,
554
+ "raw_confidence": 0.9608
555
  },
556
  "intent_subtype": {
557
  "calibrated": true,
558
+ "confidence": 0.8434,
559
  "confidence_threshold": 0.25,
560
  "label": "booking",
561
  "meets_threshold": true,
562
+ "raw_confidence": 0.7616
563
  },
564
  "intent_type": {
565
  "calibrated": true,
566
+ "confidence": 0.9805,
567
  "confidence_threshold": 0.4,
568
  "label": "transactional",
569
  "meets_threshold": true,
570
+ "raw_confidence": 0.9649
571
  },
572
  "overall_strategy": "min_required_component_confidence"
573
  },
574
+ "confidence": 0.8434,
575
  "decision_phase": "action",
576
  "subtype": "booking",
577
  "summary": "Classified as transactional intent with subtype booking in the action phase.",
 
616
  "model_output": {
617
  "classification": {
618
  "iab_content": {
619
+ "mapping_confidence": 0.5261,
620
+ "mapping_mode": "exact",
621
  "taxonomy": "IAB Content Taxonomy",
622
  "taxonomy_version": "3.0",
623
  "tier1": {
 
630
  "component_confidence": {
631
  "decision_phase": {
632
  "calibrated": true,
633
+ "confidence": 0.9573,
634
  "confidence_threshold": 0.22,
635
  "label": "post_purchase",
636
  "meets_threshold": true,
637
+ "raw_confidence": 0.9587
638
  },
639
  "intent_subtype": {
640
  "calibrated": true,
641
+ "confidence": 0.967,
642
  "confidence_threshold": 0.25,
643
  "label": "onboarding_setup",
644
  "meets_threshold": true,
645
+ "raw_confidence": 0.9306
646
  },
647
  "intent_type": {
648
  "calibrated": true,
649
+ "confidence": 0.5834,
650
  "confidence_threshold": 0.4,
651
  "label": "transactional",
652
  "meets_threshold": true,
653
+ "raw_confidence": 0.5253
654
  },
655
  "overall_strategy": "min_required_component_confidence"
656
  },
657
+ "confidence": 0.5834,
658
  "decision_phase": "post_purchase",
659
  "subtype": "onboarding_setup",
660
  "summary": "Classified as transactional intent with subtype onboarding_setup in the post_purchase phase.",
 
699
  "model_output": {
700
  "classification": {
701
  "iab_content": {
702
+ "mapping_confidence": 0.272,
703
+ "mapping_mode": "exact",
704
  "taxonomy": "IAB Content Taxonomy",
705
  "taxonomy_version": "3.0",
706
  "tier1": {
707
  "id": "52",
708
  "label": "Business and Finance"
709
+ },
710
+ "tier2": {
711
+ "id": "53",
712
+ "label": "Business"
713
+ },
714
+ "tier3": {
715
+ "id": "72",
716
+ "label": "Business I.T."
717
  }
718
  },
719
  "intent": {
 
721
  "component_confidence": {
722
  "decision_phase": {
723
  "calibrated": true,
724
+ "confidence": 0.9589,
725
  "confidence_threshold": 0.22,
726
  "label": "support",
727
  "meets_threshold": true,
728
+ "raw_confidence": 0.9603
729
  },
730
  "intent_subtype": {
731
  "calibrated": true,
732
+ "confidence": 0.8859,
733
  "confidence_threshold": 0.25,
734
  "label": "account_help",
735
  "meets_threshold": true,
736
+ "raw_confidence": 0.8147
737
  },
738
  "intent_type": {
739
  "calibrated": true,
740
+ "confidence": 0.9699,
741
  "confidence_threshold": 0.4,
742
  "label": "support",
743
  "meets_threshold": true,
744
+ "raw_confidence": 0.9476
745
  },
746
  "overall_strategy": "min_required_component_confidence"
747
  },
748
+ "confidence": 0.8859,
749
  "decision_phase": "support",
750
  "subtype": "account_help",
751
  "summary": "Classified as support intent with subtype account_help in the support phase.",
 
796
  "model_output": {
797
  "classification": {
798
  "iab_content": {
799
+ "mapping_confidence": 0.7892,
800
+ "mapping_mode": "exact",
801
  "taxonomy": "IAB Content Taxonomy",
802
  "taxonomy_version": "3.0",
803
  "tier1": {
804
+ "id": "186",
805
+ "label": "Family and Relationships"
806
  }
807
  },
808
  "intent": {
 
810
  "component_confidence": {
811
  "decision_phase": {
812
  "calibrated": true,
813
+ "confidence": 0.9219,
814
  "confidence_threshold": 0.22,
815
  "label": "awareness",
816
  "meets_threshold": true,
817
+ "raw_confidence": 0.9239
818
  },
819
  "intent_subtype": {
820
  "calibrated": true,
821
+ "confidence": 0.9492,
822
  "confidence_threshold": 0.25,
823
  "label": "emotional_reflection",
824
  "meets_threshold": true,
825
+ "raw_confidence": 0.9021
826
  },
827
  "intent_type": {
828
  "calibrated": true,
829
+ "confidence": 0.9388,
830
  "confidence_threshold": 0.4,
831
  "label": "personal_reflection",
832
  "meets_threshold": true,
833
+ "raw_confidence": 0.9059
834
  },
835
  "overall_strategy": "min_required_component_confidence"
836
  },
837
+ "confidence": 0.9219,
838
  "decision_phase": "awareness",
839
  "subtype": "emotional_reflection",
840
  "summary": "Classified as personal_reflection intent with subtype emotional_reflection in the awareness phase.",
 
885
  "model_output": {
886
  "classification": {
887
  "iab_content": {
888
+ "mapping_confidence": 0.2238,
889
+ "mapping_mode": "exact",
890
  "taxonomy": "IAB Content Taxonomy",
891
  "taxonomy_version": "3.0",
892
  "tier1": {
 
899
  "component_confidence": {
900
  "decision_phase": {
901
  "calibrated": true,
902
+ "confidence": 0.8763,
903
  "confidence_threshold": 0.22,
904
  "label": "research",
905
  "meets_threshold": true,
906
+ "raw_confidence": 0.8791
907
  },
908
  "intent_subtype": {
909
  "calibrated": true,
910
+ "confidence": 0.9683,
911
  "confidence_threshold": 0.25,
912
  "label": "follow_up",
913
  "meets_threshold": true,
914
+ "raw_confidence": 0.9314
915
  },
916
  "intent_type": {
917
  "calibrated": true,
918
+ "confidence": 0.9623,
919
  "confidence_threshold": 0.4,
920
  "label": "ambiguous",
921
  "meets_threshold": true,
922
+ "raw_confidence": 0.9367
923
  },
924
  "overall_strategy": "min_required_component_confidence"
925
  },
926
+ "confidence": 0.8763,
927
  "decision_phase": "research",
928
  "subtype": "follow_up",
929
  "summary": "Classified as ambiguous intent with subtype follow_up in the research phase.",
 
974
  "model_output": {
975
  "classification": {
976
  "iab_content": {
977
+ "mapping_confidence": 0.2371,
978
+ "mapping_mode": "exact",
979
  "taxonomy": "IAB Content Taxonomy",
980
  "taxonomy_version": "3.0",
981
  "tier1": {
982
  "id": "391",
983
  "label": "Personal Finance"
984
+ },
985
+ "tier2": {
986
+ "id": "396",
987
+ "label": "Financial Planning"
988
  }
989
  },
990
  "intent": {
 
992
  "component_confidence": {
993
  "decision_phase": {
994
  "calibrated": true,
995
+ "confidence": 0.9225,
996
  "confidence_threshold": 0.22,
997
  "label": "research",
998
  "meets_threshold": true,
999
+ "raw_confidence": 0.9246
1000
  },
1001
  "intent_subtype": {
1002
  "calibrated": true,
1003
+ "confidence": 0.9586,
1004
  "confidence_threshold": 0.25,
1005
  "label": "follow_up",
1006
  "meets_threshold": true,
1007
+ "raw_confidence": 0.9146
1008
  },
1009
  "intent_type": {
1010
  "calibrated": true,
1011
+ "confidence": 0.9488,
1012
  "confidence_threshold": 0.4,
1013
  "label": "ambiguous",
1014
  "meets_threshold": true,
1015
+ "raw_confidence": 0.9179
1016
  },
1017
  "overall_strategy": "min_required_component_confidence"
1018
  },
1019
+ "confidence": 0.9225,
1020
  "decision_phase": "research",
1021
  "subtype": "follow_up",
1022
  "summary": "Classified as ambiguous intent with subtype follow_up in the research phase.",
 
1067
  "model_output": {
1068
  "classification": {
1069
  "iab_content": {
1070
+ "mapping_confidence": 0.2131,
1071
+ "mapping_mode": "nearest_equivalent",
1072
  "taxonomy": "IAB Content Taxonomy",
1073
  "taxonomy_version": "3.0",
1074
  "tier1": {
1075
+ "id": "42",
1076
+ "label": "Books and Literature"
 
 
 
 
1077
  }
1078
  },
1079
  "intent": {
 
1081
  "component_confidence": {
1082
  "decision_phase": {
1083
  "calibrated": true,
1084
+ "confidence": 0.9861,
1085
  "confidence_threshold": 0.22,
1086
  "label": "action",
1087
  "meets_threshold": true,
1088
+ "raw_confidence": 0.9867
1089
  },
1090
  "intent_subtype": {
1091
  "calibrated": true,
1092
+ "confidence": 0.7335,
1093
  "confidence_threshold": 0.25,
1094
  "label": "signup",
1095
  "meets_threshold": true,
1096
+ "raw_confidence": 0.6454
1097
  },
1098
  "intent_type": {
1099
  "calibrated": true,
1100
+ "confidence": 0.9628,
1101
  "confidence_threshold": 0.4,
1102
  "label": "transactional",
1103
  "meets_threshold": true,
1104
+ "raw_confidence": 0.938
1105
  },
1106
  "overall_strategy": "min_required_component_confidence"
1107
  },
1108
+ "confidence": 0.7335,
1109
  "decision_phase": "action",
1110
  "subtype": "signup",
1111
  "summary": "Classified as transactional intent with subtype signup in the action phase.",
 
1150
  "model_output": {
1151
  "classification": {
1152
  "iab_content": {
1153
+ "mapping_confidence": 0.3327,
1154
+ "mapping_mode": "exact",
1155
  "taxonomy": "IAB Content Taxonomy",
1156
  "taxonomy_version": "3.0",
1157
  "tier1": {
1158
+ "id": "596",
1159
+ "label": "Technology & Computing"
1160
+ },
1161
+ "tier2": {
1162
+ "id": "639",
1163
+ "label": "Virtual Reality"
1164
  }
1165
  },
1166
  "intent": {
 
1168
  "component_confidence": {
1169
  "decision_phase": {
1170
  "calibrated": true,
1171
+ "confidence": 0.9295,
1172
  "confidence_threshold": 0.22,
1173
  "label": "consideration",
1174
  "meets_threshold": true,
1175
+ "raw_confidence": 0.9315
1176
  },
1177
  "intent_subtype": {
1178
  "calibrated": true,
1179
+ "confidence": 0.9374,
1180
  "confidence_threshold": 0.25,
1181
  "label": "comparison",
1182
  "meets_threshold": true,
1183
+ "raw_confidence": 0.8838
1184
  },
1185
  "intent_type": {
1186
  "calibrated": true,
1187
+ "confidence": 0.9602,
1188
  "confidence_threshold": 0.4,
1189
  "label": "commercial",
1190
  "meets_threshold": true,
1191
+ "raw_confidence": 0.9329
1192
  },
1193
  "overall_strategy": "min_required_component_confidence"
1194
  },
1195
+ "confidence": 0.9295,
1196
  "decision_phase": "consideration",
1197
  "subtype": "comparison",
1198
  "summary": "Classified as commercial intent with subtype comparison in the consideration phase.",
 
1237
  "model_output": {
1238
  "classification": {
1239
  "iab_content": {
1240
+ "mapping_confidence": 0.3227,
1241
  "mapping_mode": "exact",
1242
  "taxonomy": "IAB Content Taxonomy",
1243
  "taxonomy_version": "3.0",
1244
  "tier1": {
1245
  "id": "596",
1246
  "label": "Technology & Computing"
 
 
 
 
 
 
 
 
1247
  }
1248
  },
1249
  "intent": {
 
1251
  "component_confidence": {
1252
  "decision_phase": {
1253
  "calibrated": true,
1254
+ "confidence": 0.9535,
1255
  "confidence_threshold": 0.22,
1256
  "label": "awareness",
1257
  "meets_threshold": true,
1258
+ "raw_confidence": 0.955
1259
  },
1260
  "intent_subtype": {
1261
  "calibrated": true,
1262
+ "confidence": 0.9793,
1263
  "confidence_threshold": 0.25,
1264
  "label": "education",
1265
  "meets_threshold": true,
1266
+ "raw_confidence": 0.9527
1267
  },
1268
  "intent_type": {
1269
  "calibrated": true,
1270
+ "confidence": 0.9769,
1271
  "confidence_threshold": 0.4,
1272
  "label": "informational",
1273
  "meets_threshold": true,
1274
+ "raw_confidence": 0.9584
1275
  },
1276
  "overall_strategy": "min_required_component_confidence"
1277
  },
1278
+ "confidence": 0.9535,
1279
  "decision_phase": "awareness",
1280
  "subtype": "education",
1281
  "summary": "Classified as informational intent with subtype education in the awareness phase.",
artifacts/evaluation/latest/decision_phase_difficulty_benchmark_confusion_matrix.csv CHANGED
@@ -1,8 +1,8 @@
1
  ,awareness,research,consideration,decision,action,post_purchase,support
2
  awareness,14,1,0,0,0,0,0
3
  research,0,15,0,0,0,0,0
4
- consideration,0,1,14,0,0,0,0
5
- decision,0,0,0,15,0,0,0
6
- action,0,0,0,0,15,0,0
7
  post_purchase,0,0,0,0,0,15,0
8
  support,0,0,0,0,0,0,15
 
1
  ,awareness,research,consideration,decision,action,post_purchase,support
2
  awareness,14,1,0,0,0,0,0
3
  research,0,15,0,0,0,0,0
4
+ consideration,0,2,13,0,0,0,0
5
+ decision,0,1,0,14,0,0,0
6
+ action,0,0,0,1,14,0,0
7
  post_purchase,0,0,0,0,0,15,0
8
  support,0,0,0,0,0,0,15
artifacts/evaluation/latest/decision_phase_difficulty_benchmark_report.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "accepted_accuracy": 0.981,
3
  "accepted_coverage": 1.0,
4
- "accuracy": 0.981,
5
  "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/decision_phase_difficulty_benchmark_confusion_matrix.csv",
6
  "count": 105,
7
  "dataset_path": "/content/agentic-intent-classifier/data/decision_phase_benchmark.jsonl",
@@ -15,12 +15,12 @@
15
  "macro_f1": 0.9711
16
  },
17
  "hard": {
18
- "accepted_accuracy": 0.9714,
19
  "accepted_coverage": 1.0,
20
- "accuracy": 0.9714,
21
  "count": 35,
22
  "fallback_rate": 0.0,
23
- "macro_f1": 0.9711
24
  },
25
  "medium": {
26
  "accepted_accuracy": 1.0,
@@ -33,13 +33,13 @@
33
  },
34
  "fallback_rate": 0.0,
35
  "head": "decision_phase",
36
- "macro_f1": 0.9812,
37
  "per_class_metrics": {
38
- "accuracy": 0.9809523809523809,
39
  "action": {
40
- "f1-score": 1.0,
41
  "precision": 1.0,
42
- "recall": 1.0,
43
  "support": 15.0
44
  },
45
  "awareness": {
@@ -49,21 +49,21 @@
49
  "support": 15.0
50
  },
51
  "consideration": {
52
- "f1-score": 0.9655172413793104,
53
  "precision": 1.0,
54
- "recall": 0.9333333333333333,
55
  "support": 15.0
56
  },
57
  "decision": {
58
- "f1-score": 1.0,
59
- "precision": 1.0,
60
- "recall": 1.0,
61
  "support": 15.0
62
  },
63
  "macro avg": {
64
- "f1-score": 0.9812192118226601,
65
- "precision": 0.9831932773109244,
66
- "recall": 0.980952380952381,
67
  "support": 105.0
68
  },
69
  "post_purchase": {
@@ -73,8 +73,8 @@
73
  "support": 15.0
74
  },
75
  "research": {
76
- "f1-score": 0.9375,
77
- "precision": 0.8823529411764706,
78
  "recall": 1.0,
79
  "support": 15.0
80
  },
@@ -85,9 +85,9 @@
85
  "support": 15.0
86
  },
87
  "weighted avg": {
88
- "f1-score": 0.9812192118226601,
89
- "precision": 0.9831932773109243,
90
- "recall": 0.9809523809523809,
91
  "support": 105.0
92
  }
93
  },
 
1
  {
2
+ "accepted_accuracy": 0.9524,
3
  "accepted_coverage": 1.0,
4
+ "accuracy": 0.9524,
5
  "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/decision_phase_difficulty_benchmark_confusion_matrix.csv",
6
  "count": 105,
7
  "dataset_path": "/content/agentic-intent-classifier/data/decision_phase_benchmark.jsonl",
 
15
  "macro_f1": 0.9711
16
  },
17
  "hard": {
18
+ "accepted_accuracy": 0.8857,
19
  "accepted_coverage": 1.0,
20
+ "accuracy": 0.8857,
21
  "count": 35,
22
  "fallback_rate": 0.0,
23
+ "macro_f1": 0.8908
24
  },
25
  "medium": {
26
  "accepted_accuracy": 1.0,
 
33
  },
34
  "fallback_rate": 0.0,
35
  "head": "decision_phase",
36
+ "macro_f1": 0.9536,
37
  "per_class_metrics": {
38
+ "accuracy": 0.9523809523809523,
39
  "action": {
40
+ "f1-score": 0.9655172413793104,
41
  "precision": 1.0,
42
+ "recall": 0.9333333333333333,
43
  "support": 15.0
44
  },
45
  "awareness": {
 
49
  "support": 15.0
50
  },
51
  "consideration": {
52
+ "f1-score": 0.9285714285714286,
53
  "precision": 1.0,
54
+ "recall": 0.8666666666666667,
55
  "support": 15.0
56
  },
57
  "decision": {
58
+ "f1-score": 0.9333333333333333,
59
+ "precision": 0.9333333333333333,
60
+ "recall": 0.9333333333333333,
61
  "support": 15.0
62
  },
63
  "macro avg": {
64
+ "f1-score": 0.9536131694056934,
65
+ "precision": 0.9604010025062657,
66
+ "recall": 0.9523809523809524,
67
  "support": 105.0
68
  },
69
  "post_purchase": {
 
73
  "support": 15.0
74
  },
75
  "research": {
76
+ "f1-score": 0.8823529411764706,
77
+ "precision": 0.7894736842105263,
78
  "recall": 1.0,
79
  "support": 15.0
80
  },
 
85
  "support": 15.0
86
  },
87
  "weighted avg": {
88
+ "f1-score": 0.9536131694056934,
89
+ "precision": 0.9604010025062656,
90
+ "recall": 0.9523809523809523,
91
  "support": 105.0
92
  }
93
  },
artifacts/evaluation/latest/decision_phase_final_wave_cases_confusion_matrix.csv CHANGED
@@ -1,7 +1,7 @@
1
  ,awareness,research,consideration,decision,action,post_purchase,support
2
  awareness,5,0,0,0,0,0,0
3
- research,2,2,0,0,0,0,0
4
- consideration,0,1,4,0,0,0,0
5
  decision,0,0,0,5,0,0,0
6
  action,0,0,0,0,0,0,0
7
  post_purchase,0,0,0,0,0,4,0
 
1
  ,awareness,research,consideration,decision,action,post_purchase,support
2
  awareness,5,0,0,0,0,0,0
3
+ research,1,3,0,0,0,0,0
4
+ consideration,0,2,3,0,0,0,0
5
  decision,0,0,0,5,0,0,0
6
  action,0,0,0,0,0,0,0
7
  post_purchase,0,0,0,0,0,4,0
artifacts/evaluation/latest/decision_phase_final_wave_cases_report.json CHANGED
@@ -7,7 +7,7 @@
7
  "dataset_path": "/content/agentic-intent-classifier/data/decision_phase/final_wave_cases.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "decision_phase",
10
- "macro_f1": 0.8823,
11
  "per_class_metrics": {
12
  "accuracy": 0.8888888888888888,
13
  "action": {
@@ -17,15 +17,15 @@
17
  "support": 0.0
18
  },
19
  "awareness": {
20
- "f1-score": 0.8333333333333334,
21
- "precision": 0.7142857142857143,
22
  "recall": 1.0,
23
  "support": 5.0
24
  },
25
  "consideration": {
26
- "f1-score": 0.8888888888888888,
27
  "precision": 1.0,
28
- "recall": 0.8,
29
  "support": 5.0
30
  },
31
  "decision": {
@@ -35,9 +35,9 @@
35
  "support": 5.0
36
  },
37
  "macro avg": {
38
- "f1-score": 0.7562358276643991,
39
- "precision": 0.7687074829931974,
40
- "recall": 0.7571428571428571,
41
  "support": 27.0
42
  },
43
  "post_purchase": {
@@ -47,9 +47,9 @@
47
  "support": 4.0
48
  },
49
  "research": {
50
- "f1-score": 0.5714285714285714,
51
- "precision": 0.6666666666666666,
52
- "recall": 0.5,
53
  "support": 4.0
54
  },
55
  "support": {
@@ -59,8 +59,8 @@
59
  "support": 4.0
60
  },
61
  "weighted avg": {
62
- "f1-score": 0.8850676072898296,
63
- "precision": 0.8977072310405644,
64
  "recall": 0.8888888888888888,
65
  "support": 27.0
66
  }
 
7
  "dataset_path": "/content/agentic-intent-classifier/data/decision_phase/final_wave_cases.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "decision_phase",
10
+ "macro_f1": 0.8876,
11
  "per_class_metrics": {
12
  "accuracy": 0.8888888888888888,
13
  "action": {
 
17
  "support": 0.0
18
  },
19
  "awareness": {
20
+ "f1-score": 0.9090909090909091,
21
+ "precision": 0.8333333333333334,
22
  "recall": 1.0,
23
  "support": 5.0
24
  },
25
  "consideration": {
26
+ "f1-score": 0.75,
27
  "precision": 1.0,
28
+ "recall": 0.6,
29
  "support": 5.0
30
  },
31
  "decision": {
 
35
  "support": 5.0
36
  },
37
  "macro avg": {
38
+ "f1-score": 0.7608225108225108,
39
+ "precision": 0.7761904761904762,
40
+ "recall": 0.7642857142857142,
41
  "support": 27.0
42
  },
43
  "post_purchase": {
 
47
  "support": 4.0
48
  },
49
  "research": {
50
+ "f1-score": 0.6666666666666666,
51
+ "precision": 0.6,
52
+ "recall": 0.75,
53
  "support": 4.0
54
  },
55
  "support": {
 
59
  "support": 4.0
60
  },
61
  "weighted avg": {
62
+ "f1-score": 0.8874859708193041,
63
+ "precision": 0.9098765432098765,
64
  "recall": 0.8888888888888888,
65
  "support": 27.0
66
  }
artifacts/evaluation/latest/decision_phase_test_confusion_matrix.csv CHANGED
@@ -2,7 +2,7 @@
2
  awareness,3,0,0,0,0,0,0
3
  research,3,2,0,0,0,0,0
4
  consideration,0,2,3,0,0,0,0
5
- decision,0,0,0,5,0,0,0
6
  action,0,0,0,0,3,0,0
7
  post_purchase,0,0,0,0,0,4,0
8
  support,0,0,0,0,0,1,3
 
2
  awareness,3,0,0,0,0,0,0
3
  research,3,2,0,0,0,0,0
4
  consideration,0,2,3,0,0,0,0
5
+ decision,0,0,0,4,0,1,0
6
  action,0,0,0,0,3,0,0
7
  post_purchase,0,0,0,0,0,4,0
8
  support,0,0,0,0,0,1,3
artifacts/evaluation/latest/decision_phase_test_report.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "accepted_accuracy": 0.7931,
3
  "accepted_coverage": 1.0,
4
- "accuracy": 0.7931,
5
  "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/decision_phase_test_confusion_matrix.csv",
6
  "count": 29,
7
  "dataset_path": "/content/agentic-intent-classifier/data/decision_phase/test.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "decision_phase",
10
- "macro_f1": 0.801,
11
  "per_class_metrics": {
12
- "accuracy": 0.7931034482758621,
13
  "action": {
14
  "f1-score": 1.0,
15
  "precision": 1.0,
@@ -29,20 +29,20 @@
29
  "support": 5.0
30
  },
31
  "decision": {
32
- "f1-score": 1.0,
33
  "precision": 1.0,
34
- "recall": 1.0,
35
  "support": 5.0
36
  },
37
  "macro avg": {
38
- "f1-score": 0.8010204081632653,
39
- "precision": 0.8285714285714285,
40
- "recall": 0.8214285714285714,
41
  "support": 29.0
42
  },
43
  "post_purchase": {
44
- "f1-score": 0.8888888888888888,
45
- "precision": 0.8,
46
  "recall": 1.0,
47
  "support": 4.0
48
  },
@@ -59,9 +59,9 @@
59
  "support": 4.0
60
  },
61
  "weighted avg": {
62
- "f1-score": 0.7915982484948002,
63
- "precision": 0.8344827586206897,
64
- "recall": 0.7931034482758621,
65
  "support": 29.0
66
  }
67
  },
 
1
  {
2
+ "accepted_accuracy": 0.7586,
3
  "accepted_coverage": 1.0,
4
+ "accuracy": 0.7586,
5
  "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/decision_phase_test_confusion_matrix.csv",
6
  "count": 29,
7
  "dataset_path": "/content/agentic-intent-classifier/data/decision_phase/test.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "decision_phase",
10
+ "macro_f1": 0.7724,
11
  "per_class_metrics": {
12
+ "accuracy": 0.7586206896551724,
13
  "action": {
14
  "f1-score": 1.0,
15
  "precision": 1.0,
 
29
  "support": 5.0
30
  },
31
  "decision": {
32
+ "f1-score": 0.8888888888888888,
33
  "precision": 1.0,
34
+ "recall": 0.8,
35
  "support": 5.0
36
  },
37
  "macro avg": {
38
+ "f1-score": 0.7724489795918367,
39
+ "precision": 0.8095238095238095,
40
+ "recall": 0.7928571428571428,
41
  "support": 29.0
42
  },
43
  "post_purchase": {
44
+ "f1-score": 0.8,
45
+ "precision": 0.6666666666666666,
46
  "recall": 1.0,
47
  "support": 4.0
48
  },
 
59
  "support": 4.0
60
  },
61
  "weighted avg": {
62
+ "f1-score": 0.7601806239737274,
63
+ "precision": 0.8160919540229885,
64
+ "recall": 0.7586206896551724,
65
  "support": 29.0
66
  }
67
  },
artifacts/evaluation/latest/decision_phase_train_confusion_matrix.csv CHANGED
@@ -1,8 +1,8 @@
1
  ,awareness,research,consideration,decision,action,post_purchase,support
2
  awareness,16,0,0,0,0,0,0
3
- research,1,13,1,0,0,0,0
4
- consideration,0,3,14,0,0,0,0
5
  decision,0,0,1,15,0,0,0
6
  action,0,0,0,0,10,0,0
7
- post_purchase,0,1,0,0,0,13,0
8
  support,0,0,0,0,0,0,14
 
1
  ,awareness,research,consideration,decision,action,post_purchase,support
2
  awareness,16,0,0,0,0,0,0
3
+ research,1,14,0,0,0,0,0
4
+ consideration,0,4,13,0,0,0,0
5
  decision,0,0,1,15,0,0,0
6
  action,0,0,0,0,10,0,0
7
+ post_purchase,0,0,0,0,0,14,0
8
  support,0,0,0,0,0,0,14
artifacts/evaluation/latest/decision_phase_train_report.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "accepted_accuracy": 0.9314,
3
  "accepted_coverage": 1.0,
4
- "accuracy": 0.9314,
5
  "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/decision_phase_train_confusion_matrix.csv",
6
  "count": 102,
7
  "dataset_path": "/content/agentic-intent-classifier/data/decision_phase/train.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "decision_phase",
10
- "macro_f1": 0.9373,
11
  "per_class_metrics": {
12
- "accuracy": 0.9313725490196079,
13
  "action": {
14
  "f1-score": 1.0,
15
  "precision": 1.0,
@@ -23,9 +23,9 @@
23
  "support": 16.0
24
  },
25
  "consideration": {
26
- "f1-score": 0.8484848484848485,
27
- "precision": 0.875,
28
- "recall": 0.8235294117647058,
29
  "support": 17.0
30
  },
31
  "decision": {
@@ -35,21 +35,21 @@
35
  "support": 16.0
36
  },
37
  "macro avg": {
38
- "f1-score": 0.9373409595183789,
39
- "precision": 0.9401260504201681,
40
- "recall": 0.9366096438575431,
41
  "support": 102.0
42
  },
43
  "post_purchase": {
44
- "f1-score": 0.9629629629629629,
45
  "precision": 1.0,
46
- "recall": 0.9285714285714286,
47
  "support": 14.0
48
  },
49
  "research": {
50
- "f1-score": 0.8125,
51
- "precision": 0.7647058823529411,
52
- "recall": 0.8666666666666667,
53
  "support": 15.0
54
  },
55
  "support": {
@@ -59,9 +59,9 @@
59
  "support": 14.0
60
  },
61
  "weighted avg": {
62
- "f1-score": 0.9322769253786015,
63
- "precision": 0.9353373702422145,
64
- "recall": 0.9313725490196079,
65
  "support": 102.0
66
  }
67
  },
 
1
  {
2
+ "accepted_accuracy": 0.9412,
3
  "accepted_coverage": 1.0,
4
+ "accuracy": 0.9412,
5
  "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/decision_phase_train_confusion_matrix.csv",
6
  "count": 102,
7
  "dataset_path": "/content/agentic-intent-classifier/data/decision_phase/train.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "decision_phase",
10
+ "macro_f1": 0.9464,
11
  "per_class_metrics": {
12
+ "accuracy": 0.9411764705882353,
13
  "action": {
14
  "f1-score": 1.0,
15
  "precision": 1.0,
 
23
  "support": 16.0
24
  },
25
  "consideration": {
26
+ "f1-score": 0.8387096774193549,
27
+ "precision": 0.9285714285714286,
28
+ "recall": 0.7647058823529411,
29
  "support": 17.0
30
  },
31
  "decision": {
 
35
  "support": 16.0
36
  },
37
  "macro avg": {
38
+ "f1-score": 0.9463762044407206,
39
+ "precision": 0.9496465252767774,
40
+ "recall": 0.9479341736694679,
41
  "support": 102.0
42
  },
43
  "post_purchase": {
44
+ "f1-score": 1.0,
45
  "precision": 1.0,
46
+ "recall": 1.0,
47
  "support": 14.0
48
  },
49
  "research": {
50
+ "f1-score": 0.8484848484848485,
51
+ "precision": 0.7777777777777778,
52
+ "recall": 0.9333333333333333,
53
  "support": 15.0
54
  },
55
  "support": {
 
59
  "support": 14.0
60
  },
61
  "weighted avg": {
62
+ "f1-score": 0.9410231345715216,
63
+ "precision": 0.946188279233262,
64
+ "recall": 0.9411764705882353,
65
  "support": 102.0
66
  }
67
  },
artifacts/evaluation/latest/decision_phase_val_confusion_matrix.csv CHANGED
@@ -1,8 +1,8 @@
1
  ,awareness,research,consideration,decision,action,post_purchase,support
2
  awareness,5,0,0,0,0,0,0
3
- research,2,2,0,0,0,0,0
4
  consideration,0,0,5,0,0,0,0
5
  decision,0,0,1,3,0,0,0
6
  action,0,0,0,0,3,0,0
7
  post_purchase,0,1,0,0,0,3,0
8
- support,0,0,0,0,0,0,4
 
1
  ,awareness,research,consideration,decision,action,post_purchase,support
2
  awareness,5,0,0,0,0,0,0
3
+ research,1,3,0,0,0,0,0
4
  consideration,0,0,5,0,0,0,0
5
  decision,0,0,1,3,0,0,0
6
  action,0,0,0,0,3,0,0
7
  post_purchase,0,1,0,0,0,3,0
8
+ support,0,0,0,0,1,0,3
artifacts/evaluation/latest/decision_phase_val_report.json CHANGED
@@ -7,18 +7,18 @@
7
  "dataset_path": "/content/agentic-intent-classifier/data/decision_phase/val.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "decision_phase",
10
- "macro_f1": 0.8612,
11
  "per_class_metrics": {
12
  "accuracy": 0.8620689655172413,
13
  "action": {
14
- "f1-score": 1.0,
15
- "precision": 1.0,
16
  "recall": 1.0,
17
  "support": 3.0
18
  },
19
  "awareness": {
20
- "f1-score": 0.8333333333333334,
21
- "precision": 0.7142857142857143,
22
  "recall": 1.0,
23
  "support": 5.0
24
  },
@@ -35,8 +35,8 @@
35
  "support": 4.0
36
  },
37
  "macro avg": {
38
- "f1-score": 0.8611626468769326,
39
- "precision": 0.8877551020408163,
40
  "recall": 0.8571428571428571,
41
  "support": 29.0
42
  },
@@ -47,20 +47,20 @@
47
  "support": 4.0
48
  },
49
  "research": {
50
- "f1-score": 0.5714285714285714,
51
- "precision": 0.6666666666666666,
52
- "recall": 0.5,
53
  "support": 4.0
54
  },
55
  "support": {
56
- "f1-score": 1.0,
57
  "precision": 1.0,
58
- "recall": 1.0,
59
  "support": 4.0
60
  },
61
  "weighted avg": {
62
- "f1-score": 0.8570682191371846,
63
- "precision": 0.8760262725779967,
64
  "recall": 0.8620689655172413,
65
  "support": 29.0
66
  }
 
7
  "dataset_path": "/content/agentic-intent-classifier/data/decision_phase/val.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "decision_phase",
10
+ "macro_f1": 0.8567,
11
  "per_class_metrics": {
12
  "accuracy": 0.8620689655172413,
13
  "action": {
14
+ "f1-score": 0.8571428571428571,
15
+ "precision": 0.75,
16
  "recall": 1.0,
17
  "support": 3.0
18
  },
19
  "awareness": {
20
+ "f1-score": 0.9090909090909091,
21
+ "precision": 0.8333333333333334,
22
  "recall": 1.0,
23
  "support": 5.0
24
  },
 
35
  "support": 4.0
36
  },
37
  "macro avg": {
38
+ "f1-score": 0.8566790352504637,
39
+ "precision": 0.880952380952381,
40
  "recall": 0.8571428571428571,
41
  "support": 29.0
42
  },
 
47
  "support": 4.0
48
  },
49
  "research": {
50
+ "f1-score": 0.75,
51
+ "precision": 0.75,
52
+ "recall": 0.75,
53
  "support": 4.0
54
  },
55
  "support": {
56
+ "f1-score": 0.8571428571428571,
57
  "precision": 1.0,
58
+ "recall": 0.75,
59
  "support": 4.0
60
  },
61
  "weighted avg": {
62
+ "f1-score": 0.8602776533811015,
63
+ "precision": 0.8821839080459771,
64
  "recall": 0.8620689655172413,
65
  "support": 29.0
66
  }
artifacts/evaluation/latest/iab_behavior_lock_regression.json CHANGED
@@ -13,7 +13,7 @@
13
  "results": [
14
  {
15
  "actual": {
16
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
17
  "model_output.classification.iab_content.tier1.label": "Automotive",
18
  "model_output.classification.iab_content.tier2.label": null
19
  },
@@ -24,6 +24,11 @@
24
  },
25
  "id": "car-buying-maps-to-automotive-buying",
26
  "mismatches": [
 
 
 
 
 
27
  {
28
  "actual": null,
29
  "expected": "Auto Type",
@@ -63,7 +68,7 @@
63
  },
64
  {
65
  "actual": {
66
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
67
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
68
  "model_output.classification.iab_content.tier2.label": null,
69
  "model_output.classification.iab_content.tier3.label": null
@@ -76,6 +81,11 @@
76
  },
77
  "id": "labtop-buying-maps-to-laptops",
78
  "mismatches": [
 
 
 
 
 
79
  {
80
  "actual": null,
81
  "expected": "Computing",
@@ -94,10 +104,10 @@
94
  },
95
  {
96
  "actual": {
97
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
98
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
99
  "model_output.classification.iab_content.tier2.label": "Computing",
100
- "model_output.classification.iab_content.tier3.label": null
101
  },
102
  "expected": {
103
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -108,9 +118,9 @@
108
  "id": "crm-awareness-maps-to-sales",
109
  "mismatches": [
110
  {
111
- "actual": null,
112
- "expected": "Software and Applications",
113
- "path": "model_output.classification.iab_content.tier3.label"
114
  }
115
  ],
116
  "notes": "CRM education should resolve to the closest business/sales path, not generic software.",
@@ -120,9 +130,9 @@
120
  },
121
  {
122
  "actual": {
123
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
124
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
125
- "model_output.classification.iab_content.tier2.label": null,
126
  "model_output.classification.iab_content.tier3.label": null
127
  },
128
  "expected": {
@@ -134,7 +144,12 @@
134
  "id": "crm-comparison-maps-to-sales",
135
  "mismatches": [
136
  {
137
- "actual": null,
 
 
 
 
 
138
  "expected": "Computing",
139
  "path": "model_output.classification.iab_content.tier2.label"
140
  },
@@ -151,9 +166,9 @@
151
  },
152
  {
153
  "actual": {
154
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
155
  "model_output.classification.iab_content.tier1.label": "Careers",
156
- "model_output.classification.iab_content.tier2.label": null,
157
  "model_output.classification.iab_content.tier3.label": null
158
  },
159
  "expected": {
@@ -170,7 +185,12 @@
170
  "path": "model_output.classification.iab_content.tier1.label"
171
  },
172
  {
173
- "actual": null,
 
 
 
 
 
174
  "expected": "Computing",
175
  "path": "model_output.classification.iab_content.tier2.label"
176
  },
@@ -188,7 +208,7 @@
188
  {
189
  "actual": {
190
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
191
- "model_output.classification.iab_content.tier1.label": "Sports"
192
  },
193
  "expected": {
194
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -197,7 +217,7 @@
197
  "id": "ml-explanation-maps-to-ai",
198
  "mismatches": [
199
  {
200
- "actual": "Sports",
201
  "expected": "Technology & Computing",
202
  "path": "model_output.classification.iab_content.tier1.label"
203
  }
@@ -209,10 +229,10 @@
209
  },
210
  {
211
  "actual": {
212
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
213
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
214
  "model_output.classification.iab_content.tier2.label": "Computing",
215
- "model_output.classification.iab_content.tier3.label": null
216
  },
217
  "expected": {
218
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -223,7 +243,12 @@
223
  "id": "support-credential-help-maps-to-business-it",
224
  "mismatches": [
225
  {
226
- "actual": null,
 
 
 
 
 
227
  "expected": "Internet",
228
  "path": "model_output.classification.iab_content.tier3.label"
229
  }
@@ -259,9 +284,9 @@
259
  },
260
  {
261
  "actual": {
262
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
263
- "model_output.classification.iab_content.tier1.label": "Sports",
264
- "model_output.classification.iab_content.tier2.label": null,
265
  "model_output.classification.iab_content.tier3.label": null
266
  },
267
  "expected": {
@@ -273,12 +298,17 @@
273
  "id": "trial-signup-maps-to-software",
274
  "mismatches": [
275
  {
276
- "actual": "Sports",
277
  "expected": "Hobbies & Interests",
278
  "path": "model_output.classification.iab_content.tier1.label"
279
  },
280
  {
281
- "actual": null,
 
 
 
 
 
282
  "expected": "Content Production",
283
  "path": "model_output.classification.iab_content.tier2.label"
284
  },
 
13
  "results": [
14
  {
15
  "actual": {
16
+ "model_output.classification.iab_content.mapping_mode": "exact",
17
  "model_output.classification.iab_content.tier1.label": "Automotive",
18
  "model_output.classification.iab_content.tier2.label": null
19
  },
 
24
  },
25
  "id": "car-buying-maps-to-automotive-buying",
26
  "mismatches": [
27
+ {
28
+ "actual": "exact",
29
+ "expected": "nearest_equivalent",
30
+ "path": "model_output.classification.iab_content.mapping_mode"
31
+ },
32
  {
33
  "actual": null,
34
  "expected": "Auto Type",
 
68
  },
69
  {
70
  "actual": {
71
+ "model_output.classification.iab_content.mapping_mode": "exact",
72
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
73
  "model_output.classification.iab_content.tier2.label": null,
74
  "model_output.classification.iab_content.tier3.label": null
 
81
  },
82
  "id": "labtop-buying-maps-to-laptops",
83
  "mismatches": [
84
+ {
85
+ "actual": "exact",
86
+ "expected": "nearest_equivalent",
87
+ "path": "model_output.classification.iab_content.mapping_mode"
88
+ },
89
  {
90
  "actual": null,
91
  "expected": "Computing",
 
104
  },
105
  {
106
  "actual": {
107
+ "model_output.classification.iab_content.mapping_mode": "exact",
108
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
109
  "model_output.classification.iab_content.tier2.label": "Computing",
110
+ "model_output.classification.iab_content.tier3.label": "Software and Applications"
111
  },
112
  "expected": {
113
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
118
  "id": "crm-awareness-maps-to-sales",
119
  "mismatches": [
120
  {
121
+ "actual": "exact",
122
+ "expected": "nearest_equivalent",
123
+ "path": "model_output.classification.iab_content.mapping_mode"
124
  }
125
  ],
126
  "notes": "CRM education should resolve to the closest business/sales path, not generic software.",
 
130
  },
131
  {
132
  "actual": {
133
+ "model_output.classification.iab_content.mapping_mode": "exact",
134
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
135
+ "model_output.classification.iab_content.tier2.label": "Robotics",
136
  "model_output.classification.iab_content.tier3.label": null
137
  },
138
  "expected": {
 
144
  "id": "crm-comparison-maps-to-sales",
145
  "mismatches": [
146
  {
147
+ "actual": "exact",
148
+ "expected": "nearest_equivalent",
149
+ "path": "model_output.classification.iab_content.mapping_mode"
150
+ },
151
+ {
152
+ "actual": "Robotics",
153
  "expected": "Computing",
154
  "path": "model_output.classification.iab_content.tier2.label"
155
  },
 
166
  },
167
  {
168
  "actual": {
169
+ "model_output.classification.iab_content.mapping_mode": "exact",
170
  "model_output.classification.iab_content.tier1.label": "Careers",
171
+ "model_output.classification.iab_content.tier2.label": "Job Search",
172
  "model_output.classification.iab_content.tier3.label": null
173
  },
174
  "expected": {
 
185
  "path": "model_output.classification.iab_content.tier1.label"
186
  },
187
  {
188
+ "actual": "exact",
189
+ "expected": "nearest_equivalent",
190
+ "path": "model_output.classification.iab_content.mapping_mode"
191
+ },
192
+ {
193
+ "actual": "Job Search",
194
  "expected": "Computing",
195
  "path": "model_output.classification.iab_content.tier2.label"
196
  },
 
208
  {
209
  "actual": {
210
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
211
+ "model_output.classification.iab_content.tier1.label": "Science"
212
  },
213
  "expected": {
214
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
217
  "id": "ml-explanation-maps-to-ai",
218
  "mismatches": [
219
  {
220
+ "actual": "Science",
221
  "expected": "Technology & Computing",
222
  "path": "model_output.classification.iab_content.tier1.label"
223
  }
 
229
  },
230
  {
231
  "actual": {
232
+ "model_output.classification.iab_content.mapping_mode": "exact",
233
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
234
  "model_output.classification.iab_content.tier2.label": "Computing",
235
+ "model_output.classification.iab_content.tier3.label": "Information and Network Security"
236
  },
237
  "expected": {
238
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
243
  "id": "support-credential-help-maps-to-business-it",
244
  "mismatches": [
245
  {
246
+ "actual": "exact",
247
+ "expected": "nearest_equivalent",
248
+ "path": "model_output.classification.iab_content.mapping_mode"
249
+ },
250
+ {
251
+ "actual": "Information and Network Security",
252
  "expected": "Internet",
253
  "path": "model_output.classification.iab_content.tier3.label"
254
  }
 
284
  },
285
  {
286
  "actual": {
287
+ "model_output.classification.iab_content.mapping_mode": "exact",
288
+ "model_output.classification.iab_content.tier1.label": "Sensitive Topics",
289
+ "model_output.classification.iab_content.tier2.label": "Crime & Harmful Acts to Individuals, Society & Human Right Violations",
290
  "model_output.classification.iab_content.tier3.label": null
291
  },
292
  "expected": {
 
298
  "id": "trial-signup-maps-to-software",
299
  "mismatches": [
300
  {
301
+ "actual": "Sensitive Topics",
302
  "expected": "Hobbies & Interests",
303
  "path": "model_output.classification.iab_content.tier1.label"
304
  },
305
  {
306
+ "actual": "exact",
307
+ "expected": "nearest_equivalent",
308
+ "path": "model_output.classification.iab_content.mapping_mode"
309
+ },
310
+ {
311
+ "actual": "Crime & Harmful Acts to Individuals, Society & Human Right Violations",
312
  "expected": "Content Production",
313
  "path": "model_output.classification.iab_content.tier2.label"
314
  },
artifacts/evaluation/latest/iab_content_cross_vertical_benchmark_report.json CHANGED
@@ -1,90 +1,90 @@
1
  {
2
- "accepted_accuracy": 0.3,
3
- "accepted_coverage": 0.8889,
4
- "accuracy": 0.2667,
5
  "count": 90,
6
  "dataset_path": "/content/agentic-intent-classifier/data/iab_cross_vertical_benchmark.jsonl",
7
  "difficulty_breakdown": {
8
  "easy": {
9
- "accepted_accuracy": 0.28,
10
- "accepted_coverage": 0.8333,
11
- "accuracy": 0.2333,
12
  "count": 30,
13
- "fallback_rate": 0.1667,
14
- "macro_f1": 0.1556
15
  },
16
  "hard": {
17
- "accepted_accuracy": 0.3846,
18
- "accepted_coverage": 0.8667,
19
- "accuracy": 0.3333,
20
  "count": 30,
21
- "fallback_rate": 0.1333,
22
- "macro_f1": 0.2083
23
  },
24
  "medium": {
25
- "accepted_accuracy": 0.2414,
26
- "accepted_coverage": 0.9667,
27
- "accuracy": 0.2333,
28
  "count": 30,
29
- "fallback_rate": 0.0333,
30
- "macro_f1": 0.1458
31
  }
32
  },
33
- "fallback_rate": 0.1111,
34
  "head": "iab_content",
35
- "macro_f1": 0.1418,
36
  "primary_source": "supervised_classifier",
37
  "suite": "cross_vertical_benchmark",
38
  "tier_metrics": {
39
- "average_prediction_depth": 1.9556,
40
  "error_buckets": {
41
- "exact_match": 24,
42
- "parent_safe_stop": 10,
43
- "right_tier1_wrong_tier2": 15,
44
- "wrong_deep_leaf": 12,
45
  "wrong_tier1": 29
46
  },
47
- "exact_path_accuracy": 0.2667,
48
- "parent_safe_accuracy": 0.4556,
49
  "tier1_accuracy": 0.6778,
50
- "tier2_accuracy": 0.4762,
51
- "tier3_accuracy": 0.2143,
52
- "tier4_accuracy": 0.0
53
  },
54
  "view_metrics": {
55
  "classifier": {
56
- "average_prediction_depth": 1.9556,
57
  "error_buckets": {
58
- "exact_match": 24,
59
- "parent_safe_stop": 10,
60
- "right_tier1_wrong_tier2": 15,
61
- "wrong_deep_leaf": 12,
62
  "wrong_tier1": 29
63
  },
64
- "exact_path_accuracy": 0.2667,
65
- "parent_safe_accuracy": 0.4556,
66
  "tier1_accuracy": 0.6778,
67
- "tier2_accuracy": 0.4762,
68
- "tier3_accuracy": 0.2143,
69
- "tier4_accuracy": 0.0
70
  },
71
  "combined_path": {
72
- "average_prediction_depth": 1.9556,
73
  "error_buckets": {
74
- "exact_match": 24,
75
- "parent_safe_stop": 10,
76
- "right_tier1_wrong_tier2": 15,
77
- "wrong_deep_leaf": 12,
78
  "wrong_tier1": 29
79
  },
80
- "exact_path_accuracy": 0.2667,
81
- "fallback_overuse_count": 17,
82
- "fallback_rate": 0.1889,
83
- "parent_safe_accuracy": 0.4556,
84
  "tier1_accuracy": 0.6778,
85
- "tier2_accuracy": 0.4762,
86
- "tier3_accuracy": 0.2143,
87
- "tier4_accuracy": 0.0
88
  },
89
  "disagreements": {
90
  "classifier_vs_combined": 0
 
1
  {
2
+ "accepted_accuracy": 0.427,
3
+ "accepted_coverage": 0.9889,
4
+ "accuracy": 0.4222,
5
  "count": 90,
6
  "dataset_path": "/content/agentic-intent-classifier/data/iab_cross_vertical_benchmark.jsonl",
7
  "difficulty_breakdown": {
8
  "easy": {
9
+ "accepted_accuracy": 0.4138,
10
+ "accepted_coverage": 0.9667,
11
+ "accuracy": 0.4,
12
  "count": 30,
13
+ "fallback_rate": 0.0333,
14
+ "macro_f1": 0.2727
15
  },
16
  "hard": {
17
+ "accepted_accuracy": 0.4667,
18
+ "accepted_coverage": 1.0,
19
+ "accuracy": 0.4667,
20
  "count": 30,
21
+ "fallback_rate": 0.0,
22
+ "macro_f1": 0.3106
23
  },
24
  "medium": {
25
+ "accepted_accuracy": 0.4,
26
+ "accepted_coverage": 1.0,
27
+ "accuracy": 0.4,
28
  "count": 30,
29
+ "fallback_rate": 0.0,
30
+ "macro_f1": 0.2667
31
  }
32
  },
33
+ "fallback_rate": 0.0111,
34
  "head": "iab_content",
35
+ "macro_f1": 0.227,
36
  "primary_source": "supervised_classifier",
37
  "suite": "cross_vertical_benchmark",
38
  "tier_metrics": {
39
+ "average_prediction_depth": 2.4,
40
  "error_buckets": {
41
+ "exact_match": 38,
42
+ "parent_safe_stop": 1,
43
+ "right_tier1_wrong_tier2": 14,
44
+ "wrong_deep_leaf": 8,
45
  "wrong_tier1": 29
46
  },
47
+ "exact_path_accuracy": 0.4222,
48
+ "parent_safe_accuracy": 0.4444,
49
  "tier1_accuracy": 0.6778,
50
+ "tier2_accuracy": 0.4881,
51
+ "tier3_accuracy": 0.5238,
52
+ "tier4_accuracy": 0.5
53
  },
54
  "view_metrics": {
55
  "classifier": {
56
+ "average_prediction_depth": 2.4,
57
  "error_buckets": {
58
+ "exact_match": 37,
59
+ "parent_safe_stop": 1,
60
+ "right_tier1_wrong_tier2": 14,
61
+ "wrong_deep_leaf": 9,
62
  "wrong_tier1": 29
63
  },
64
+ "exact_path_accuracy": 0.4111,
65
+ "parent_safe_accuracy": 0.4333,
66
  "tier1_accuracy": 0.6778,
67
+ "tier2_accuracy": 0.4881,
68
+ "tier3_accuracy": 0.4762,
69
+ "tier4_accuracy": 0.5
70
  },
71
  "combined_path": {
72
+ "average_prediction_depth": 2.4,
73
  "error_buckets": {
74
+ "exact_match": 37,
75
+ "parent_safe_stop": 1,
76
+ "right_tier1_wrong_tier2": 14,
77
+ "wrong_deep_leaf": 9,
78
  "wrong_tier1": 29
79
  },
80
+ "exact_path_accuracy": 0.4111,
81
+ "fallback_overuse_count": 25,
82
+ "fallback_rate": 0.2778,
83
+ "parent_safe_accuracy": 0.4333,
84
  "tier1_accuracy": 0.6778,
85
+ "tier2_accuracy": 0.4881,
86
+ "tier3_accuracy": 0.4762,
87
+ "tier4_accuracy": 0.5
88
  },
89
  "disagreements": {
90
  "classifier_vs_combined": 0
artifacts/evaluation/latest/iab_content_difficulty_benchmark_report.json CHANGED
@@ -1,90 +1,90 @@
1
  {
2
- "accepted_accuracy": 0.4219,
3
- "accepted_coverage": 0.8205,
4
- "accuracy": 0.3462,
5
  "count": 156,
6
  "dataset_path": "/content/agentic-intent-classifier/data/iab_benchmark.jsonl",
7
  "difficulty_breakdown": {
8
  "easy": {
9
- "accepted_accuracy": 0.4889,
10
- "accepted_coverage": 0.8654,
11
- "accuracy": 0.4231,
12
  "count": 52,
13
- "fallback_rate": 0.1346,
14
- "macro_f1": 0.2305
15
  },
16
  "hard": {
17
- "accepted_accuracy": 0.3846,
18
- "accepted_coverage": 0.75,
19
- "accuracy": 0.2885,
20
  "count": 52,
21
- "fallback_rate": 0.25,
22
- "macro_f1": 0.1638
23
  },
24
  "medium": {
25
- "accepted_accuracy": 0.3864,
26
- "accepted_coverage": 0.8462,
27
- "accuracy": 0.3269,
28
  "count": 52,
29
- "fallback_rate": 0.1538,
30
- "macro_f1": 0.1819
31
  }
32
  },
33
- "fallback_rate": 0.1795,
34
  "head": "iab_content",
35
- "macro_f1": 0.1478,
36
  "primary_source": "supervised_classifier",
37
  "suite": "difficulty_benchmark",
38
  "tier_metrics": {
39
- "average_prediction_depth": 2.0256,
40
  "error_buckets": {
41
- "exact_match": 54,
42
- "parent_safe_stop": 21,
43
- "right_tier1_wrong_tier2": 37,
44
- "wrong_deep_leaf": 3,
45
- "wrong_tier1": 41
46
  },
47
- "exact_path_accuracy": 0.3462,
48
- "parent_safe_accuracy": 0.6603,
49
- "tier1_accuracy": 0.7372,
50
- "tier2_accuracy": 0.5,
51
- "tier3_accuracy": 0.3519,
52
- "tier4_accuracy": 0.2917
53
  },
54
  "view_metrics": {
55
  "classifier": {
56
- "average_prediction_depth": 2.0256,
57
  "error_buckets": {
58
- "exact_match": 49,
59
- "parent_safe_stop": 21,
60
- "right_tier1_wrong_tier2": 37,
61
- "wrong_deep_leaf": 8,
62
- "wrong_tier1": 41
63
  },
64
- "exact_path_accuracy": 0.3141,
65
- "parent_safe_accuracy": 0.6282,
66
- "tier1_accuracy": 0.7372,
67
- "tier2_accuracy": 0.5,
68
- "tier3_accuracy": 0.3056,
69
- "tier4_accuracy": 0.2083
70
  },
71
  "combined_path": {
72
- "average_prediction_depth": 2.0256,
73
  "error_buckets": {
74
- "exact_match": 49,
75
- "parent_safe_stop": 21,
76
- "right_tier1_wrong_tier2": 37,
77
- "wrong_deep_leaf": 8,
78
- "wrong_tier1": 41
79
  },
80
- "exact_path_accuracy": 0.3141,
81
- "fallback_overuse_count": 14,
82
- "fallback_rate": 0.0897,
83
- "parent_safe_accuracy": 0.6282,
84
- "tier1_accuracy": 0.7372,
85
- "tier2_accuracy": 0.5,
86
- "tier3_accuracy": 0.3056,
87
- "tier4_accuracy": 0.2083
88
  },
89
  "disagreements": {
90
  "classifier_vs_combined": 0
 
1
  {
2
+ "accepted_accuracy": 0.4231,
3
+ "accepted_coverage": 1.0,
4
+ "accuracy": 0.4231,
5
  "count": 156,
6
  "dataset_path": "/content/agentic-intent-classifier/data/iab_benchmark.jsonl",
7
  "difficulty_breakdown": {
8
  "easy": {
9
+ "accepted_accuracy": 0.4615,
10
+ "accepted_coverage": 1.0,
11
+ "accuracy": 0.4615,
12
  "count": 52,
13
+ "fallback_rate": 0.0,
14
+ "macro_f1": 0.2359
15
  },
16
  "hard": {
17
+ "accepted_accuracy": 0.3654,
18
+ "accepted_coverage": 1.0,
19
+ "accuracy": 0.3654,
20
  "count": 52,
21
+ "fallback_rate": 0.0,
22
+ "macro_f1": 0.1892
23
  },
24
  "medium": {
25
+ "accepted_accuracy": 0.4423,
26
+ "accepted_coverage": 1.0,
27
+ "accuracy": 0.4423,
28
  "count": 52,
29
+ "fallback_rate": 0.0,
30
+ "macro_f1": 0.2338
31
  }
32
  },
33
+ "fallback_rate": 0.0,
34
  "head": "iab_content",
35
+ "macro_f1": 0.1524,
36
  "primary_source": "supervised_classifier",
37
  "suite": "difficulty_benchmark",
38
  "tier_metrics": {
39
+ "average_prediction_depth": 2.4103,
40
  "error_buckets": {
41
+ "exact_match": 66,
42
+ "parent_safe_stop": 1,
43
+ "right_tier1_wrong_tier2": 42,
44
+ "wrong_deep_leaf": 8,
45
+ "wrong_tier1": 39
46
  },
47
+ "exact_path_accuracy": 0.4231,
48
+ "parent_safe_accuracy": 0.5385,
49
+ "tier1_accuracy": 0.75,
50
+ "tier2_accuracy": 0.4808,
51
+ "tier3_accuracy": 0.5093,
52
+ "tier4_accuracy": 0.4583
53
  },
54
  "view_metrics": {
55
  "classifier": {
56
+ "average_prediction_depth": 2.4103,
57
  "error_buckets": {
58
+ "exact_match": 59,
59
+ "parent_safe_stop": 1,
60
+ "right_tier1_wrong_tier2": 42,
61
+ "wrong_deep_leaf": 15,
62
+ "wrong_tier1": 39
63
  },
64
+ "exact_path_accuracy": 0.3782,
65
+ "parent_safe_accuracy": 0.4936,
66
+ "tier1_accuracy": 0.75,
67
+ "tier2_accuracy": 0.4808,
68
+ "tier3_accuracy": 0.4259,
69
+ "tier4_accuracy": 0.1667
70
  },
71
  "combined_path": {
72
+ "average_prediction_depth": 2.4103,
73
  "error_buckets": {
74
+ "exact_match": 59,
75
+ "parent_safe_stop": 1,
76
+ "right_tier1_wrong_tier2": 42,
77
+ "wrong_deep_leaf": 15,
78
+ "wrong_tier1": 39
79
  },
80
+ "exact_path_accuracy": 0.3782,
81
+ "fallback_overuse_count": 15,
82
+ "fallback_rate": 0.0962,
83
+ "parent_safe_accuracy": 0.4936,
84
+ "tier1_accuracy": 0.75,
85
+ "tier2_accuracy": 0.4808,
86
+ "tier3_accuracy": 0.4259,
87
+ "tier4_accuracy": 0.1667
88
  },
89
  "disagreements": {
90
  "classifier_vs_combined": 0
artifacts/evaluation/latest/iab_content_extended_cases_report.json CHANGED
@@ -1,24 +1,24 @@
1
  {
2
- "accepted_accuracy": 0.4286,
3
- "accepted_coverage": 0.875,
4
- "accuracy": 0.375,
5
  "count": 8,
6
  "dataset_path": "/content/agentic-intent-classifier/data/iab/extended_cases.jsonl",
7
- "fallback_rate": 0.125,
8
  "head": "iab_content",
9
- "macro_f1": 0.2308,
10
  "primary_source": "supervised_classifier",
11
  "suite": "extended_cases",
12
  "tier_metrics": {
13
- "average_prediction_depth": 2.0,
14
  "error_buckets": {
15
- "exact_match": 3,
16
  "right_tier1_wrong_tier2": 2,
17
- "wrong_deep_leaf": 2,
18
  "wrong_tier1": 1
19
  },
20
- "exact_path_accuracy": 0.375,
21
- "parent_safe_accuracy": 0.375,
22
  "tier1_accuracy": 0.875,
23
  "tier2_accuracy": 0.5714,
24
  "tier3_accuracy": 0.0,
@@ -26,32 +26,32 @@
26
  },
27
  "view_metrics": {
28
  "classifier": {
29
- "average_prediction_depth": 2.0,
30
  "error_buckets": {
31
- "exact_match": 3,
32
  "right_tier1_wrong_tier2": 2,
33
- "wrong_deep_leaf": 2,
34
  "wrong_tier1": 1
35
  },
36
- "exact_path_accuracy": 0.375,
37
- "parent_safe_accuracy": 0.375,
38
  "tier1_accuracy": 0.875,
39
  "tier2_accuracy": 0.5714,
40
  "tier3_accuracy": 0.0,
41
  "tier4_accuracy": 0.0
42
  },
43
  "combined_path": {
44
- "average_prediction_depth": 2.0,
45
  "error_buckets": {
46
- "exact_match": 3,
47
  "right_tier1_wrong_tier2": 2,
48
- "wrong_deep_leaf": 2,
49
  "wrong_tier1": 1
50
  },
51
- "exact_path_accuracy": 0.375,
52
  "fallback_overuse_count": 2,
53
  "fallback_rate": 0.25,
54
- "parent_safe_accuracy": 0.375,
55
  "tier1_accuracy": 0.875,
56
  "tier2_accuracy": 0.5714,
57
  "tier3_accuracy": 0.0,
 
1
  {
2
+ "accepted_accuracy": 0.5,
3
+ "accepted_coverage": 1.0,
4
+ "accuracy": 0.5,
5
  "count": 8,
6
  "dataset_path": "/content/agentic-intent-classifier/data/iab/extended_cases.jsonl",
7
+ "fallback_rate": 0.0,
8
  "head": "iab_content",
9
+ "macro_f1": 0.3333,
10
  "primary_source": "supervised_classifier",
11
  "suite": "extended_cases",
12
  "tier_metrics": {
13
+ "average_prediction_depth": 2.125,
14
  "error_buckets": {
15
+ "exact_match": 4,
16
  "right_tier1_wrong_tier2": 2,
17
+ "wrong_deep_leaf": 1,
18
  "wrong_tier1": 1
19
  },
20
+ "exact_path_accuracy": 0.5,
21
+ "parent_safe_accuracy": 0.5,
22
  "tier1_accuracy": 0.875,
23
  "tier2_accuracy": 0.5714,
24
  "tier3_accuracy": 0.0,
 
26
  },
27
  "view_metrics": {
28
  "classifier": {
29
+ "average_prediction_depth": 2.125,
30
  "error_buckets": {
31
+ "exact_match": 4,
32
  "right_tier1_wrong_tier2": 2,
33
+ "wrong_deep_leaf": 1,
34
  "wrong_tier1": 1
35
  },
36
+ "exact_path_accuracy": 0.5,
37
+ "parent_safe_accuracy": 0.5,
38
  "tier1_accuracy": 0.875,
39
  "tier2_accuracy": 0.5714,
40
  "tier3_accuracy": 0.0,
41
  "tier4_accuracy": 0.0
42
  },
43
  "combined_path": {
44
+ "average_prediction_depth": 2.125,
45
  "error_buckets": {
46
+ "exact_match": 4,
47
  "right_tier1_wrong_tier2": 2,
48
+ "wrong_deep_leaf": 1,
49
  "wrong_tier1": 1
50
  },
51
+ "exact_path_accuracy": 0.5,
52
  "fallback_overuse_count": 2,
53
  "fallback_rate": 0.25,
54
+ "parent_safe_accuracy": 0.5,
55
  "tier1_accuracy": 0.875,
56
  "tier2_accuracy": 0.5714,
57
  "tier3_accuracy": 0.0,
artifacts/evaluation/latest/iab_content_hard_cases_report.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
- "accepted_accuracy": 0.5,
3
- "accepted_coverage": 0.75,
4
  "accuracy": 0.375,
5
  "count": 8,
6
  "dataset_path": "/content/agentic-intent-classifier/data/iab/hard_cases.jsonl",
7
- "fallback_rate": 0.25,
8
  "head": "iab_content",
9
- "macro_f1": 0.25,
10
  "primary_source": "supervised_classifier",
11
  "suite": "hard_cases",
12
  "tier_metrics": {
13
- "average_prediction_depth": 2.0,
14
  "error_buckets": {
15
  "exact_match": 3,
16
  "right_tier1_wrong_tier2": 1,
@@ -25,7 +25,7 @@
25
  },
26
  "view_metrics": {
27
  "classifier": {
28
- "average_prediction_depth": 2.0,
29
  "error_buckets": {
30
  "exact_match": 3,
31
  "right_tier1_wrong_tier2": 1,
@@ -39,7 +39,7 @@
39
  "tier4_accuracy": 0.0
40
  },
41
  "combined_path": {
42
- "average_prediction_depth": 2.0,
43
  "error_buckets": {
44
  "exact_match": 3,
45
  "right_tier1_wrong_tier2": 1,
 
1
  {
2
+ "accepted_accuracy": 0.4286,
3
+ "accepted_coverage": 0.875,
4
  "accuracy": 0.375,
5
  "count": 8,
6
  "dataset_path": "/content/agentic-intent-classifier/data/iab/hard_cases.jsonl",
7
+ "fallback_rate": 0.125,
8
  "head": "iab_content",
9
+ "macro_f1": 0.2308,
10
  "primary_source": "supervised_classifier",
11
  "suite": "hard_cases",
12
  "tier_metrics": {
13
+ "average_prediction_depth": 2.25,
14
  "error_buckets": {
15
  "exact_match": 3,
16
  "right_tier1_wrong_tier2": 1,
 
25
  },
26
  "view_metrics": {
27
  "classifier": {
28
+ "average_prediction_depth": 2.25,
29
  "error_buckets": {
30
  "exact_match": 3,
31
  "right_tier1_wrong_tier2": 1,
 
39
  "tier4_accuracy": 0.0
40
  },
41
  "combined_path": {
42
+ "average_prediction_depth": 2.25,
43
  "error_buckets": {
44
  "exact_match": 3,
45
  "right_tier1_wrong_tier2": 1,
artifacts/evaluation/latest/iab_content_test_report.json CHANGED
@@ -1,46 +1,46 @@
1
  {
2
- "accepted_accuracy": 0.9242,
3
- "accepted_coverage": 0.9973,
4
- "accuracy": 0.922,
5
  "count": 3282,
6
  "dataset_path": "/content/agentic-intent-classifier/data/iab/test.jsonl",
7
- "fallback_rate": 0.0027,
8
  "head": "iab_content",
9
- "macro_f1": 0.8741,
10
  "primary_source": "supervised_classifier",
11
  "suite": "test",
12
  "tier_metrics": {
13
- "average_prediction_depth": 2.1789,
14
  "error_buckets": {
15
- "exact_match": 3026,
16
- "parent_safe_stop": 68,
17
- "right_tier1_wrong_tier2": 59,
18
- "wrong_deep_leaf": 96,
19
- "wrong_tier1": 33
20
  },
21
- "exact_path_accuracy": 0.922,
22
- "parent_safe_accuracy": 0.9509,
23
- "tier1_accuracy": 0.9899,
24
- "tier2_accuracy": 0.9693,
25
- "tier3_accuracy": 0.8477,
26
- "tier4_accuracy": 0.5143
27
  },
28
  "view_metrics": {
29
  "classifier": {
30
- "average_prediction_depth": 2.1789,
31
  "error_buckets": {
32
- "exact_match": 2995,
33
- "parent_safe_stop": 62,
34
- "right_tier1_wrong_tier2": 71,
35
- "wrong_deep_leaf": 121,
36
- "wrong_tier1": 33
37
  },
38
- "exact_path_accuracy": 0.9126,
39
- "parent_safe_accuracy": 0.9397,
40
- "tier1_accuracy": 0.9899,
41
- "tier2_accuracy": 0.9651,
42
- "tier3_accuracy": 0.8218,
43
- "tier4_accuracy": 0.3929
44
  },
45
  "combined_path": {
46
  "count": 3282,
 
1
  {
2
+ "accepted_accuracy": 0.943,
3
+ "accepted_coverage": 1.0,
4
+ "accuracy": 0.943,
5
  "count": 3282,
6
  "dataset_path": "/content/agentic-intent-classifier/data/iab/test.jsonl",
7
+ "fallback_rate": 0.0,
8
  "head": "iab_content",
9
+ "macro_f1": 0.911,
10
  "primary_source": "supervised_classifier",
11
  "suite": "test",
12
  "tier_metrics": {
13
+ "average_prediction_depth": 2.213,
14
  "error_buckets": {
15
+ "exact_match": 3095,
16
+ "parent_safe_stop": 45,
17
+ "right_tier1_wrong_tier2": 41,
18
+ "wrong_deep_leaf": 72,
19
+ "wrong_tier1": 29
20
  },
21
+ "exact_path_accuracy": 0.943,
22
+ "parent_safe_accuracy": 0.958,
23
+ "tier1_accuracy": 0.9912,
24
+ "tier2_accuracy": 0.9776,
25
+ "tier3_accuracy": 0.9078,
26
+ "tier4_accuracy": 0.7
27
  },
28
  "view_metrics": {
29
  "classifier": {
30
+ "average_prediction_depth": 2.213,
31
  "error_buckets": {
32
+ "exact_match": 3052,
33
+ "parent_safe_stop": 44,
34
+ "right_tier1_wrong_tier2": 53,
35
+ "wrong_deep_leaf": 104,
36
+ "wrong_tier1": 29
37
  },
38
+ "exact_path_accuracy": 0.9299,
39
+ "parent_safe_accuracy": 0.9445,
40
+ "tier1_accuracy": 0.9912,
41
+ "tier2_accuracy": 0.9734,
42
+ "tier3_accuracy": 0.8725,
43
+ "tier4_accuracy": 0.5
44
  },
45
  "combined_path": {
46
  "count": 3282,
artifacts/evaluation/latest/iab_content_train_report.json CHANGED
@@ -1,46 +1,46 @@
1
  {
2
- "accepted_accuracy": 0.93,
3
- "accepted_coverage": 0.9978,
4
- "accuracy": 0.9282,
5
  "count": 13211,
6
  "dataset_path": "/content/agentic-intent-classifier/data/iab/train.jsonl",
7
- "fallback_rate": 0.0022,
8
  "head": "iab_content",
9
- "macro_f1": 0.8851,
10
  "primary_source": "supervised_classifier",
11
  "suite": "train",
12
  "tier_metrics": {
13
- "average_prediction_depth": 2.172,
14
  "error_buckets": {
15
- "exact_match": 12263,
16
- "parent_safe_stop": 259,
17
- "right_tier1_wrong_tier2": 229,
18
- "wrong_deep_leaf": 356,
19
- "wrong_tier1": 104
20
  },
21
- "exact_path_accuracy": 0.9282,
22
- "parent_safe_accuracy": 0.9572,
23
- "tier1_accuracy": 0.9921,
24
- "tier2_accuracy": 0.9726,
25
- "tier3_accuracy": 0.8565,
26
- "tier4_accuracy": 0.5518
27
  },
28
  "view_metrics": {
29
  "classifier": {
30
- "average_prediction_depth": 2.172,
31
  "error_buckets": {
32
- "exact_match": 12130,
33
- "parent_safe_stop": 238,
34
- "right_tier1_wrong_tier2": 277,
35
- "wrong_deep_leaf": 462,
36
- "wrong_tier1": 104
37
  },
38
- "exact_path_accuracy": 0.9182,
39
- "parent_safe_accuracy": 0.9456,
40
- "tier1_accuracy": 0.9921,
41
- "tier2_accuracy": 0.9685,
42
- "tier3_accuracy": 0.829,
43
- "tier4_accuracy": 0.4214
44
  },
45
  "combined_path": {
46
  "count": 13211,
 
1
  {
2
+ "accepted_accuracy": 0.9459,
3
+ "accepted_coverage": 1.0,
4
+ "accuracy": 0.9459,
5
  "count": 13211,
6
  "dataset_path": "/content/agentic-intent-classifier/data/iab/train.jsonl",
7
+ "fallback_rate": 0.0,
8
  "head": "iab_content",
9
+ "macro_f1": 0.9194,
10
  "primary_source": "supervised_classifier",
11
  "suite": "train",
12
  "tier_metrics": {
13
+ "average_prediction_depth": 2.2105,
14
  "error_buckets": {
15
+ "exact_match": 12496,
16
+ "parent_safe_stop": 162,
17
+ "right_tier1_wrong_tier2": 144,
18
+ "wrong_deep_leaf": 284,
19
+ "wrong_tier1": 125
20
  },
21
+ "exact_path_accuracy": 0.9459,
22
+ "parent_safe_accuracy": 0.9585,
23
+ "tier1_accuracy": 0.9905,
24
+ "tier2_accuracy": 0.9805,
25
+ "tier3_accuracy": 0.9135,
26
+ "tier4_accuracy": 0.7268
27
  },
28
  "view_metrics": {
29
  "classifier": {
30
+ "average_prediction_depth": 2.2105,
31
  "error_buckets": {
32
+ "exact_match": 12323,
33
+ "parent_safe_stop": 157,
34
+ "right_tier1_wrong_tier2": 192,
35
+ "wrong_deep_leaf": 414,
36
+ "wrong_tier1": 125
37
  },
38
+ "exact_path_accuracy": 0.9328,
39
+ "parent_safe_accuracy": 0.945,
40
+ "tier1_accuracy": 0.9905,
41
+ "tier2_accuracy": 0.9764,
42
+ "tier3_accuracy": 0.8777,
43
+ "tier4_accuracy": 0.525
44
  },
45
  "combined_path": {
46
  "count": 13211,
artifacts/evaluation/latest/iab_content_val_report.json CHANGED
@@ -1,46 +1,46 @@
1
  {
2
- "accepted_accuracy": 0.9246,
3
- "accepted_coverage": 0.9979,
4
- "accuracy": 0.9229,
5
  "count": 3282,
6
  "dataset_path": "/content/agentic-intent-classifier/data/iab/val.jsonl",
7
- "fallback_rate": 0.0021,
8
  "head": "iab_content",
9
- "macro_f1": 0.8789,
10
  "primary_source": "supervised_classifier",
11
  "suite": "val",
12
  "tier_metrics": {
13
- "average_prediction_depth": 2.1789,
14
  "error_buckets": {
15
- "exact_match": 3029,
16
- "parent_safe_stop": 69,
17
- "right_tier1_wrong_tier2": 67,
18
- "wrong_deep_leaf": 91,
19
- "wrong_tier1": 26
20
  },
21
- "exact_path_accuracy": 0.9229,
22
- "parent_safe_accuracy": 0.9549,
23
- "tier1_accuracy": 0.9921,
24
- "tier2_accuracy": 0.9686,
25
- "tier3_accuracy": 0.8549,
26
- "tier4_accuracy": 0.5286
27
  },
28
  "view_metrics": {
29
  "classifier": {
30
- "average_prediction_depth": 2.1789,
31
  "error_buckets": {
32
- "exact_match": 2997,
33
- "parent_safe_stop": 64,
34
- "right_tier1_wrong_tier2": 79,
35
- "wrong_deep_leaf": 116,
36
- "wrong_tier1": 26
37
  },
38
- "exact_path_accuracy": 0.9132,
39
- "parent_safe_accuracy": 0.9436,
40
- "tier1_accuracy": 0.9921,
41
- "tier2_accuracy": 0.9644,
42
- "tier3_accuracy": 0.829,
43
- "tier4_accuracy": 0.4071
44
  },
45
  "combined_path": {
46
  "count": 3282,
 
1
  {
2
+ "accepted_accuracy": 0.9442,
3
+ "accepted_coverage": 1.0,
4
+ "accuracy": 0.9442,
5
  "count": 3282,
6
  "dataset_path": "/content/agentic-intent-classifier/data/iab/val.jsonl",
7
+ "fallback_rate": 0.0,
8
  "head": "iab_content",
9
+ "macro_f1": 0.9166,
10
  "primary_source": "supervised_classifier",
11
  "suite": "val",
12
  "tier_metrics": {
13
+ "average_prediction_depth": 2.2151,
14
  "error_buckets": {
15
+ "exact_match": 3099,
16
+ "parent_safe_stop": 35,
17
+ "right_tier1_wrong_tier2": 45,
18
+ "wrong_deep_leaf": 72,
19
+ "wrong_tier1": 31
20
  },
21
+ "exact_path_accuracy": 0.9442,
22
+ "parent_safe_accuracy": 0.9576,
23
+ "tier1_accuracy": 0.9906,
24
+ "tier2_accuracy": 0.9769,
25
+ "tier3_accuracy": 0.9088,
26
+ "tier4_accuracy": 0.7286
27
  },
28
  "view_metrics": {
29
  "classifier": {
30
+ "average_prediction_depth": 2.2151,
31
  "error_buckets": {
32
+ "exact_match": 3056,
33
+ "parent_safe_stop": 34,
34
+ "right_tier1_wrong_tier2": 57,
35
+ "wrong_deep_leaf": 104,
36
+ "wrong_tier1": 31
37
  },
38
+ "exact_path_accuracy": 0.9311,
39
+ "parent_safe_accuracy": 0.9442,
40
+ "tier1_accuracy": 0.9906,
41
+ "tier2_accuracy": 0.9727,
42
+ "tier3_accuracy": 0.8736,
43
+ "tier4_accuracy": 0.5286
44
  },
45
  "combined_path": {
46
  "count": 3282,
artifacts/evaluation/latest/iab_cross_vertical_behavior_lock_regression.json CHANGED
@@ -1,21 +1,21 @@
1
  {
2
  "by_status": {
3
  "must_fix": {
4
- "failed": 88,
5
- "passed": 2,
6
  "total": 90
7
  }
8
  },
9
  "cases_path": "/content/agentic-intent-classifier/examples/iab_cross_vertical_behavior_lock_cases.json",
10
  "count": 90,
11
- "failed": 88,
12
- "passed": 2,
13
  "results": [
14
  {
15
  "actual": {
16
  "model_output.classification.iab_content.mapping_mode": "exact",
17
- "model_output.classification.iab_content.tier1.label": "Automotive",
18
- "model_output.classification.iab_content.tier2.label": "Auto Rentals"
19
  },
20
  "expected": {
21
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -25,7 +25,7 @@
25
  "id": "auto-buying-easy",
26
  "mismatches": [
27
  {
28
- "actual": "Automotive",
29
  "expected": "Travel",
30
  "path": "model_output.classification.iab_content.tier1.label"
31
  },
@@ -35,7 +35,7 @@
35
  "path": "model_output.classification.iab_content.mapping_mode"
36
  },
37
  {
38
- "actual": "Auto Rentals",
39
  "expected": "Travel Type",
40
  "path": "model_output.classification.iab_content.tier2.label"
41
  }
@@ -47,7 +47,7 @@
47
  },
48
  {
49
  "actual": {
50
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
51
  "model_output.classification.iab_content.tier1.label": "Automotive",
52
  "model_output.classification.iab_content.tier2.label": "Auto Body Styles"
53
  },
@@ -57,17 +57,23 @@
57
  "model_output.classification.iab_content.tier2.label": "Auto Body Styles"
58
  },
59
  "id": "auto-buying-medium",
60
- "mismatches": [],
 
 
 
 
 
 
61
  "notes": "Cross-vertical medium IAB mapping case for Automotive > Auto Buying and Selling.",
62
- "pass": true,
63
  "status": "must_fix",
64
  "text": "Best used SUV for a family of four"
65
  },
66
  {
67
  "actual": {
68
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
69
  "model_output.classification.iab_content.tier1.label": "Automotive",
70
- "model_output.classification.iab_content.tier2.label": "Auto Type"
71
  },
72
  "expected": {
73
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -75,18 +81,29 @@
75
  "model_output.classification.iab_content.tier2.label": "Auto Type"
76
  },
77
  "id": "auto-buying-hard",
78
- "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
79
  "notes": "Cross-vertical hard IAB mapping case for Automotive > Auto Buying and Selling.",
80
- "pass": true,
81
  "status": "must_fix",
82
  "text": "I need a shortlist of practical cars before making a purchase this month"
83
  },
84
  {
85
  "actual": {
86
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
87
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
88
  "model_output.classification.iab_content.tier2.label": "Computing",
89
- "model_output.classification.iab_content.tier3.label": null
90
  },
91
  "expected": {
92
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -97,9 +114,9 @@
97
  "id": "sales-crm-easy",
98
  "mismatches": [
99
  {
100
- "actual": null,
101
- "expected": "Software and Applications",
102
- "path": "model_output.classification.iab_content.tier3.label"
103
  }
104
  ],
105
  "notes": "Cross-vertical easy IAB mapping case for Business and Finance > Business > Sales.",
@@ -109,9 +126,9 @@
109
  },
110
  {
111
  "actual": {
112
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
113
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
114
- "model_output.classification.iab_content.tier2.label": null,
115
  "model_output.classification.iab_content.tier3.label": null
116
  },
117
  "expected": {
@@ -123,7 +140,12 @@
123
  "id": "sales-crm-medium",
124
  "mismatches": [
125
  {
126
- "actual": null,
 
 
 
 
 
127
  "expected": "Computing",
128
  "path": "model_output.classification.iab_content.tier2.label"
129
  },
@@ -140,10 +162,10 @@
140
  },
141
  {
142
  "actual": {
143
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
144
  "model_output.classification.iab_content.tier1.label": "Business and Finance",
145
  "model_output.classification.iab_content.tier2.label": "Business",
146
- "model_output.classification.iab_content.tier3.label": null
147
  },
148
  "expected": {
149
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -154,9 +176,9 @@
154
  "id": "sales-crm-hard",
155
  "mismatches": [
156
  {
157
- "actual": null,
158
- "expected": "Sales",
159
- "path": "model_output.classification.iab_content.tier3.label"
160
  }
161
  ],
162
  "notes": "Cross-vertical hard IAB mapping case for Business and Finance > Business > Sales.",
@@ -166,9 +188,9 @@
166
  },
167
  {
168
  "actual": {
169
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
170
- "model_output.classification.iab_content.tier1.label": "Hobbies & Interests",
171
- "model_output.classification.iab_content.tier2.label": null,
172
  "model_output.classification.iab_content.tier3.label": null
173
  },
174
  "expected": {
@@ -180,12 +202,17 @@
180
  "id": "marketing-tools-easy",
181
  "mismatches": [
182
  {
183
- "actual": "Hobbies & Interests",
184
  "expected": "Technology & Computing",
185
  "path": "model_output.classification.iab_content.tier1.label"
186
  },
187
  {
188
- "actual": null,
 
 
 
 
 
189
  "expected": "Computing",
190
  "path": "model_output.classification.iab_content.tier2.label"
191
  },
@@ -202,9 +229,9 @@
202
  },
203
  {
204
  "actual": {
205
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
206
- "model_output.classification.iab_content.tier1.label": "Careers",
207
- "model_output.classification.iab_content.tier2.label": null
208
  },
209
  "expected": {
210
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -214,12 +241,17 @@
214
  "id": "marketing-tools-medium",
215
  "mismatches": [
216
  {
217
- "actual": "Careers",
218
  "expected": "Business and Finance",
219
  "path": "model_output.classification.iab_content.tier1.label"
220
  },
221
  {
222
- "actual": null,
 
 
 
 
 
223
  "expected": "Business",
224
  "path": "model_output.classification.iab_content.tier2.label"
225
  }
@@ -231,9 +263,9 @@
231
  },
232
  {
233
  "actual": {
234
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
235
  "model_output.classification.iab_content.tier1.label": "Personal Finance",
236
- "model_output.classification.iab_content.tier2.label": null,
237
  "model_output.classification.iab_content.tier3.label": null
238
  },
239
  "expected": {
@@ -250,7 +282,12 @@
250
  "path": "model_output.classification.iab_content.tier1.label"
251
  },
252
  {
253
- "actual": null,
 
 
 
 
 
254
  "expected": "Computing",
255
  "path": "model_output.classification.iab_content.tier2.label"
256
  },
@@ -267,10 +304,10 @@
267
  },
268
  {
269
  "actual": {
270
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
271
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
272
  "model_output.classification.iab_content.tier2.label": "Computing",
273
- "model_output.classification.iab_content.tier3.label": null
274
  },
275
  "expected": {
276
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -281,7 +318,12 @@
281
  "id": "business-it-easy",
282
  "mismatches": [
283
  {
284
- "actual": null,
 
 
 
 
 
285
  "expected": "Internet",
286
  "path": "model_output.classification.iab_content.tier3.label"
287
  }
@@ -293,9 +335,9 @@
293
  },
294
  {
295
  "actual": {
296
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
297
- "model_output.classification.iab_content.tier1.label": "Personal Finance",
298
- "model_output.classification.iab_content.tier2.label": null
299
  },
300
  "expected": {
301
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -305,14 +347,9 @@
305
  "id": "business-it-medium",
306
  "mismatches": [
307
  {
308
- "actual": "Personal Finance",
309
- "expected": "Careers",
310
- "path": "model_output.classification.iab_content.tier1.label"
311
- },
312
- {
313
- "actual": null,
314
- "expected": "Job Search",
315
- "path": "model_output.classification.iab_content.tier2.label"
316
  }
317
  ],
318
  "notes": "Cross-vertical medium IAB mapping case for Business and Finance > Business > Business I.T..",
@@ -412,8 +449,8 @@
412
  {
413
  "actual": {
414
  "model_output.classification.iab_content.mapping_mode": "exact",
415
- "model_output.classification.iab_content.tier1.label": "Food & Drink",
416
- "model_output.classification.iab_content.tier2.label": "Dining Out"
417
  },
418
  "expected": {
419
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -422,10 +459,20 @@
422
  },
423
  "id": "dining-out-hard",
424
  "mismatches": [
 
 
 
 
 
425
  {
426
  "actual": "exact",
427
  "expected": "nearest_equivalent",
428
  "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
429
  }
430
  ],
431
  "notes": "Cross-vertical hard IAB mapping case for Food & Drink > Dining Out.",
@@ -502,7 +549,7 @@
502
  {
503
  "actual": {
504
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
505
- "model_output.classification.iab_content.tier1.label": "Sports"
506
  },
507
  "expected": {
508
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -511,7 +558,7 @@
511
  "id": "artificial-intelligence-easy",
512
  "mismatches": [
513
  {
514
- "actual": "Sports",
515
  "expected": "Technology & Computing",
516
  "path": "model_output.classification.iab_content.tier1.label"
517
  }
@@ -571,7 +618,7 @@
571
  },
572
  {
573
  "actual": {
574
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
575
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
576
  "model_output.classification.iab_content.tier2.label": "Computing"
577
  },
@@ -587,6 +634,11 @@
587
  "expected": "Business and Finance",
588
  "path": "model_output.classification.iab_content.tier1.label"
589
  },
 
 
 
 
 
590
  {
591
  "actual": "Computing",
592
  "expected": "Business",
@@ -600,10 +652,10 @@
600
  },
601
  {
602
  "actual": {
603
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
604
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
605
  "model_output.classification.iab_content.tier2.label": "Computing",
606
- "model_output.classification.iab_content.tier3.label": null,
607
  "model_output.classification.iab_content.tier4.label": null
608
  },
609
  "expected": {
@@ -616,7 +668,12 @@
616
  "id": "software-apps-medium",
617
  "mismatches": [
618
  {
619
- "actual": null,
 
 
 
 
 
620
  "expected": "Internet",
621
  "path": "model_output.classification.iab_content.tier3.label"
622
  },
@@ -633,9 +690,9 @@
633
  },
634
  {
635
  "actual": {
636
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
637
- "model_output.classification.iab_content.tier1.label": "Business and Finance",
638
- "model_output.classification.iab_content.tier2.label": null,
639
  "model_output.classification.iab_content.tier3.label": null
640
  },
641
  "expected": {
@@ -647,12 +704,12 @@
647
  "id": "software-apps-hard",
648
  "mismatches": [
649
  {
650
- "actual": "Business and Finance",
651
- "expected": "Technology & Computing",
652
- "path": "model_output.classification.iab_content.tier1.label"
653
  },
654
  {
655
- "actual": null,
656
  "expected": "Computing",
657
  "path": "model_output.classification.iab_content.tier2.label"
658
  },
@@ -717,10 +774,10 @@
717
  },
718
  {
719
  "actual": {
720
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
721
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
722
  "model_output.classification.iab_content.tier2.label": "Computing",
723
- "model_output.classification.iab_content.tier3.label": null,
724
  "model_output.classification.iab_content.tier4.label": null
725
  },
726
  "expected": {
@@ -733,7 +790,12 @@
733
  "id": "communication-software-medium",
734
  "mismatches": [
735
  {
736
- "actual": null,
 
 
 
 
 
737
  "expected": "Software and Applications",
738
  "path": "model_output.classification.iab_content.tier3.label"
739
  },
@@ -750,9 +812,9 @@
750
  },
751
  {
752
  "actual": {
753
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
754
- "model_output.classification.iab_content.tier1.label": "Careers",
755
- "model_output.classification.iab_content.tier2.label": null,
756
  "model_output.classification.iab_content.tier3.label": null,
757
  "model_output.classification.iab_content.tier4.label": null
758
  },
@@ -766,12 +828,12 @@
766
  "id": "communication-software-hard",
767
  "mismatches": [
768
  {
769
- "actual": "Careers",
770
- "expected": "Technology & Computing",
771
- "path": "model_output.classification.iab_content.tier1.label"
772
  },
773
  {
774
- "actual": null,
775
  "expected": "Computing",
776
  "path": "model_output.classification.iab_content.tier2.label"
777
  },
@@ -793,11 +855,11 @@
793
  },
794
  {
795
  "actual": {
796
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
797
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
798
  "model_output.classification.iab_content.tier2.label": "Computing",
799
- "model_output.classification.iab_content.tier3.label": null,
800
- "model_output.classification.iab_content.tier4.label": null
801
  },
802
  "expected": {
803
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -809,14 +871,9 @@
809
  "id": "web-hosting-easy",
810
  "mismatches": [
811
  {
812
- "actual": null,
813
- "expected": "Internet",
814
- "path": "model_output.classification.iab_content.tier3.label"
815
- },
816
- {
817
- "actual": null,
818
- "expected": "Web Hosting",
819
- "path": "model_output.classification.iab_content.tier4.label"
820
  }
821
  ],
822
  "notes": "Cross-vertical easy IAB mapping case for Technology & Computing > Computing > Internet > Web Hosting.",
@@ -826,11 +883,11 @@
826
  },
827
  {
828
  "actual": {
829
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
830
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
831
  "model_output.classification.iab_content.tier2.label": "Computing",
832
- "model_output.classification.iab_content.tier3.label": null,
833
- "model_output.classification.iab_content.tier4.label": null
834
  },
835
  "expected": {
836
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -842,14 +899,9 @@
842
  "id": "web-hosting-medium",
843
  "mismatches": [
844
  {
845
- "actual": null,
846
- "expected": "Internet",
847
- "path": "model_output.classification.iab_content.tier3.label"
848
- },
849
- {
850
- "actual": null,
851
- "expected": "Web Hosting",
852
- "path": "model_output.classification.iab_content.tier4.label"
853
  }
854
  ],
855
  "notes": "Cross-vertical medium IAB mapping case for Technology & Computing > Computing > Internet > Web Hosting.",
@@ -859,11 +911,11 @@
859
  },
860
  {
861
  "actual": {
862
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
863
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
864
  "model_output.classification.iab_content.tier2.label": "Computing",
865
- "model_output.classification.iab_content.tier3.label": null,
866
- "model_output.classification.iab_content.tier4.label": null
867
  },
868
  "expected": {
869
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -875,14 +927,9 @@
875
  "id": "web-hosting-hard",
876
  "mismatches": [
877
  {
878
- "actual": null,
879
- "expected": "Internet",
880
- "path": "model_output.classification.iab_content.tier3.label"
881
- },
882
- {
883
- "actual": null,
884
- "expected": "Web Hosting",
885
- "path": "model_output.classification.iab_content.tier4.label"
886
  }
887
  ],
888
  "notes": "Cross-vertical hard IAB mapping case for Technology & Computing > Computing > Internet > Web Hosting.",
@@ -892,10 +939,10 @@
892
  },
893
  {
894
  "actual": {
895
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
896
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
897
- "model_output.classification.iab_content.tier2.label": null,
898
- "model_output.classification.iab_content.tier3.label": null
899
  },
900
  "expected": {
901
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -906,14 +953,9 @@
906
  "id": "laptops-easy",
907
  "mismatches": [
908
  {
909
- "actual": null,
910
- "expected": "Computing",
911
- "path": "model_output.classification.iab_content.tier2.label"
912
- },
913
- {
914
- "actual": null,
915
- "expected": "Laptops",
916
- "path": "model_output.classification.iab_content.tier3.label"
917
  }
918
  ],
919
  "notes": "Cross-vertical easy IAB mapping case for Technology & Computing > Computing > Laptops.",
@@ -949,10 +991,10 @@
949
  },
950
  {
951
  "actual": {
952
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
953
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
954
  "model_output.classification.iab_content.tier2.label": "Computing",
955
- "model_output.classification.iab_content.tier3.label": null
956
  },
957
  "expected": {
958
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -962,13 +1004,18 @@
962
  },
963
  "id": "laptops-hard",
964
  "mismatches": [
 
 
 
 
 
965
  {
966
  "actual": "Computing",
967
  "expected": "Consumer Electronics",
968
  "path": "model_output.classification.iab_content.tier2.label"
969
  },
970
  {
971
- "actual": null,
972
  "expected": "Smartphones",
973
  "path": "model_output.classification.iab_content.tier3.label"
974
  }
@@ -980,11 +1027,11 @@
980
  },
981
  {
982
  "actual": {
983
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
984
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
985
  "model_output.classification.iab_content.tier2.label": "Computing",
986
  "model_output.classification.iab_content.tier3.label": "Software and Applications",
987
- "model_output.classification.iab_content.tier4.label": null
988
  },
989
  "expected": {
990
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -996,7 +1043,12 @@
996
  "id": "desktops-easy",
997
  "mismatches": [
998
  {
999
- "actual": null,
 
 
 
 
 
1000
  "expected": "Photo Editing Software",
1001
  "path": "model_output.classification.iab_content.tier4.label"
1002
  }
@@ -1008,10 +1060,10 @@
1008
  },
1009
  {
1010
  "actual": {
1011
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1012
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1013
  "model_output.classification.iab_content.tier2.label": "Computing",
1014
- "model_output.classification.iab_content.tier3.label": null
1015
  },
1016
  "expected": {
1017
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -1022,9 +1074,9 @@
1022
  "id": "desktops-medium",
1023
  "mismatches": [
1024
  {
1025
- "actual": null,
1026
- "expected": "Desktops",
1027
- "path": "model_output.classification.iab_content.tier3.label"
1028
  }
1029
  ],
1030
  "notes": "Cross-vertical medium IAB mapping case for Technology & Computing > Computing > Desktops.",
@@ -1087,7 +1139,7 @@
1087
  "model_output.classification.iab_content.mapping_mode": "exact",
1088
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1089
  "model_output.classification.iab_content.tier2.label": "Consumer Electronics",
1090
- "model_output.classification.iab_content.tier3.label": "Wearable Technology"
1091
  },
1092
  "expected": {
1093
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -1101,11 +1153,6 @@
1101
  "actual": "exact",
1102
  "expected": "nearest_equivalent",
1103
  "path": "model_output.classification.iab_content.mapping_mode"
1104
- },
1105
- {
1106
- "actual": "Wearable Technology",
1107
- "expected": "Smartphones",
1108
- "path": "model_output.classification.iab_content.tier3.label"
1109
  }
1110
  ],
1111
  "notes": "Cross-vertical medium IAB mapping case for Technology & Computing > Consumer Electronics > Smartphones.",
@@ -1118,7 +1165,7 @@
1118
  "model_output.classification.iab_content.mapping_mode": "exact",
1119
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1120
  "model_output.classification.iab_content.tier2.label": "Consumer Electronics",
1121
- "model_output.classification.iab_content.tier3.label": "Wearable Technology"
1122
  },
1123
  "expected": {
1124
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -1132,11 +1179,6 @@
1132
  "actual": "exact",
1133
  "expected": "nearest_equivalent",
1134
  "path": "model_output.classification.iab_content.mapping_mode"
1135
- },
1136
- {
1137
- "actual": "Wearable Technology",
1138
- "expected": "Smartphones",
1139
- "path": "model_output.classification.iab_content.tier3.label"
1140
  }
1141
  ],
1142
  "notes": "Cross-vertical hard IAB mapping case for Technology & Computing > Consumer Electronics > Smartphones.",
@@ -1148,8 +1190,8 @@
1148
  "actual": {
1149
  "model_output.classification.iab_content.mapping_mode": "exact",
1150
  "model_output.classification.iab_content.tier1.label": "Style & Fashion",
1151
- "model_output.classification.iab_content.tier2.label": "Designer Clothing",
1152
- "model_output.classification.iab_content.tier3.label": null
1153
  },
1154
  "expected": {
1155
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -1165,12 +1207,12 @@
1165
  "path": "model_output.classification.iab_content.mapping_mode"
1166
  },
1167
  {
1168
- "actual": "Designer Clothing",
1169
  "expected": "Women's Fashion",
1170
  "path": "model_output.classification.iab_content.tier2.label"
1171
  },
1172
  {
1173
- "actual": null,
1174
  "expected": "Women's Shoes and Footwear",
1175
  "path": "model_output.classification.iab_content.tier3.label"
1176
  }
@@ -1220,8 +1262,8 @@
1220
  "actual": {
1221
  "model_output.classification.iab_content.mapping_mode": "exact",
1222
  "model_output.classification.iab_content.tier1.label": "Style & Fashion",
1223
- "model_output.classification.iab_content.tier2.label": "Designer Clothing",
1224
- "model_output.classification.iab_content.tier3.label": null
1225
  },
1226
  "expected": {
1227
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -1235,16 +1277,6 @@
1235
  "actual": "exact",
1236
  "expected": "nearest_equivalent",
1237
  "path": "model_output.classification.iab_content.mapping_mode"
1238
- },
1239
- {
1240
- "actual": "Designer Clothing",
1241
- "expected": "Women's Fashion",
1242
- "path": "model_output.classification.iab_content.tier2.label"
1243
- },
1244
- {
1245
- "actual": null,
1246
- "expected": "Women's Shoes and Footwear",
1247
- "path": "model_output.classification.iab_content.tier3.label"
1248
  }
1249
  ],
1250
  "notes": "Cross-vertical hard IAB mapping case for Style & Fashion.",
@@ -1254,9 +1286,9 @@
1254
  },
1255
  {
1256
  "actual": {
1257
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1258
  "model_output.classification.iab_content.tier1.label": "Sports",
1259
- "model_output.classification.iab_content.tier2.label": null,
1260
  "model_output.classification.iab_content.tier3.label": null
1261
  },
1262
  "expected": {
@@ -1273,7 +1305,12 @@
1273
  "path": "model_output.classification.iab_content.tier1.label"
1274
  },
1275
  {
1276
- "actual": null,
 
 
 
 
 
1277
  "expected": "Women's Fashion",
1278
  "path": "model_output.classification.iab_content.tier2.label"
1279
  },
@@ -1357,9 +1394,9 @@
1357
  },
1358
  {
1359
  "actual": {
1360
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1361
  "model_output.classification.iab_content.tier1.label": "Style & Fashion",
1362
- "model_output.classification.iab_content.tier2.label": null,
1363
  "model_output.classification.iab_content.tier3.label": null
1364
  },
1365
  "expected": {
@@ -1371,7 +1408,12 @@
1371
  "id": "mens-shoes-easy",
1372
  "mismatches": [
1373
  {
1374
- "actual": null,
 
 
 
 
 
1375
  "expected": "Men's Fashion",
1376
  "path": "model_output.classification.iab_content.tier2.label"
1377
  },
@@ -1419,9 +1461,9 @@
1419
  },
1420
  {
1421
  "actual": {
1422
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1423
  "model_output.classification.iab_content.tier1.label": "Style & Fashion",
1424
- "model_output.classification.iab_content.tier2.label": null,
1425
  "model_output.classification.iab_content.tier3.label": null
1426
  },
1427
  "expected": {
@@ -1433,7 +1475,12 @@
1433
  "id": "mens-shoes-hard",
1434
  "mismatches": [
1435
  {
1436
- "actual": null,
 
 
 
 
 
1437
  "expected": "Men's Fashion",
1438
  "path": "model_output.classification.iab_content.tier2.label"
1439
  },
@@ -1512,7 +1559,7 @@
1512
  "actual": {
1513
  "model_output.classification.iab_content.mapping_mode": "exact",
1514
  "model_output.classification.iab_content.tier1.label": "Travel",
1515
- "model_output.classification.iab_content.tier2.label": null
1516
  },
1517
  "expected": {
1518
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -1525,11 +1572,6 @@
1525
  "actual": "exact",
1526
  "expected": "nearest_equivalent",
1527
  "path": "model_output.classification.iab_content.mapping_mode"
1528
- },
1529
- {
1530
- "actual": null,
1531
- "expected": "Travel Type",
1532
- "path": "model_output.classification.iab_content.tier2.label"
1533
  }
1534
  ],
1535
  "notes": "Cross-vertical hard IAB mapping case for Travel > Travel Type > Hotels and Motels.",
@@ -1628,10 +1670,10 @@
1628
  },
1629
  {
1630
  "actual": {
1631
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1632
  "model_output.classification.iab_content.tier1.label": "Healthy Living",
1633
- "model_output.classification.iab_content.tier2.label": null,
1634
- "model_output.classification.iab_content.tier3.label": null
1635
  },
1636
  "expected": {
1637
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -1647,12 +1689,17 @@
1647
  "path": "model_output.classification.iab_content.tier1.label"
1648
  },
1649
  {
1650
- "actual": null,
 
 
 
 
 
1651
  "expected": "Business",
1652
  "path": "model_output.classification.iab_content.tier2.label"
1653
  },
1654
  {
1655
- "actual": null,
1656
  "expected": "Green Solutions",
1657
  "path": "model_output.classification.iab_content.tier3.label"
1658
  }
@@ -1706,8 +1753,8 @@
1706
  {
1707
  "actual": {
1708
  "model_output.classification.iab_content.mapping_mode": "exact",
1709
- "model_output.classification.iab_content.tier1.label": "Healthy Living",
1710
- "model_output.classification.iab_content.tier2.label": "Fitness and Exercise",
1711
  "model_output.classification.iab_content.tier3.label": null
1712
  },
1713
  "expected": {
@@ -1718,11 +1765,21 @@
1718
  },
1719
  "id": "running-and-jogging-hard",
1720
  "mismatches": [
 
 
 
 
 
1721
  {
1722
  "actual": "exact",
1723
  "expected": "nearest_equivalent",
1724
  "path": "model_output.classification.iab_content.mapping_mode"
1725
  },
 
 
 
 
 
1726
  {
1727
  "actual": null,
1728
  "expected": "Running and Jogging",
@@ -1841,8 +1898,8 @@
1841
  {
1842
  "actual": {
1843
  "model_output.classification.iab_content.mapping_mode": "exact",
1844
- "model_output.classification.iab_content.tier1.label": "Books and Literature",
1845
- "model_output.classification.iab_content.tier2.label": "Fiction"
1846
  },
1847
  "expected": {
1848
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -1851,10 +1908,20 @@
1851
  },
1852
  "id": "fiction-medium",
1853
  "mismatches": [
 
 
 
 
 
1854
  {
1855
  "actual": "exact",
1856
  "expected": "nearest_equivalent",
1857
  "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
1858
  }
1859
  ],
1860
  "notes": "Cross-vertical medium IAB mapping case for Books and Literature > Fiction.",
@@ -1888,7 +1955,7 @@
1888
  "actual": {
1889
  "model_output.classification.iab_content.mapping_mode": "exact",
1890
  "model_output.classification.iab_content.tier1.label": "Home & Garden",
1891
- "model_output.classification.iab_content.tier2.label": "Remodeling & Construction"
1892
  },
1893
  "expected": {
1894
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -1901,6 +1968,11 @@
1901
  "actual": "exact",
1902
  "expected": "nearest_equivalent",
1903
  "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
1904
  }
1905
  ],
1906
  "notes": "Cross-vertical easy IAB mapping case for Home & Garden > Home Improvement.",
@@ -1910,9 +1982,9 @@
1910
  },
1911
  {
1912
  "actual": {
1913
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1914
  "model_output.classification.iab_content.tier1.label": "Home & Garden",
1915
- "model_output.classification.iab_content.tier2.label": null,
1916
  "model_output.classification.iab_content.tier3.label": null
1917
  },
1918
  "expected": {
@@ -1929,7 +2001,12 @@
1929
  "path": "model_output.classification.iab_content.tier1.label"
1930
  },
1931
  {
1932
- "actual": null,
 
 
 
 
 
1933
  "expected": "Personal Care",
1934
  "path": "model_output.classification.iab_content.tier2.label"
1935
  },
@@ -1980,9 +2057,9 @@
1980
  },
1981
  {
1982
  "actual": {
1983
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1984
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1985
- "model_output.classification.iab_content.tier2.label": null
1986
  },
1987
  "expected": {
1988
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -1997,7 +2074,12 @@
1997
  "path": "model_output.classification.iab_content.tier1.label"
1998
  },
1999
  {
2000
- "actual": null,
 
 
 
 
 
2001
  "expected": "Language Learning",
2002
  "path": "model_output.classification.iab_content.tier2.label"
2003
  }
@@ -2033,8 +2115,8 @@
2033
  },
2034
  {
2035
  "actual": {
2036
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2037
- "model_output.classification.iab_content.tier1.label": "Healthy Living"
2038
  },
2039
  "expected": {
2040
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -2043,9 +2125,9 @@
2043
  "id": "online-education-hard",
2044
  "mismatches": [
2045
  {
2046
- "actual": "Healthy Living",
2047
- "expected": "Careers",
2048
- "path": "model_output.classification.iab_content.tier1.label"
2049
  }
2050
  ],
2051
  "notes": "Cross-vertical hard IAB mapping case for Education > Online Education.",
@@ -2136,10 +2218,10 @@
2136
  },
2137
  {
2138
  "actual": {
2139
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2140
  "model_output.classification.iab_content.tier1.label": "Medical Health",
2141
  "model_output.classification.iab_content.tier2.label": "Diseases and Conditions",
2142
- "model_output.classification.iab_content.tier3.label": null
2143
  },
2144
  "expected": {
2145
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -2150,9 +2232,9 @@
2150
  "id": "medical-health-easy",
2151
  "mismatches": [
2152
  {
2153
- "actual": null,
2154
- "expected": "Allergies",
2155
- "path": "model_output.classification.iab_content.tier3.label"
2156
  }
2157
  ],
2158
  "notes": "Cross-vertical easy IAB mapping case for Medical Health.",
@@ -2165,7 +2247,7 @@
2165
  "model_output.classification.iab_content.mapping_mode": "exact",
2166
  "model_output.classification.iab_content.tier1.label": "Medical Health",
2167
  "model_output.classification.iab_content.tier2.label": "Diseases and Conditions",
2168
- "model_output.classification.iab_content.tier3.label": "Injuries",
2169
  "model_output.classification.iab_content.tier4.label": null
2170
  },
2171
  "expected": {
@@ -2182,6 +2264,11 @@
2182
  "expected": "nearest_equivalent",
2183
  "path": "model_output.classification.iab_content.mapping_mode"
2184
  },
 
 
 
 
 
2185
  {
2186
  "actual": null,
2187
  "expected": "First Aid",
@@ -2197,7 +2284,7 @@
2197
  "actual": {
2198
  "model_output.classification.iab_content.mapping_mode": "exact",
2199
  "model_output.classification.iab_content.tier1.label": "Medical Health",
2200
- "model_output.classification.iab_content.tier2.label": "Surgery",
2201
  "model_output.classification.iab_content.tier3.label": null
2202
  },
2203
  "expected": {
@@ -2219,7 +2306,7 @@
2219
  "path": "model_output.classification.iab_content.mapping_mode"
2220
  },
2221
  {
2222
- "actual": "Surgery",
2223
  "expected": "Wellness",
2224
  "path": "model_output.classification.iab_content.tier2.label"
2225
  },
@@ -2260,10 +2347,10 @@
2260
  },
2261
  {
2262
  "actual": {
2263
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2264
  "model_output.classification.iab_content.tier1.label": "Business and Finance",
2265
  "model_output.classification.iab_content.tier2.label": "Business",
2266
- "model_output.classification.iab_content.tier3.label": null
2267
  },
2268
  "expected": {
2269
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -2274,9 +2361,9 @@
2274
  "id": "careers-job-search-medium",
2275
  "mismatches": [
2276
  {
2277
- "actual": null,
2278
- "expected": "Sales",
2279
- "path": "model_output.classification.iab_content.tier3.label"
2280
  }
2281
  ],
2282
  "notes": "Cross-vertical medium IAB mapping case for Careers > Job Search.",
@@ -2286,9 +2373,9 @@
2286
  },
2287
  {
2288
  "actual": {
2289
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2290
  "model_output.classification.iab_content.tier1.label": "Genres",
2291
- "model_output.classification.iab_content.tier2.label": null
2292
  },
2293
  "expected": {
2294
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -2303,7 +2390,12 @@
2303
  "path": "model_output.classification.iab_content.tier1.label"
2304
  },
2305
  {
2306
- "actual": null,
 
 
 
 
 
2307
  "expected": "Job Search",
2308
  "path": "model_output.classification.iab_content.tier2.label"
2309
  }
@@ -2315,9 +2407,9 @@
2315
  },
2316
  {
2317
  "actual": {
2318
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2319
- "model_output.classification.iab_content.tier1.label": "Personal Celebrations & Life Events",
2320
- "model_output.classification.iab_content.tier2.label": null
2321
  },
2322
  "expected": {
2323
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -2327,12 +2419,17 @@
2327
  "id": "personal-finance-easy",
2328
  "mismatches": [
2329
  {
2330
- "actual": "Personal Celebrations & Life Events",
2331
  "expected": "Food & Drink",
2332
  "path": "model_output.classification.iab_content.tier1.label"
2333
  },
2334
  {
2335
- "actual": null,
 
 
 
 
 
2336
  "expected": "Food Movements",
2337
  "path": "model_output.classification.iab_content.tier2.label"
2338
  }
@@ -2434,8 +2531,8 @@
2434
  {
2435
  "actual": {
2436
  "model_output.classification.iab_content.mapping_mode": "exact",
2437
- "model_output.classification.iab_content.tier1.label": "Hobbies & Interests",
2438
- "model_output.classification.iab_content.tier2.label": "Content Production"
2439
  },
2440
  "expected": {
2441
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -2445,7 +2542,7 @@
2445
  "id": "parenting-medium",
2446
  "mismatches": [
2447
  {
2448
- "actual": "Hobbies & Interests",
2449
  "expected": "Family and Relationships",
2450
  "path": "model_output.classification.iab_content.tier1.label"
2451
  },
@@ -2455,7 +2552,7 @@
2455
  "path": "model_output.classification.iab_content.mapping_mode"
2456
  },
2457
  {
2458
- "actual": "Content Production",
2459
  "expected": "Parenting",
2460
  "path": "model_output.classification.iab_content.tier2.label"
2461
  }
@@ -2470,7 +2567,7 @@
2470
  "model_output.classification.iab_content.mapping_mode": "exact",
2471
  "model_output.classification.iab_content.tier1.label": "Family and Relationships",
2472
  "model_output.classification.iab_content.tier2.label": "Parenting",
2473
- "model_output.classification.iab_content.tier3.label": "Special Needs Kids"
2474
  },
2475
  "expected": {
2476
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -2484,6 +2581,11 @@
2484
  "actual": "exact",
2485
  "expected": "nearest_equivalent",
2486
  "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
2487
  }
2488
  ],
2489
  "notes": "Cross-vertical hard IAB mapping case for Family and Relationships > Parenting.",
@@ -2515,9 +2617,9 @@
2515
  },
2516
  {
2517
  "actual": {
2518
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2519
- "model_output.classification.iab_content.tier1.label": "Food & Drink",
2520
- "model_output.classification.iab_content.tier2.label": null
2521
  },
2522
  "expected": {
2523
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -2527,14 +2629,9 @@
2527
  "id": "gardening-medium",
2528
  "mismatches": [
2529
  {
2530
- "actual": "Food & Drink",
2531
- "expected": "Home & Garden",
2532
- "path": "model_output.classification.iab_content.tier1.label"
2533
- },
2534
- {
2535
- "actual": null,
2536
- "expected": "Gardening",
2537
- "path": "model_output.classification.iab_content.tier2.label"
2538
  }
2539
  ],
2540
  "notes": "Cross-vertical medium IAB mapping case for Home & Garden > Gardening.",
@@ -2591,8 +2688,8 @@
2591
  {
2592
  "actual": {
2593
  "model_output.classification.iab_content.mapping_mode": "exact",
2594
- "model_output.classification.iab_content.tier1.label": "Genres",
2595
- "model_output.classification.iab_content.tier2.label": "Horror",
2596
  "model_output.classification.iab_content.tier3.label": null
2597
  },
2598
  "expected": {
@@ -2604,7 +2701,7 @@
2604
  "id": "movies-medium",
2605
  "mismatches": [
2606
  {
2607
- "actual": "Genres",
2608
  "expected": "Video Gaming",
2609
  "path": "model_output.classification.iab_content.tier1.label"
2610
  },
@@ -2614,7 +2711,7 @@
2614
  "path": "model_output.classification.iab_content.mapping_mode"
2615
  },
2616
  {
2617
- "actual": "Horror",
2618
  "expected": "Video Game Genres",
2619
  "path": "model_output.classification.iab_content.tier2.label"
2620
  },
 
1
  {
2
  "by_status": {
3
  "must_fix": {
4
+ "failed": 90,
5
+ "passed": 0,
6
  "total": 90
7
  }
8
  },
9
  "cases_path": "/content/agentic-intent-classifier/examples/iab_cross_vertical_behavior_lock_cases.json",
10
  "count": 90,
11
+ "failed": 90,
12
+ "passed": 0,
13
  "results": [
14
  {
15
  "actual": {
16
  "model_output.classification.iab_content.mapping_mode": "exact",
17
+ "model_output.classification.iab_content.tier1.label": "Personal Finance",
18
+ "model_output.classification.iab_content.tier2.label": "Insurance"
19
  },
20
  "expected": {
21
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
25
  "id": "auto-buying-easy",
26
  "mismatches": [
27
  {
28
+ "actual": "Personal Finance",
29
  "expected": "Travel",
30
  "path": "model_output.classification.iab_content.tier1.label"
31
  },
 
35
  "path": "model_output.classification.iab_content.mapping_mode"
36
  },
37
  {
38
+ "actual": "Insurance",
39
  "expected": "Travel Type",
40
  "path": "model_output.classification.iab_content.tier2.label"
41
  }
 
47
  },
48
  {
49
  "actual": {
50
+ "model_output.classification.iab_content.mapping_mode": "exact",
51
  "model_output.classification.iab_content.tier1.label": "Automotive",
52
  "model_output.classification.iab_content.tier2.label": "Auto Body Styles"
53
  },
 
57
  "model_output.classification.iab_content.tier2.label": "Auto Body Styles"
58
  },
59
  "id": "auto-buying-medium",
60
+ "mismatches": [
61
+ {
62
+ "actual": "exact",
63
+ "expected": "nearest_equivalent",
64
+ "path": "model_output.classification.iab_content.mapping_mode"
65
+ }
66
+ ],
67
  "notes": "Cross-vertical medium IAB mapping case for Automotive > Auto Buying and Selling.",
68
+ "pass": false,
69
  "status": "must_fix",
70
  "text": "Best used SUV for a family of four"
71
  },
72
  {
73
  "actual": {
74
+ "model_output.classification.iab_content.mapping_mode": "exact",
75
  "model_output.classification.iab_content.tier1.label": "Automotive",
76
+ "model_output.classification.iab_content.tier2.label": "Car Culture"
77
  },
78
  "expected": {
79
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
81
  "model_output.classification.iab_content.tier2.label": "Auto Type"
82
  },
83
  "id": "auto-buying-hard",
84
+ "mismatches": [
85
+ {
86
+ "actual": "exact",
87
+ "expected": "nearest_equivalent",
88
+ "path": "model_output.classification.iab_content.mapping_mode"
89
+ },
90
+ {
91
+ "actual": "Car Culture",
92
+ "expected": "Auto Type",
93
+ "path": "model_output.classification.iab_content.tier2.label"
94
+ }
95
+ ],
96
  "notes": "Cross-vertical hard IAB mapping case for Automotive > Auto Buying and Selling.",
97
+ "pass": false,
98
  "status": "must_fix",
99
  "text": "I need a shortlist of practical cars before making a purchase this month"
100
  },
101
  {
102
  "actual": {
103
+ "model_output.classification.iab_content.mapping_mode": "exact",
104
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
105
  "model_output.classification.iab_content.tier2.label": "Computing",
106
+ "model_output.classification.iab_content.tier3.label": "Software and Applications"
107
  },
108
  "expected": {
109
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
114
  "id": "sales-crm-easy",
115
  "mismatches": [
116
  {
117
+ "actual": "exact",
118
+ "expected": "nearest_equivalent",
119
+ "path": "model_output.classification.iab_content.mapping_mode"
120
  }
121
  ],
122
  "notes": "Cross-vertical easy IAB mapping case for Business and Finance > Business > Sales.",
 
126
  },
127
  {
128
  "actual": {
129
+ "model_output.classification.iab_content.mapping_mode": "exact",
130
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
131
+ "model_output.classification.iab_content.tier2.label": "Robotics",
132
  "model_output.classification.iab_content.tier3.label": null
133
  },
134
  "expected": {
 
140
  "id": "sales-crm-medium",
141
  "mismatches": [
142
  {
143
+ "actual": "exact",
144
+ "expected": "nearest_equivalent",
145
+ "path": "model_output.classification.iab_content.mapping_mode"
146
+ },
147
+ {
148
+ "actual": "Robotics",
149
  "expected": "Computing",
150
  "path": "model_output.classification.iab_content.tier2.label"
151
  },
 
162
  },
163
  {
164
  "actual": {
165
+ "model_output.classification.iab_content.mapping_mode": "exact",
166
  "model_output.classification.iab_content.tier1.label": "Business and Finance",
167
  "model_output.classification.iab_content.tier2.label": "Business",
168
+ "model_output.classification.iab_content.tier3.label": "Sales"
169
  },
170
  "expected": {
171
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
176
  "id": "sales-crm-hard",
177
  "mismatches": [
178
  {
179
+ "actual": "exact",
180
+ "expected": "nearest_equivalent",
181
+ "path": "model_output.classification.iab_content.mapping_mode"
182
  }
183
  ],
184
  "notes": "Cross-vertical hard IAB mapping case for Business and Finance > Business > Sales.",
 
188
  },
189
  {
190
  "actual": {
191
+ "model_output.classification.iab_content.mapping_mode": "exact",
192
+ "model_output.classification.iab_content.tier1.label": "Careers",
193
+ "model_output.classification.iab_content.tier2.label": "Job Search",
194
  "model_output.classification.iab_content.tier3.label": null
195
  },
196
  "expected": {
 
202
  "id": "marketing-tools-easy",
203
  "mismatches": [
204
  {
205
+ "actual": "Careers",
206
  "expected": "Technology & Computing",
207
  "path": "model_output.classification.iab_content.tier1.label"
208
  },
209
  {
210
+ "actual": "exact",
211
+ "expected": "nearest_equivalent",
212
+ "path": "model_output.classification.iab_content.mapping_mode"
213
+ },
214
+ {
215
+ "actual": "Job Search",
216
  "expected": "Computing",
217
  "path": "model_output.classification.iab_content.tier2.label"
218
  },
 
229
  },
230
  {
231
  "actual": {
232
+ "model_output.classification.iab_content.mapping_mode": "exact",
233
+ "model_output.classification.iab_content.tier1.label": "Sensitive Topics",
234
+ "model_output.classification.iab_content.tier2.label": "Terrorism"
235
  },
236
  "expected": {
237
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
241
  "id": "marketing-tools-medium",
242
  "mismatches": [
243
  {
244
+ "actual": "Sensitive Topics",
245
  "expected": "Business and Finance",
246
  "path": "model_output.classification.iab_content.tier1.label"
247
  },
248
  {
249
+ "actual": "exact",
250
+ "expected": "nearest_equivalent",
251
+ "path": "model_output.classification.iab_content.mapping_mode"
252
+ },
253
+ {
254
+ "actual": "Terrorism",
255
  "expected": "Business",
256
  "path": "model_output.classification.iab_content.tier2.label"
257
  }
 
263
  },
264
  {
265
  "actual": {
266
+ "model_output.classification.iab_content.mapping_mode": "exact",
267
  "model_output.classification.iab_content.tier1.label": "Personal Finance",
268
+ "model_output.classification.iab_content.tier2.label": "Home Utilities",
269
  "model_output.classification.iab_content.tier3.label": null
270
  },
271
  "expected": {
 
282
  "path": "model_output.classification.iab_content.tier1.label"
283
  },
284
  {
285
+ "actual": "exact",
286
+ "expected": "nearest_equivalent",
287
+ "path": "model_output.classification.iab_content.mapping_mode"
288
+ },
289
+ {
290
+ "actual": "Home Utilities",
291
  "expected": "Computing",
292
  "path": "model_output.classification.iab_content.tier2.label"
293
  },
 
304
  },
305
  {
306
  "actual": {
307
+ "model_output.classification.iab_content.mapping_mode": "exact",
308
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
309
  "model_output.classification.iab_content.tier2.label": "Computing",
310
+ "model_output.classification.iab_content.tier3.label": "Information and Network Security"
311
  },
312
  "expected": {
313
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
318
  "id": "business-it-easy",
319
  "mismatches": [
320
  {
321
+ "actual": "exact",
322
+ "expected": "nearest_equivalent",
323
+ "path": "model_output.classification.iab_content.mapping_mode"
324
+ },
325
+ {
326
+ "actual": "Information and Network Security",
327
  "expected": "Internet",
328
  "path": "model_output.classification.iab_content.tier3.label"
329
  }
 
335
  },
336
  {
337
  "actual": {
338
+ "model_output.classification.iab_content.mapping_mode": "exact",
339
+ "model_output.classification.iab_content.tier1.label": "Careers",
340
+ "model_output.classification.iab_content.tier2.label": "Job Search"
341
  },
342
  "expected": {
343
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
347
  "id": "business-it-medium",
348
  "mismatches": [
349
  {
350
+ "actual": "exact",
351
+ "expected": "nearest_equivalent",
352
+ "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
353
  }
354
  ],
355
  "notes": "Cross-vertical medium IAB mapping case for Business and Finance > Business > Business I.T..",
 
449
  {
450
  "actual": {
451
  "model_output.classification.iab_content.mapping_mode": "exact",
452
+ "model_output.classification.iab_content.tier1.label": "Attractions",
453
+ "model_output.classification.iab_content.tier2.label": "Bars & Restaurants"
454
  },
455
  "expected": {
456
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
459
  },
460
  "id": "dining-out-hard",
461
  "mismatches": [
462
+ {
463
+ "actual": "Attractions",
464
+ "expected": "Food & Drink",
465
+ "path": "model_output.classification.iab_content.tier1.label"
466
+ },
467
  {
468
  "actual": "exact",
469
  "expected": "nearest_equivalent",
470
  "path": "model_output.classification.iab_content.mapping_mode"
471
+ },
472
+ {
473
+ "actual": "Bars & Restaurants",
474
+ "expected": "Dining Out",
475
+ "path": "model_output.classification.iab_content.tier2.label"
476
  }
477
  ],
478
  "notes": "Cross-vertical hard IAB mapping case for Food & Drink > Dining Out.",
 
549
  {
550
  "actual": {
551
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
552
+ "model_output.classification.iab_content.tier1.label": "Science"
553
  },
554
  "expected": {
555
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
558
  "id": "artificial-intelligence-easy",
559
  "mismatches": [
560
  {
561
+ "actual": "Science",
562
  "expected": "Technology & Computing",
563
  "path": "model_output.classification.iab_content.tier1.label"
564
  }
 
618
  },
619
  {
620
  "actual": {
621
+ "model_output.classification.iab_content.mapping_mode": "exact",
622
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
623
  "model_output.classification.iab_content.tier2.label": "Computing"
624
  },
 
634
  "expected": "Business and Finance",
635
  "path": "model_output.classification.iab_content.tier1.label"
636
  },
637
+ {
638
+ "actual": "exact",
639
+ "expected": "nearest_equivalent",
640
+ "path": "model_output.classification.iab_content.mapping_mode"
641
+ },
642
  {
643
  "actual": "Computing",
644
  "expected": "Business",
 
652
  },
653
  {
654
  "actual": {
655
+ "model_output.classification.iab_content.mapping_mode": "exact",
656
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
657
  "model_output.classification.iab_content.tier2.label": "Computing",
658
+ "model_output.classification.iab_content.tier3.label": "Software and Applications",
659
  "model_output.classification.iab_content.tier4.label": null
660
  },
661
  "expected": {
 
668
  "id": "software-apps-medium",
669
  "mismatches": [
670
  {
671
+ "actual": "exact",
672
+ "expected": "nearest_equivalent",
673
+ "path": "model_output.classification.iab_content.mapping_mode"
674
+ },
675
+ {
676
+ "actual": "Software and Applications",
677
  "expected": "Internet",
678
  "path": "model_output.classification.iab_content.tier3.label"
679
  },
 
690
  },
691
  {
692
  "actual": {
693
+ "model_output.classification.iab_content.mapping_mode": "exact",
694
+ "model_output.classification.iab_content.tier1.label": "Technology & Computing",
695
+ "model_output.classification.iab_content.tier2.label": "Virtual Reality",
696
  "model_output.classification.iab_content.tier3.label": null
697
  },
698
  "expected": {
 
704
  "id": "software-apps-hard",
705
  "mismatches": [
706
  {
707
+ "actual": "exact",
708
+ "expected": "nearest_equivalent",
709
+ "path": "model_output.classification.iab_content.mapping_mode"
710
  },
711
  {
712
+ "actual": "Virtual Reality",
713
  "expected": "Computing",
714
  "path": "model_output.classification.iab_content.tier2.label"
715
  },
 
774
  },
775
  {
776
  "actual": {
777
+ "model_output.classification.iab_content.mapping_mode": "exact",
778
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
779
  "model_output.classification.iab_content.tier2.label": "Computing",
780
+ "model_output.classification.iab_content.tier3.label": "Information and Network Security",
781
  "model_output.classification.iab_content.tier4.label": null
782
  },
783
  "expected": {
 
790
  "id": "communication-software-medium",
791
  "mismatches": [
792
  {
793
+ "actual": "exact",
794
+ "expected": "nearest_equivalent",
795
+ "path": "model_output.classification.iab_content.mapping_mode"
796
+ },
797
+ {
798
+ "actual": "Information and Network Security",
799
  "expected": "Software and Applications",
800
  "path": "model_output.classification.iab_content.tier3.label"
801
  },
 
812
  },
813
  {
814
  "actual": {
815
+ "model_output.classification.iab_content.mapping_mode": "exact",
816
+ "model_output.classification.iab_content.tier1.label": "Technology & Computing",
817
+ "model_output.classification.iab_content.tier2.label": "Virtual Reality",
818
  "model_output.classification.iab_content.tier3.label": null,
819
  "model_output.classification.iab_content.tier4.label": null
820
  },
 
828
  "id": "communication-software-hard",
829
  "mismatches": [
830
  {
831
+ "actual": "exact",
832
+ "expected": "nearest_equivalent",
833
+ "path": "model_output.classification.iab_content.mapping_mode"
834
  },
835
  {
836
+ "actual": "Virtual Reality",
837
  "expected": "Computing",
838
  "path": "model_output.classification.iab_content.tier2.label"
839
  },
 
855
  },
856
  {
857
  "actual": {
858
+ "model_output.classification.iab_content.mapping_mode": "exact",
859
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
860
  "model_output.classification.iab_content.tier2.label": "Computing",
861
+ "model_output.classification.iab_content.tier3.label": "Internet",
862
+ "model_output.classification.iab_content.tier4.label": "Web Hosting"
863
  },
864
  "expected": {
865
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
871
  "id": "web-hosting-easy",
872
  "mismatches": [
873
  {
874
+ "actual": "exact",
875
+ "expected": "nearest_equivalent",
876
+ "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
877
  }
878
  ],
879
  "notes": "Cross-vertical easy IAB mapping case for Technology & Computing > Computing > Internet > Web Hosting.",
 
883
  },
884
  {
885
  "actual": {
886
+ "model_output.classification.iab_content.mapping_mode": "exact",
887
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
888
  "model_output.classification.iab_content.tier2.label": "Computing",
889
+ "model_output.classification.iab_content.tier3.label": "Internet",
890
+ "model_output.classification.iab_content.tier4.label": "Web Hosting"
891
  },
892
  "expected": {
893
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
899
  "id": "web-hosting-medium",
900
  "mismatches": [
901
  {
902
+ "actual": "exact",
903
+ "expected": "nearest_equivalent",
904
+ "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
905
  }
906
  ],
907
  "notes": "Cross-vertical medium IAB mapping case for Technology & Computing > Computing > Internet > Web Hosting.",
 
911
  },
912
  {
913
  "actual": {
914
+ "model_output.classification.iab_content.mapping_mode": "exact",
915
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
916
  "model_output.classification.iab_content.tier2.label": "Computing",
917
+ "model_output.classification.iab_content.tier3.label": "Internet",
918
+ "model_output.classification.iab_content.tier4.label": "Web Hosting"
919
  },
920
  "expected": {
921
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
927
  "id": "web-hosting-hard",
928
  "mismatches": [
929
  {
930
+ "actual": "exact",
931
+ "expected": "nearest_equivalent",
932
+ "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
933
  }
934
  ],
935
  "notes": "Cross-vertical hard IAB mapping case for Technology & Computing > Computing > Internet > Web Hosting.",
 
939
  },
940
  {
941
  "actual": {
942
+ "model_output.classification.iab_content.mapping_mode": "exact",
943
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
944
+ "model_output.classification.iab_content.tier2.label": "Computing",
945
+ "model_output.classification.iab_content.tier3.label": "Laptops"
946
  },
947
  "expected": {
948
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
953
  "id": "laptops-easy",
954
  "mismatches": [
955
  {
956
+ "actual": "exact",
957
+ "expected": "nearest_equivalent",
958
+ "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
959
  }
960
  ],
961
  "notes": "Cross-vertical easy IAB mapping case for Technology & Computing > Computing > Laptops.",
 
991
  },
992
  {
993
  "actual": {
994
+ "model_output.classification.iab_content.mapping_mode": "exact",
995
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
996
  "model_output.classification.iab_content.tier2.label": "Computing",
997
+ "model_output.classification.iab_content.tier3.label": "Laptops"
998
  },
999
  "expected": {
1000
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
1004
  },
1005
  "id": "laptops-hard",
1006
  "mismatches": [
1007
+ {
1008
+ "actual": "exact",
1009
+ "expected": "nearest_equivalent",
1010
+ "path": "model_output.classification.iab_content.mapping_mode"
1011
+ },
1012
  {
1013
  "actual": "Computing",
1014
  "expected": "Consumer Electronics",
1015
  "path": "model_output.classification.iab_content.tier2.label"
1016
  },
1017
  {
1018
+ "actual": "Laptops",
1019
  "expected": "Smartphones",
1020
  "path": "model_output.classification.iab_content.tier3.label"
1021
  }
 
1027
  },
1028
  {
1029
  "actual": {
1030
+ "model_output.classification.iab_content.mapping_mode": "exact",
1031
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1032
  "model_output.classification.iab_content.tier2.label": "Computing",
1033
  "model_output.classification.iab_content.tier3.label": "Software and Applications",
1034
+ "model_output.classification.iab_content.tier4.label": "Computer Animation"
1035
  },
1036
  "expected": {
1037
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
1043
  "id": "desktops-easy",
1044
  "mismatches": [
1045
  {
1046
+ "actual": "exact",
1047
+ "expected": "nearest_equivalent",
1048
+ "path": "model_output.classification.iab_content.mapping_mode"
1049
+ },
1050
+ {
1051
+ "actual": "Computer Animation",
1052
  "expected": "Photo Editing Software",
1053
  "path": "model_output.classification.iab_content.tier4.label"
1054
  }
 
1060
  },
1061
  {
1062
  "actual": {
1063
+ "model_output.classification.iab_content.mapping_mode": "exact",
1064
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1065
  "model_output.classification.iab_content.tier2.label": "Computing",
1066
+ "model_output.classification.iab_content.tier3.label": "Desktops"
1067
  },
1068
  "expected": {
1069
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
1074
  "id": "desktops-medium",
1075
  "mismatches": [
1076
  {
1077
+ "actual": "exact",
1078
+ "expected": "nearest_equivalent",
1079
+ "path": "model_output.classification.iab_content.mapping_mode"
1080
  }
1081
  ],
1082
  "notes": "Cross-vertical medium IAB mapping case for Technology & Computing > Computing > Desktops.",
 
1139
  "model_output.classification.iab_content.mapping_mode": "exact",
1140
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1141
  "model_output.classification.iab_content.tier2.label": "Consumer Electronics",
1142
+ "model_output.classification.iab_content.tier3.label": "Smartphones"
1143
  },
1144
  "expected": {
1145
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
1153
  "actual": "exact",
1154
  "expected": "nearest_equivalent",
1155
  "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
1156
  }
1157
  ],
1158
  "notes": "Cross-vertical medium IAB mapping case for Technology & Computing > Consumer Electronics > Smartphones.",
 
1165
  "model_output.classification.iab_content.mapping_mode": "exact",
1166
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1167
  "model_output.classification.iab_content.tier2.label": "Consumer Electronics",
1168
+ "model_output.classification.iab_content.tier3.label": "Smartphones"
1169
  },
1170
  "expected": {
1171
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
1179
  "actual": "exact",
1180
  "expected": "nearest_equivalent",
1181
  "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
1182
  }
1183
  ],
1184
  "notes": "Cross-vertical hard IAB mapping case for Technology & Computing > Consumer Electronics > Smartphones.",
 
1190
  "actual": {
1191
  "model_output.classification.iab_content.mapping_mode": "exact",
1192
  "model_output.classification.iab_content.tier1.label": "Style & Fashion",
1193
+ "model_output.classification.iab_content.tier2.label": "Men's Fashion",
1194
+ "model_output.classification.iab_content.tier3.label": "Men's Shoes and Footwear"
1195
  },
1196
  "expected": {
1197
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
1207
  "path": "model_output.classification.iab_content.mapping_mode"
1208
  },
1209
  {
1210
+ "actual": "Men's Fashion",
1211
  "expected": "Women's Fashion",
1212
  "path": "model_output.classification.iab_content.tier2.label"
1213
  },
1214
  {
1215
+ "actual": "Men's Shoes and Footwear",
1216
  "expected": "Women's Shoes and Footwear",
1217
  "path": "model_output.classification.iab_content.tier3.label"
1218
  }
 
1262
  "actual": {
1263
  "model_output.classification.iab_content.mapping_mode": "exact",
1264
  "model_output.classification.iab_content.tier1.label": "Style & Fashion",
1265
+ "model_output.classification.iab_content.tier2.label": "Women's Fashion",
1266
+ "model_output.classification.iab_content.tier3.label": "Women's Shoes and Footwear"
1267
  },
1268
  "expected": {
1269
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
1277
  "actual": "exact",
1278
  "expected": "nearest_equivalent",
1279
  "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
 
 
 
 
 
1280
  }
1281
  ],
1282
  "notes": "Cross-vertical hard IAB mapping case for Style & Fashion.",
 
1286
  },
1287
  {
1288
  "actual": {
1289
+ "model_output.classification.iab_content.mapping_mode": "exact",
1290
  "model_output.classification.iab_content.tier1.label": "Sports",
1291
+ "model_output.classification.iab_content.tier2.label": "Bodybuilding",
1292
  "model_output.classification.iab_content.tier3.label": null
1293
  },
1294
  "expected": {
 
1305
  "path": "model_output.classification.iab_content.tier1.label"
1306
  },
1307
  {
1308
+ "actual": "exact",
1309
+ "expected": "nearest_equivalent",
1310
+ "path": "model_output.classification.iab_content.mapping_mode"
1311
+ },
1312
+ {
1313
+ "actual": "Bodybuilding",
1314
  "expected": "Women's Fashion",
1315
  "path": "model_output.classification.iab_content.tier2.label"
1316
  },
 
1394
  },
1395
  {
1396
  "actual": {
1397
+ "model_output.classification.iab_content.mapping_mode": "exact",
1398
  "model_output.classification.iab_content.tier1.label": "Style & Fashion",
1399
+ "model_output.classification.iab_content.tier2.label": "Children's Clothing",
1400
  "model_output.classification.iab_content.tier3.label": null
1401
  },
1402
  "expected": {
 
1408
  "id": "mens-shoes-easy",
1409
  "mismatches": [
1410
  {
1411
+ "actual": "exact",
1412
+ "expected": "nearest_equivalent",
1413
+ "path": "model_output.classification.iab_content.mapping_mode"
1414
+ },
1415
+ {
1416
+ "actual": "Children's Clothing",
1417
  "expected": "Men's Fashion",
1418
  "path": "model_output.classification.iab_content.tier2.label"
1419
  },
 
1461
  },
1462
  {
1463
  "actual": {
1464
+ "model_output.classification.iab_content.mapping_mode": "exact",
1465
  "model_output.classification.iab_content.tier1.label": "Style & Fashion",
1466
+ "model_output.classification.iab_content.tier2.label": "Children's Clothing",
1467
  "model_output.classification.iab_content.tier3.label": null
1468
  },
1469
  "expected": {
 
1475
  "id": "mens-shoes-hard",
1476
  "mismatches": [
1477
  {
1478
+ "actual": "exact",
1479
+ "expected": "nearest_equivalent",
1480
+ "path": "model_output.classification.iab_content.mapping_mode"
1481
+ },
1482
+ {
1483
+ "actual": "Children's Clothing",
1484
  "expected": "Men's Fashion",
1485
  "path": "model_output.classification.iab_content.tier2.label"
1486
  },
 
1559
  "actual": {
1560
  "model_output.classification.iab_content.mapping_mode": "exact",
1561
  "model_output.classification.iab_content.tier1.label": "Travel",
1562
+ "model_output.classification.iab_content.tier2.label": "Travel Type"
1563
  },
1564
  "expected": {
1565
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
1572
  "actual": "exact",
1573
  "expected": "nearest_equivalent",
1574
  "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
1575
  }
1576
  ],
1577
  "notes": "Cross-vertical hard IAB mapping case for Travel > Travel Type > Hotels and Motels.",
 
1670
  },
1671
  {
1672
  "actual": {
1673
+ "model_output.classification.iab_content.mapping_mode": "exact",
1674
  "model_output.classification.iab_content.tier1.label": "Healthy Living",
1675
+ "model_output.classification.iab_content.tier2.label": "Fitness and Exercise",
1676
+ "model_output.classification.iab_content.tier3.label": "Running and Jogging"
1677
  },
1678
  "expected": {
1679
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
1689
  "path": "model_output.classification.iab_content.tier1.label"
1690
  },
1691
  {
1692
+ "actual": "exact",
1693
+ "expected": "nearest_equivalent",
1694
+ "path": "model_output.classification.iab_content.mapping_mode"
1695
+ },
1696
+ {
1697
+ "actual": "Fitness and Exercise",
1698
  "expected": "Business",
1699
  "path": "model_output.classification.iab_content.tier2.label"
1700
  },
1701
  {
1702
+ "actual": "Running and Jogging",
1703
  "expected": "Green Solutions",
1704
  "path": "model_output.classification.iab_content.tier3.label"
1705
  }
 
1753
  {
1754
  "actual": {
1755
  "model_output.classification.iab_content.mapping_mode": "exact",
1756
+ "model_output.classification.iab_content.tier1.label": "Sports",
1757
+ "model_output.classification.iab_content.tier2.label": "Walking",
1758
  "model_output.classification.iab_content.tier3.label": null
1759
  },
1760
  "expected": {
 
1765
  },
1766
  "id": "running-and-jogging-hard",
1767
  "mismatches": [
1768
+ {
1769
+ "actual": "Sports",
1770
+ "expected": "Healthy Living",
1771
+ "path": "model_output.classification.iab_content.tier1.label"
1772
+ },
1773
  {
1774
  "actual": "exact",
1775
  "expected": "nearest_equivalent",
1776
  "path": "model_output.classification.iab_content.mapping_mode"
1777
  },
1778
+ {
1779
+ "actual": "Walking",
1780
+ "expected": "Fitness and Exercise",
1781
+ "path": "model_output.classification.iab_content.tier2.label"
1782
+ },
1783
  {
1784
  "actual": null,
1785
  "expected": "Running and Jogging",
 
1898
  {
1899
  "actual": {
1900
  "model_output.classification.iab_content.mapping_mode": "exact",
1901
+ "model_output.classification.iab_content.tier1.label": "Travel",
1902
+ "model_output.classification.iab_content.tier2.label": "Travel Type"
1903
  },
1904
  "expected": {
1905
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
1908
  },
1909
  "id": "fiction-medium",
1910
  "mismatches": [
1911
+ {
1912
+ "actual": "Travel",
1913
+ "expected": "Books and Literature",
1914
+ "path": "model_output.classification.iab_content.tier1.label"
1915
+ },
1916
  {
1917
  "actual": "exact",
1918
  "expected": "nearest_equivalent",
1919
  "path": "model_output.classification.iab_content.mapping_mode"
1920
+ },
1921
+ {
1922
+ "actual": "Travel Type",
1923
+ "expected": "Fiction",
1924
+ "path": "model_output.classification.iab_content.tier2.label"
1925
  }
1926
  ],
1927
  "notes": "Cross-vertical medium IAB mapping case for Books and Literature > Fiction.",
 
1955
  "actual": {
1956
  "model_output.classification.iab_content.mapping_mode": "exact",
1957
  "model_output.classification.iab_content.tier1.label": "Home & Garden",
1958
+ "model_output.classification.iab_content.tier2.label": "Interior Decorating"
1959
  },
1960
  "expected": {
1961
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
1968
  "actual": "exact",
1969
  "expected": "nearest_equivalent",
1970
  "path": "model_output.classification.iab_content.mapping_mode"
1971
+ },
1972
+ {
1973
+ "actual": "Interior Decorating",
1974
+ "expected": "Remodeling & Construction",
1975
+ "path": "model_output.classification.iab_content.tier2.label"
1976
  }
1977
  ],
1978
  "notes": "Cross-vertical easy IAB mapping case for Home & Garden > Home Improvement.",
 
1982
  },
1983
  {
1984
  "actual": {
1985
+ "model_output.classification.iab_content.mapping_mode": "exact",
1986
  "model_output.classification.iab_content.tier1.label": "Home & Garden",
1987
+ "model_output.classification.iab_content.tier2.label": "Interior Decorating",
1988
  "model_output.classification.iab_content.tier3.label": null
1989
  },
1990
  "expected": {
 
2001
  "path": "model_output.classification.iab_content.tier1.label"
2002
  },
2003
  {
2004
+ "actual": "exact",
2005
+ "expected": "nearest_equivalent",
2006
+ "path": "model_output.classification.iab_content.mapping_mode"
2007
+ },
2008
+ {
2009
+ "actual": "Interior Decorating",
2010
  "expected": "Personal Care",
2011
  "path": "model_output.classification.iab_content.tier2.label"
2012
  },
 
2057
  },
2058
  {
2059
  "actual": {
2060
+ "model_output.classification.iab_content.mapping_mode": "exact",
2061
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
2062
+ "model_output.classification.iab_content.tier2.label": "Augmented Reality"
2063
  },
2064
  "expected": {
2065
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
2074
  "path": "model_output.classification.iab_content.tier1.label"
2075
  },
2076
  {
2077
+ "actual": "exact",
2078
+ "expected": "nearest_equivalent",
2079
+ "path": "model_output.classification.iab_content.mapping_mode"
2080
+ },
2081
+ {
2082
+ "actual": "Augmented Reality",
2083
  "expected": "Language Learning",
2084
  "path": "model_output.classification.iab_content.tier2.label"
2085
  }
 
2115
  },
2116
  {
2117
  "actual": {
2118
+ "model_output.classification.iab_content.mapping_mode": "exact",
2119
+ "model_output.classification.iab_content.tier1.label": "Careers"
2120
  },
2121
  "expected": {
2122
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
2125
  "id": "online-education-hard",
2126
  "mismatches": [
2127
  {
2128
+ "actual": "exact",
2129
+ "expected": "nearest_equivalent",
2130
+ "path": "model_output.classification.iab_content.mapping_mode"
2131
  }
2132
  ],
2133
  "notes": "Cross-vertical hard IAB mapping case for Education > Online Education.",
 
2218
  },
2219
  {
2220
  "actual": {
2221
+ "model_output.classification.iab_content.mapping_mode": "exact",
2222
  "model_output.classification.iab_content.tier1.label": "Medical Health",
2223
  "model_output.classification.iab_content.tier2.label": "Diseases and Conditions",
2224
+ "model_output.classification.iab_content.tier3.label": "Allergies"
2225
  },
2226
  "expected": {
2227
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
2232
  "id": "medical-health-easy",
2233
  "mismatches": [
2234
  {
2235
+ "actual": "exact",
2236
+ "expected": "nearest_equivalent",
2237
+ "path": "model_output.classification.iab_content.mapping_mode"
2238
  }
2239
  ],
2240
  "notes": "Cross-vertical easy IAB mapping case for Medical Health.",
 
2247
  "model_output.classification.iab_content.mapping_mode": "exact",
2248
  "model_output.classification.iab_content.tier1.label": "Medical Health",
2249
  "model_output.classification.iab_content.tier2.label": "Diseases and Conditions",
2250
+ "model_output.classification.iab_content.tier3.label": "Bone and Joint Conditions",
2251
  "model_output.classification.iab_content.tier4.label": null
2252
  },
2253
  "expected": {
 
2264
  "expected": "nearest_equivalent",
2265
  "path": "model_output.classification.iab_content.mapping_mode"
2266
  },
2267
+ {
2268
+ "actual": "Bone and Joint Conditions",
2269
+ "expected": "Injuries",
2270
+ "path": "model_output.classification.iab_content.tier3.label"
2271
+ },
2272
  {
2273
  "actual": null,
2274
  "expected": "First Aid",
 
2284
  "actual": {
2285
  "model_output.classification.iab_content.mapping_mode": "exact",
2286
  "model_output.classification.iab_content.tier1.label": "Medical Health",
2287
+ "model_output.classification.iab_content.tier2.label": null,
2288
  "model_output.classification.iab_content.tier3.label": null
2289
  },
2290
  "expected": {
 
2306
  "path": "model_output.classification.iab_content.mapping_mode"
2307
  },
2308
  {
2309
+ "actual": null,
2310
  "expected": "Wellness",
2311
  "path": "model_output.classification.iab_content.tier2.label"
2312
  },
 
2347
  },
2348
  {
2349
  "actual": {
2350
+ "model_output.classification.iab_content.mapping_mode": "exact",
2351
  "model_output.classification.iab_content.tier1.label": "Business and Finance",
2352
  "model_output.classification.iab_content.tier2.label": "Business",
2353
+ "model_output.classification.iab_content.tier3.label": "Sales"
2354
  },
2355
  "expected": {
2356
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
2361
  "id": "careers-job-search-medium",
2362
  "mismatches": [
2363
  {
2364
+ "actual": "exact",
2365
+ "expected": "nearest_equivalent",
2366
+ "path": "model_output.classification.iab_content.mapping_mode"
2367
  }
2368
  ],
2369
  "notes": "Cross-vertical medium IAB mapping case for Careers > Job Search.",
 
2373
  },
2374
  {
2375
  "actual": {
2376
+ "model_output.classification.iab_content.mapping_mode": "exact",
2377
  "model_output.classification.iab_content.tier1.label": "Genres",
2378
+ "model_output.classification.iab_content.tier2.label": "Talk Show"
2379
  },
2380
  "expected": {
2381
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
2390
  "path": "model_output.classification.iab_content.tier1.label"
2391
  },
2392
  {
2393
+ "actual": "exact",
2394
+ "expected": "nearest_equivalent",
2395
+ "path": "model_output.classification.iab_content.mapping_mode"
2396
+ },
2397
+ {
2398
+ "actual": "Talk Show",
2399
  "expected": "Job Search",
2400
  "path": "model_output.classification.iab_content.tier2.label"
2401
  }
 
2407
  },
2408
  {
2409
  "actual": {
2410
+ "model_output.classification.iab_content.mapping_mode": "exact",
2411
+ "model_output.classification.iab_content.tier1.label": "Holidays",
2412
+ "model_output.classification.iab_content.tier2.label": "National & Civic Holidays"
2413
  },
2414
  "expected": {
2415
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
2419
  "id": "personal-finance-easy",
2420
  "mismatches": [
2421
  {
2422
+ "actual": "Holidays",
2423
  "expected": "Food & Drink",
2424
  "path": "model_output.classification.iab_content.tier1.label"
2425
  },
2426
  {
2427
+ "actual": "exact",
2428
+ "expected": "nearest_equivalent",
2429
+ "path": "model_output.classification.iab_content.mapping_mode"
2430
+ },
2431
+ {
2432
+ "actual": "National & Civic Holidays",
2433
  "expected": "Food Movements",
2434
  "path": "model_output.classification.iab_content.tier2.label"
2435
  }
 
2531
  {
2532
  "actual": {
2533
  "model_output.classification.iab_content.mapping_mode": "exact",
2534
+ "model_output.classification.iab_content.tier1.label": "Genres",
2535
+ "model_output.classification.iab_content.tier2.label": "Family/Children"
2536
  },
2537
  "expected": {
2538
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
2542
  "id": "parenting-medium",
2543
  "mismatches": [
2544
  {
2545
+ "actual": "Genres",
2546
  "expected": "Family and Relationships",
2547
  "path": "model_output.classification.iab_content.tier1.label"
2548
  },
 
2552
  "path": "model_output.classification.iab_content.mapping_mode"
2553
  },
2554
  {
2555
+ "actual": "Family/Children",
2556
  "expected": "Parenting",
2557
  "path": "model_output.classification.iab_content.tier2.label"
2558
  }
 
2567
  "model_output.classification.iab_content.mapping_mode": "exact",
2568
  "model_output.classification.iab_content.tier1.label": "Family and Relationships",
2569
  "model_output.classification.iab_content.tier2.label": "Parenting",
2570
+ "model_output.classification.iab_content.tier3.label": null
2571
  },
2572
  "expected": {
2573
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
2581
  "actual": "exact",
2582
  "expected": "nearest_equivalent",
2583
  "path": "model_output.classification.iab_content.mapping_mode"
2584
+ },
2585
+ {
2586
+ "actual": null,
2587
+ "expected": "Special Needs Kids",
2588
+ "path": "model_output.classification.iab_content.tier3.label"
2589
  }
2590
  ],
2591
  "notes": "Cross-vertical hard IAB mapping case for Family and Relationships > Parenting.",
 
2617
  },
2618
  {
2619
  "actual": {
2620
+ "model_output.classification.iab_content.mapping_mode": "exact",
2621
+ "model_output.classification.iab_content.tier1.label": "Home & Garden",
2622
+ "model_output.classification.iab_content.tier2.label": "Gardening"
2623
  },
2624
  "expected": {
2625
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
2629
  "id": "gardening-medium",
2630
  "mismatches": [
2631
  {
2632
+ "actual": "exact",
2633
+ "expected": "nearest_equivalent",
2634
+ "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
2635
  }
2636
  ],
2637
  "notes": "Cross-vertical medium IAB mapping case for Home & Garden > Gardening.",
 
2688
  {
2689
  "actual": {
2690
  "model_output.classification.iab_content.mapping_mode": "exact",
2691
+ "model_output.classification.iab_content.tier1.label": "Entertainment",
2692
+ "model_output.classification.iab_content.tier2.label": "Movies",
2693
  "model_output.classification.iab_content.tier3.label": null
2694
  },
2695
  "expected": {
 
2701
  "id": "movies-medium",
2702
  "mismatches": [
2703
  {
2704
+ "actual": "Entertainment",
2705
  "expected": "Video Gaming",
2706
  "path": "model_output.classification.iab_content.tier1.label"
2707
  },
 
2711
  "path": "model_output.classification.iab_content.mapping_mode"
2712
  },
2713
  {
2714
+ "actual": "Movies",
2715
  "expected": "Video Game Genres",
2716
  "path": "model_output.classification.iab_content.tier2.label"
2717
  },
artifacts/evaluation/latest/iab_cross_vertical_quality_target_eval.json CHANGED
@@ -1,21 +1,21 @@
1
  {
2
  "by_status": {
3
  "must_fix": {
4
- "failed": 63,
5
- "passed": 27,
6
  "total": 90
7
  }
8
  },
9
  "cases_path": "/content/agentic-intent-classifier/examples/iab_cross_vertical_mapping_cases.json",
10
  "count": 90,
11
- "failed": 63,
12
- "passed": 27,
13
  "results": [
14
  {
15
  "actual": {
16
  "model_output.classification.iab_content.mapping_mode": "exact",
17
- "model_output.classification.iab_content.tier1.label": "Automotive",
18
- "model_output.classification.iab_content.tier2.label": "Auto Rentals"
19
  },
20
  "expected": {
21
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -24,13 +24,18 @@
24
  },
25
  "id": "auto-buying-easy",
26
  "mismatches": [
 
 
 
 
 
27
  {
28
  "actual": "exact",
29
  "expected": "nearest_equivalent",
30
  "path": "model_output.classification.iab_content.mapping_mode"
31
  },
32
  {
33
- "actual": "Auto Rentals",
34
  "expected": "Auto Buying and Selling",
35
  "path": "model_output.classification.iab_content.tier2.label"
36
  }
@@ -42,7 +47,7 @@
42
  },
43
  {
44
  "actual": {
45
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
46
  "model_output.classification.iab_content.tier1.label": "Automotive",
47
  "model_output.classification.iab_content.tier2.label": "Auto Body Styles"
48
  },
@@ -53,6 +58,11 @@
53
  },
54
  "id": "auto-buying-medium",
55
  "mismatches": [
 
 
 
 
 
56
  {
57
  "actual": "Auto Body Styles",
58
  "expected": "Auto Buying and Selling",
@@ -66,9 +76,9 @@
66
  },
67
  {
68
  "actual": {
69
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
70
  "model_output.classification.iab_content.tier1.label": "Automotive",
71
- "model_output.classification.iab_content.tier2.label": "Auto Type"
72
  },
73
  "expected": {
74
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -78,7 +88,12 @@
78
  "id": "auto-buying-hard",
79
  "mismatches": [
80
  {
81
- "actual": "Auto Type",
 
 
 
 
 
82
  "expected": "Auto Buying and Selling",
83
  "path": "model_output.classification.iab_content.tier2.label"
84
  }
@@ -90,10 +105,10 @@
90
  },
91
  {
92
  "actual": {
93
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
94
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
95
  "model_output.classification.iab_content.tier2.label": "Computing",
96
- "model_output.classification.iab_content.tier3.label": null
97
  },
98
  "expected": {
99
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -108,18 +123,13 @@
108
  "expected": "Business and Finance",
109
  "path": "model_output.classification.iab_content.tier1.label"
110
  },
111
- {
112
- "actual": "nearest_equivalent",
113
- "expected": "exact",
114
- "path": "model_output.classification.iab_content.mapping_mode"
115
- },
116
  {
117
  "actual": "Computing",
118
  "expected": "Business",
119
  "path": "model_output.classification.iab_content.tier2.label"
120
  },
121
  {
122
- "actual": null,
123
  "expected": "Sales",
124
  "path": "model_output.classification.iab_content.tier3.label"
125
  }
@@ -131,9 +141,9 @@
131
  },
132
  {
133
  "actual": {
134
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
135
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
136
- "model_output.classification.iab_content.tier2.label": null,
137
  "model_output.classification.iab_content.tier3.label": null
138
  },
139
  "expected": {
@@ -150,12 +160,7 @@
150
  "path": "model_output.classification.iab_content.tier1.label"
151
  },
152
  {
153
- "actual": "nearest_equivalent",
154
- "expected": "exact",
155
- "path": "model_output.classification.iab_content.mapping_mode"
156
- },
157
- {
158
- "actual": null,
159
  "expected": "Business",
160
  "path": "model_output.classification.iab_content.tier2.label"
161
  },
@@ -172,10 +177,10 @@
172
  },
173
  {
174
  "actual": {
175
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
176
  "model_output.classification.iab_content.tier1.label": "Business and Finance",
177
  "model_output.classification.iab_content.tier2.label": "Business",
178
- "model_output.classification.iab_content.tier3.label": null
179
  },
180
  "expected": {
181
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -184,28 +189,17 @@
184
  "model_output.classification.iab_content.tier3.label": "Sales"
185
  },
186
  "id": "sales-crm-hard",
187
- "mismatches": [
188
- {
189
- "actual": "nearest_equivalent",
190
- "expected": "exact",
191
- "path": "model_output.classification.iab_content.mapping_mode"
192
- },
193
- {
194
- "actual": null,
195
- "expected": "Sales",
196
- "path": "model_output.classification.iab_content.tier3.label"
197
- }
198
- ],
199
  "notes": "Cross-vertical hard IAB mapping case for Business and Finance > Business > Sales.",
200
- "pass": false,
201
  "status": "must_fix",
202
  "text": "Need software to manage leads and pipeline for a startup sales team"
203
  },
204
  {
205
  "actual": {
206
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
207
- "model_output.classification.iab_content.tier1.label": "Hobbies & Interests",
208
- "model_output.classification.iab_content.tier2.label": null,
209
  "model_output.classification.iab_content.tier3.label": null
210
  },
211
  "expected": {
@@ -217,17 +211,12 @@
217
  "id": "marketing-tools-easy",
218
  "mismatches": [
219
  {
220
- "actual": "Hobbies & Interests",
221
  "expected": "Business and Finance",
222
  "path": "model_output.classification.iab_content.tier1.label"
223
  },
224
  {
225
- "actual": "nearest_equivalent",
226
- "expected": "exact",
227
- "path": "model_output.classification.iab_content.mapping_mode"
228
- },
229
- {
230
- "actual": null,
231
  "expected": "Business",
232
  "path": "model_output.classification.iab_content.tier2.label"
233
  },
@@ -244,9 +233,9 @@
244
  },
245
  {
246
  "actual": {
247
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
248
- "model_output.classification.iab_content.tier1.label": "Careers",
249
- "model_output.classification.iab_content.tier2.label": null,
250
  "model_output.classification.iab_content.tier3.label": null
251
  },
252
  "expected": {
@@ -258,17 +247,12 @@
258
  "id": "marketing-tools-medium",
259
  "mismatches": [
260
  {
261
- "actual": "Careers",
262
  "expected": "Business and Finance",
263
  "path": "model_output.classification.iab_content.tier1.label"
264
  },
265
  {
266
- "actual": "nearest_equivalent",
267
- "expected": "exact",
268
- "path": "model_output.classification.iab_content.mapping_mode"
269
- },
270
- {
271
- "actual": null,
272
  "expected": "Business",
273
  "path": "model_output.classification.iab_content.tier2.label"
274
  },
@@ -285,9 +269,9 @@
285
  },
286
  {
287
  "actual": {
288
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
289
  "model_output.classification.iab_content.tier1.label": "Personal Finance",
290
- "model_output.classification.iab_content.tier2.label": null,
291
  "model_output.classification.iab_content.tier3.label": null
292
  },
293
  "expected": {
@@ -304,12 +288,7 @@
304
  "path": "model_output.classification.iab_content.tier1.label"
305
  },
306
  {
307
- "actual": "nearest_equivalent",
308
- "expected": "exact",
309
- "path": "model_output.classification.iab_content.mapping_mode"
310
- },
311
- {
312
- "actual": null,
313
  "expected": "Business",
314
  "path": "model_output.classification.iab_content.tier2.label"
315
  },
@@ -326,10 +305,10 @@
326
  },
327
  {
328
  "actual": {
329
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
330
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
331
  "model_output.classification.iab_content.tier2.label": "Computing",
332
- "model_output.classification.iab_content.tier3.label": null
333
  },
334
  "expected": {
335
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -344,18 +323,13 @@
344
  "expected": "Business and Finance",
345
  "path": "model_output.classification.iab_content.tier1.label"
346
  },
347
- {
348
- "actual": "nearest_equivalent",
349
- "expected": "exact",
350
- "path": "model_output.classification.iab_content.mapping_mode"
351
- },
352
  {
353
  "actual": "Computing",
354
  "expected": "Business",
355
  "path": "model_output.classification.iab_content.tier2.label"
356
  },
357
  {
358
- "actual": null,
359
  "expected": "Business I.T.",
360
  "path": "model_output.classification.iab_content.tier3.label"
361
  }
@@ -367,9 +341,9 @@
367
  },
368
  {
369
  "actual": {
370
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
371
- "model_output.classification.iab_content.tier1.label": "Personal Finance",
372
- "model_output.classification.iab_content.tier2.label": null,
373
  "model_output.classification.iab_content.tier3.label": null
374
  },
375
  "expected": {
@@ -381,17 +355,12 @@
381
  "id": "business-it-medium",
382
  "mismatches": [
383
  {
384
- "actual": "Personal Finance",
385
  "expected": "Business and Finance",
386
  "path": "model_output.classification.iab_content.tier1.label"
387
  },
388
  {
389
- "actual": "nearest_equivalent",
390
- "expected": "exact",
391
- "path": "model_output.classification.iab_content.mapping_mode"
392
- },
393
- {
394
- "actual": null,
395
  "expected": "Business",
396
  "path": "model_output.classification.iab_content.tier2.label"
397
  },
@@ -492,8 +461,8 @@
492
  {
493
  "actual": {
494
  "model_output.classification.iab_content.mapping_mode": "exact",
495
- "model_output.classification.iab_content.tier1.label": "Food & Drink",
496
- "model_output.classification.iab_content.tier2.label": "Dining Out"
497
  },
498
  "expected": {
499
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -501,9 +470,20 @@
501
  "model_output.classification.iab_content.tier2.label": "Dining Out"
502
  },
503
  "id": "dining-out-hard",
504
- "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
505
  "notes": "Cross-vertical hard IAB mapping case for Food & Drink > Dining Out.",
506
- "pass": true,
507
  "status": "must_fix",
508
  "text": "Need a place to eat tonight where I can make a reservation online"
509
  },
@@ -547,7 +527,7 @@
547
  "actual": {
548
  "model_output.classification.iab_content.mapping_mode": "exact",
549
  "model_output.classification.iab_content.tier1.label": "Food & Drink",
550
- "model_output.classification.iab_content.tier2.label": "Alcoholic Beverages"
551
  },
552
  "expected": {
553
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -555,16 +535,22 @@
555
  "model_output.classification.iab_content.tier2.label": "Alcoholic Beverages"
556
  },
557
  "id": "alcoholic-beverages-hard",
558
- "mismatches": [],
 
 
 
 
 
 
559
  "notes": "Cross-vertical hard IAB mapping case for Food & Drink > Alcoholic Beverages.",
560
- "pass": true,
561
  "status": "must_fix",
562
  "text": "Want a spirit-forward drink recommendation, not a restaurant suggestion"
563
  },
564
  {
565
  "actual": {
566
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
567
- "model_output.classification.iab_content.tier1.label": "Sports",
568
  "model_output.classification.iab_content.tier2.label": null
569
  },
570
  "expected": {
@@ -575,7 +561,7 @@
575
  "id": "artificial-intelligence-easy",
576
  "mismatches": [
577
  {
578
- "actual": "Sports",
579
  "expected": "Technology & Computing",
580
  "path": "model_output.classification.iab_content.tier1.label"
581
  },
@@ -655,10 +641,10 @@
655
  },
656
  {
657
  "actual": {
658
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
659
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
660
  "model_output.classification.iab_content.tier2.label": "Computing",
661
- "model_output.classification.iab_content.tier3.label": null
662
  },
663
  "expected": {
664
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -667,29 +653,18 @@
667
  "model_output.classification.iab_content.tier3.label": "Software and Applications"
668
  },
669
  "id": "software-apps-easy",
670
- "mismatches": [
671
- {
672
- "actual": "nearest_equivalent",
673
- "expected": "exact",
674
- "path": "model_output.classification.iab_content.mapping_mode"
675
- },
676
- {
677
- "actual": null,
678
- "expected": "Software and Applications",
679
- "path": "model_output.classification.iab_content.tier3.label"
680
- }
681
- ],
682
  "notes": "Cross-vertical easy IAB mapping case for Technology & Computing > Computing > Computer Software and Applications.",
683
- "pass": false,
684
  "status": "must_fix",
685
  "text": "Best workflow software for a small operations team"
686
  },
687
  {
688
  "actual": {
689
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
690
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
691
  "model_output.classification.iab_content.tier2.label": "Computing",
692
- "model_output.classification.iab_content.tier3.label": null
693
  },
694
  "expected": {
695
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -698,28 +673,17 @@
698
  "model_output.classification.iab_content.tier3.label": "Software and Applications"
699
  },
700
  "id": "software-apps-medium",
701
- "mismatches": [
702
- {
703
- "actual": "nearest_equivalent",
704
- "expected": "exact",
705
- "path": "model_output.classification.iab_content.mapping_mode"
706
- },
707
- {
708
- "actual": null,
709
- "expected": "Software and Applications",
710
- "path": "model_output.classification.iab_content.tier3.label"
711
- }
712
- ],
713
  "notes": "Cross-vertical medium IAB mapping case for Technology & Computing > Computing > Computer Software and Applications.",
714
- "pass": false,
715
  "status": "must_fix",
716
  "text": "Need project management software for a distributed team"
717
  },
718
  {
719
  "actual": {
720
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
721
- "model_output.classification.iab_content.tier1.label": "Business and Finance",
722
- "model_output.classification.iab_content.tier2.label": null,
723
  "model_output.classification.iab_content.tier3.label": null
724
  },
725
  "expected": {
@@ -731,17 +695,7 @@
731
  "id": "software-apps-hard",
732
  "mismatches": [
733
  {
734
- "actual": "Business and Finance",
735
- "expected": "Technology & Computing",
736
- "path": "model_output.classification.iab_content.tier1.label"
737
- },
738
- {
739
- "actual": "nearest_equivalent",
740
- "expected": "exact",
741
- "path": "model_output.classification.iab_content.mapping_mode"
742
- },
743
- {
744
- "actual": null,
745
  "expected": "Computing",
746
  "path": "model_output.classification.iab_content.tier2.label"
747
  },
@@ -801,10 +755,10 @@
801
  },
802
  {
803
  "actual": {
804
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
805
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
806
  "model_output.classification.iab_content.tier2.label": "Computing",
807
- "model_output.classification.iab_content.tier3.label": null,
808
  "model_output.classification.iab_content.tier4.label": null
809
  },
810
  "expected": {
@@ -817,12 +771,7 @@
817
  "id": "communication-software-medium",
818
  "mismatches": [
819
  {
820
- "actual": "nearest_equivalent",
821
- "expected": "exact",
822
- "path": "model_output.classification.iab_content.mapping_mode"
823
- },
824
- {
825
- "actual": null,
826
  "expected": "Software and Applications",
827
  "path": "model_output.classification.iab_content.tier3.label"
828
  },
@@ -839,9 +788,9 @@
839
  },
840
  {
841
  "actual": {
842
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
843
- "model_output.classification.iab_content.tier1.label": "Careers",
844
- "model_output.classification.iab_content.tier2.label": null,
845
  "model_output.classification.iab_content.tier3.label": null,
846
  "model_output.classification.iab_content.tier4.label": null
847
  },
@@ -855,17 +804,7 @@
855
  "id": "communication-software-hard",
856
  "mismatches": [
857
  {
858
- "actual": "Careers",
859
- "expected": "Technology & Computing",
860
- "path": "model_output.classification.iab_content.tier1.label"
861
- },
862
- {
863
- "actual": "nearest_equivalent",
864
- "expected": "exact",
865
- "path": "model_output.classification.iab_content.mapping_mode"
866
- },
867
- {
868
- "actual": null,
869
  "expected": "Computing",
870
  "path": "model_output.classification.iab_content.tier2.label"
871
  },
@@ -887,11 +826,11 @@
887
  },
888
  {
889
  "actual": {
890
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
891
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
892
  "model_output.classification.iab_content.tier2.label": "Computing",
893
- "model_output.classification.iab_content.tier3.label": null,
894
- "model_output.classification.iab_content.tier4.label": null
895
  },
896
  "expected": {
897
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -901,35 +840,19 @@
901
  "model_output.classification.iab_content.tier4.label": "Web Hosting"
902
  },
903
  "id": "web-hosting-easy",
904
- "mismatches": [
905
- {
906
- "actual": "nearest_equivalent",
907
- "expected": "exact",
908
- "path": "model_output.classification.iab_content.mapping_mode"
909
- },
910
- {
911
- "actual": null,
912
- "expected": "Internet",
913
- "path": "model_output.classification.iab_content.tier3.label"
914
- },
915
- {
916
- "actual": null,
917
- "expected": "Web Hosting",
918
- "path": "model_output.classification.iab_content.tier4.label"
919
- }
920
- ],
921
  "notes": "Cross-vertical easy IAB mapping case for Technology & Computing > Computing > Internet > Web Hosting.",
922
- "pass": false,
923
  "status": "must_fix",
924
  "text": "Vercel vs Netlify for website hosting"
925
  },
926
  {
927
  "actual": {
928
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
929
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
930
  "model_output.classification.iab_content.tier2.label": "Computing",
931
- "model_output.classification.iab_content.tier3.label": null,
932
- "model_output.classification.iab_content.tier4.label": null
933
  },
934
  "expected": {
935
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -939,35 +862,19 @@
939
  "model_output.classification.iab_content.tier4.label": "Web Hosting"
940
  },
941
  "id": "web-hosting-medium",
942
- "mismatches": [
943
- {
944
- "actual": "nearest_equivalent",
945
- "expected": "exact",
946
- "path": "model_output.classification.iab_content.mapping_mode"
947
- },
948
- {
949
- "actual": null,
950
- "expected": "Internet",
951
- "path": "model_output.classification.iab_content.tier3.label"
952
- },
953
- {
954
- "actual": null,
955
- "expected": "Web Hosting",
956
- "path": "model_output.classification.iab_content.tier4.label"
957
- }
958
- ],
959
  "notes": "Cross-vertical medium IAB mapping case for Technology & Computing > Computing > Internet > Web Hosting.",
960
- "pass": false,
961
  "status": "must_fix",
962
  "text": "Best hosting platform for a startup website"
963
  },
964
  {
965
  "actual": {
966
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
967
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
968
  "model_output.classification.iab_content.tier2.label": "Computing",
969
- "model_output.classification.iab_content.tier3.label": null,
970
- "model_output.classification.iab_content.tier4.label": null
971
  },
972
  "expected": {
973
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -977,34 +884,18 @@
977
  "model_output.classification.iab_content.tier4.label": "Web Hosting"
978
  },
979
  "id": "web-hosting-hard",
980
- "mismatches": [
981
- {
982
- "actual": "nearest_equivalent",
983
- "expected": "exact",
984
- "path": "model_output.classification.iab_content.mapping_mode"
985
- },
986
- {
987
- "actual": null,
988
- "expected": "Internet",
989
- "path": "model_output.classification.iab_content.tier3.label"
990
- },
991
- {
992
- "actual": null,
993
- "expected": "Web Hosting",
994
- "path": "model_output.classification.iab_content.tier4.label"
995
- }
996
- ],
997
  "notes": "Cross-vertical hard IAB mapping case for Technology & Computing > Computing > Internet > Web Hosting.",
998
- "pass": false,
999
  "status": "must_fix",
1000
  "text": "Need a managed hosting provider to deploy and run our marketing site"
1001
  },
1002
  {
1003
  "actual": {
1004
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1005
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1006
- "model_output.classification.iab_content.tier2.label": null,
1007
- "model_output.classification.iab_content.tier3.label": null
1008
  },
1009
  "expected": {
1010
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1013,25 +904,9 @@
1013
  "model_output.classification.iab_content.tier3.label": "Laptops"
1014
  },
1015
  "id": "laptops-easy",
1016
- "mismatches": [
1017
- {
1018
- "actual": "nearest_equivalent",
1019
- "expected": "exact",
1020
- "path": "model_output.classification.iab_content.mapping_mode"
1021
- },
1022
- {
1023
- "actual": null,
1024
- "expected": "Computing",
1025
- "path": "model_output.classification.iab_content.tier2.label"
1026
- },
1027
- {
1028
- "actual": null,
1029
- "expected": "Laptops",
1030
- "path": "model_output.classification.iab_content.tier3.label"
1031
- }
1032
- ],
1033
  "notes": "Cross-vertical easy IAB mapping case for Technology & Computing > Computing > Laptops.",
1034
- "pass": false,
1035
  "status": "must_fix",
1036
  "text": "Which laptop should I buy for college?"
1037
  },
@@ -1057,10 +932,10 @@
1057
  },
1058
  {
1059
  "actual": {
1060
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1061
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1062
  "model_output.classification.iab_content.tier2.label": "Computing",
1063
- "model_output.classification.iab_content.tier3.label": null
1064
  },
1065
  "expected": {
1066
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1069,26 +944,15 @@
1069
  "model_output.classification.iab_content.tier3.label": "Laptops"
1070
  },
1071
  "id": "laptops-hard",
1072
- "mismatches": [
1073
- {
1074
- "actual": "nearest_equivalent",
1075
- "expected": "exact",
1076
- "path": "model_output.classification.iab_content.mapping_mode"
1077
- },
1078
- {
1079
- "actual": null,
1080
- "expected": "Laptops",
1081
- "path": "model_output.classification.iab_content.tier3.label"
1082
- }
1083
- ],
1084
  "notes": "Cross-vertical hard IAB mapping case for Technology & Computing > Computing > Laptops.",
1085
- "pass": false,
1086
  "status": "must_fix",
1087
  "text": "Need a portable computer with good battery life for everyday work"
1088
  },
1089
  {
1090
  "actual": {
1091
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1092
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1093
  "model_output.classification.iab_content.tier2.label": "Computing",
1094
  "model_output.classification.iab_content.tier3.label": "Software and Applications"
@@ -1101,11 +965,6 @@
1101
  },
1102
  "id": "desktops-easy",
1103
  "mismatches": [
1104
- {
1105
- "actual": "nearest_equivalent",
1106
- "expected": "exact",
1107
- "path": "model_output.classification.iab_content.mapping_mode"
1108
- },
1109
  {
1110
  "actual": "Software and Applications",
1111
  "expected": "Desktops",
@@ -1119,10 +978,10 @@
1119
  },
1120
  {
1121
  "actual": {
1122
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1123
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1124
  "model_output.classification.iab_content.tier2.label": "Computing",
1125
- "model_output.classification.iab_content.tier3.label": null
1126
  },
1127
  "expected": {
1128
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1131,20 +990,9 @@
1131
  "model_output.classification.iab_content.tier3.label": "Desktops"
1132
  },
1133
  "id": "desktops-medium",
1134
- "mismatches": [
1135
- {
1136
- "actual": "nearest_equivalent",
1137
- "expected": "exact",
1138
- "path": "model_output.classification.iab_content.mapping_mode"
1139
- },
1140
- {
1141
- "actual": null,
1142
- "expected": "Desktops",
1143
- "path": "model_output.classification.iab_content.tier3.label"
1144
- }
1145
- ],
1146
  "notes": "Cross-vertical medium IAB mapping case for Technology & Computing > Computing > Desktops.",
1147
- "pass": false,
1148
  "status": "must_fix",
1149
  "text": "Which desktop computer should I buy for a home office?"
1150
  },
@@ -1173,7 +1021,7 @@
1173
  "model_output.classification.iab_content.mapping_mode": "exact",
1174
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1175
  "model_output.classification.iab_content.tier2.label": "Consumer Electronics",
1176
- "model_output.classification.iab_content.tier3.label": "Wearable Technology"
1177
  },
1178
  "expected": {
1179
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1182,15 +1030,9 @@
1182
  "model_output.classification.iab_content.tier3.label": "Smartphones"
1183
  },
1184
  "id": "smartphones-easy",
1185
- "mismatches": [
1186
- {
1187
- "actual": "Wearable Technology",
1188
- "expected": "Smartphones",
1189
- "path": "model_output.classification.iab_content.tier3.label"
1190
- }
1191
- ],
1192
  "notes": "Cross-vertical easy IAB mapping case for Technology & Computing > Consumer Electronics > Smartphones.",
1193
- "pass": false,
1194
  "status": "must_fix",
1195
  "text": "Best phone with a good camera under 700"
1196
  },
@@ -1199,7 +1041,7 @@
1199
  "model_output.classification.iab_content.mapping_mode": "exact",
1200
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1201
  "model_output.classification.iab_content.tier2.label": "Consumer Electronics",
1202
- "model_output.classification.iab_content.tier3.label": "Wearable Technology"
1203
  },
1204
  "expected": {
1205
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1208,15 +1050,9 @@
1208
  "model_output.classification.iab_content.tier3.label": "Smartphones"
1209
  },
1210
  "id": "smartphones-medium",
1211
- "mismatches": [
1212
- {
1213
- "actual": "Wearable Technology",
1214
- "expected": "Smartphones",
1215
- "path": "model_output.classification.iab_content.tier3.label"
1216
- }
1217
- ],
1218
  "notes": "Cross-vertical medium IAB mapping case for Technology & Computing > Consumer Electronics > Smartphones.",
1219
- "pass": false,
1220
  "status": "must_fix",
1221
  "text": "Should I buy an iPhone or Pixel this year?"
1222
  },
@@ -1225,7 +1061,7 @@
1225
  "model_output.classification.iab_content.mapping_mode": "exact",
1226
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1227
  "model_output.classification.iab_content.tier2.label": "Consumer Electronics",
1228
- "model_output.classification.iab_content.tier3.label": "Wearable Technology"
1229
  },
1230
  "expected": {
1231
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1234,15 +1070,9 @@
1234
  "model_output.classification.iab_content.tier3.label": "Smartphones"
1235
  },
1236
  "id": "smartphones-hard",
1237
- "mismatches": [
1238
- {
1239
- "actual": "Wearable Technology",
1240
- "expected": "Smartphones",
1241
- "path": "model_output.classification.iab_content.tier3.label"
1242
- }
1243
- ],
1244
  "notes": "Cross-vertical hard IAB mapping case for Technology & Computing > Consumer Electronics > Smartphones.",
1245
- "pass": false,
1246
  "status": "must_fix",
1247
  "text": "Need a new smartphone with strong battery life and a clean software experience"
1248
  },
@@ -1314,9 +1144,9 @@
1314
  },
1315
  {
1316
  "actual": {
1317
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1318
  "model_output.classification.iab_content.tier1.label": "Sports",
1319
- "model_output.classification.iab_content.tier2.label": null,
1320
  "model_output.classification.iab_content.tier3.label": null
1321
  },
1322
  "expected": {
@@ -1333,12 +1163,7 @@
1333
  "path": "model_output.classification.iab_content.tier1.label"
1334
  },
1335
  {
1336
- "actual": "nearest_equivalent",
1337
- "expected": "exact",
1338
- "path": "model_output.classification.iab_content.mapping_mode"
1339
- },
1340
- {
1341
- "actual": null,
1342
  "expected": "Women's Fashion",
1343
  "path": "model_output.classification.iab_content.tier2.label"
1344
  },
@@ -1411,9 +1236,9 @@
1411
  },
1412
  {
1413
  "actual": {
1414
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1415
  "model_output.classification.iab_content.tier1.label": "Style & Fashion",
1416
- "model_output.classification.iab_content.tier2.label": null,
1417
  "model_output.classification.iab_content.tier3.label": null
1418
  },
1419
  "expected": {
@@ -1425,12 +1250,7 @@
1425
  "id": "mens-shoes-easy",
1426
  "mismatches": [
1427
  {
1428
- "actual": "nearest_equivalent",
1429
- "expected": "exact",
1430
- "path": "model_output.classification.iab_content.mapping_mode"
1431
- },
1432
- {
1433
- "actual": null,
1434
  "expected": "Men's Fashion",
1435
  "path": "model_output.classification.iab_content.tier2.label"
1436
  },
@@ -1467,9 +1287,9 @@
1467
  },
1468
  {
1469
  "actual": {
1470
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1471
  "model_output.classification.iab_content.tier1.label": "Style & Fashion",
1472
- "model_output.classification.iab_content.tier2.label": null,
1473
  "model_output.classification.iab_content.tier3.label": null
1474
  },
1475
  "expected": {
@@ -1481,12 +1301,7 @@
1481
  "id": "mens-shoes-hard",
1482
  "mismatches": [
1483
  {
1484
- "actual": "nearest_equivalent",
1485
- "expected": "exact",
1486
- "path": "model_output.classification.iab_content.mapping_mode"
1487
- },
1488
- {
1489
- "actual": null,
1490
  "expected": "Men's Fashion",
1491
  "path": "model_output.classification.iab_content.tier2.label"
1492
  },
@@ -1545,7 +1360,7 @@
1545
  "actual": {
1546
  "model_output.classification.iab_content.mapping_mode": "exact",
1547
  "model_output.classification.iab_content.tier1.label": "Travel",
1548
- "model_output.classification.iab_content.tier2.label": null,
1549
  "model_output.classification.iab_content.tier3.label": null
1550
  },
1551
  "expected": {
@@ -1556,11 +1371,6 @@
1556
  },
1557
  "id": "hotels-hard",
1558
  "mismatches": [
1559
- {
1560
- "actual": null,
1561
- "expected": "Travel Type",
1562
- "path": "model_output.classification.iab_content.tier2.label"
1563
- },
1564
  {
1565
  "actual": null,
1566
  "expected": "Hotels and Motels",
@@ -1645,10 +1455,10 @@
1645
  },
1646
  {
1647
  "actual": {
1648
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1649
  "model_output.classification.iab_content.tier1.label": "Healthy Living",
1650
- "model_output.classification.iab_content.tier2.label": null,
1651
- "model_output.classification.iab_content.tier3.label": null
1652
  },
1653
  "expected": {
1654
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1657,25 +1467,9 @@
1657
  "model_output.classification.iab_content.tier3.label": "Running and Jogging"
1658
  },
1659
  "id": "running-and-jogging-easy",
1660
- "mismatches": [
1661
- {
1662
- "actual": "nearest_equivalent",
1663
- "expected": "exact",
1664
- "path": "model_output.classification.iab_content.mapping_mode"
1665
- },
1666
- {
1667
- "actual": null,
1668
- "expected": "Fitness and Exercise",
1669
- "path": "model_output.classification.iab_content.tier2.label"
1670
- },
1671
- {
1672
- "actual": null,
1673
- "expected": "Running and Jogging",
1674
- "path": "model_output.classification.iab_content.tier3.label"
1675
- }
1676
- ],
1677
  "notes": "Cross-vertical easy IAB mapping case for Healthy Living > Fitness and Exercise > Running and Jogging.",
1678
- "pass": false,
1679
  "status": "must_fix",
1680
  "text": "Best running plan for a first 10k"
1681
  },
@@ -1718,8 +1512,8 @@
1718
  {
1719
  "actual": {
1720
  "model_output.classification.iab_content.mapping_mode": "exact",
1721
- "model_output.classification.iab_content.tier1.label": "Healthy Living",
1722
- "model_output.classification.iab_content.tier2.label": "Fitness and Exercise",
1723
  "model_output.classification.iab_content.tier3.label": null
1724
  },
1725
  "expected": {
@@ -1730,6 +1524,16 @@
1730
  },
1731
  "id": "running-and-jogging-hard",
1732
  "mismatches": [
 
 
 
 
 
 
 
 
 
 
1733
  {
1734
  "actual": null,
1735
  "expected": "Running and Jogging",
@@ -1827,8 +1631,8 @@
1827
  {
1828
  "actual": {
1829
  "model_output.classification.iab_content.mapping_mode": "exact",
1830
- "model_output.classification.iab_content.tier1.label": "Books and Literature",
1831
- "model_output.classification.iab_content.tier2.label": "Fiction"
1832
  },
1833
  "expected": {
1834
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -1837,10 +1641,20 @@
1837
  },
1838
  "id": "fiction-medium",
1839
  "mismatches": [
 
 
 
 
 
1840
  {
1841
  "actual": "exact",
1842
  "expected": "nearest_equivalent",
1843
  "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
1844
  }
1845
  ],
1846
  "notes": "Cross-vertical medium IAB mapping case for Books and Literature > Fiction.",
@@ -1870,7 +1684,7 @@
1870
  "actual": {
1871
  "model_output.classification.iab_content.mapping_mode": "exact",
1872
  "model_output.classification.iab_content.tier1.label": "Home & Garden",
1873
- "model_output.classification.iab_content.tier2.label": "Remodeling & Construction"
1874
  },
1875
  "expected": {
1876
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1880,7 +1694,7 @@
1880
  "id": "home-improvement-easy",
1881
  "mismatches": [
1882
  {
1883
- "actual": "Remodeling & Construction",
1884
  "expected": "Home Improvement",
1885
  "path": "model_output.classification.iab_content.tier2.label"
1886
  }
@@ -1892,9 +1706,9 @@
1892
  },
1893
  {
1894
  "actual": {
1895
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1896
  "model_output.classification.iab_content.tier1.label": "Home & Garden",
1897
- "model_output.classification.iab_content.tier2.label": null
1898
  },
1899
  "expected": {
1900
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1904,12 +1718,7 @@
1904
  "id": "home-improvement-medium",
1905
  "mismatches": [
1906
  {
1907
- "actual": "nearest_equivalent",
1908
- "expected": "exact",
1909
- "path": "model_output.classification.iab_content.mapping_mode"
1910
- },
1911
- {
1912
- "actual": null,
1913
  "expected": "Home Improvement",
1914
  "path": "model_output.classification.iab_content.tier2.label"
1915
  }
@@ -1950,9 +1759,9 @@
1950
  },
1951
  {
1952
  "actual": {
1953
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1954
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1955
- "model_output.classification.iab_content.tier2.label": null
1956
  },
1957
  "expected": {
1958
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1967,12 +1776,7 @@
1967
  "path": "model_output.classification.iab_content.tier1.label"
1968
  },
1969
  {
1970
- "actual": "nearest_equivalent",
1971
- "expected": "exact",
1972
- "path": "model_output.classification.iab_content.mapping_mode"
1973
- },
1974
- {
1975
- "actual": null,
1976
  "expected": "Online Education",
1977
  "path": "model_output.classification.iab_content.tier2.label"
1978
  }
@@ -2013,9 +1817,9 @@
2013
  },
2014
  {
2015
  "actual": {
2016
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2017
- "model_output.classification.iab_content.tier1.label": "Healthy Living",
2018
- "model_output.classification.iab_content.tier2.label": null
2019
  },
2020
  "expected": {
2021
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -2025,17 +1829,12 @@
2025
  "id": "online-education-hard",
2026
  "mismatches": [
2027
  {
2028
- "actual": "Healthy Living",
2029
  "expected": "Education",
2030
  "path": "model_output.classification.iab_content.tier1.label"
2031
  },
2032
  {
2033
- "actual": "nearest_equivalent",
2034
- "expected": "exact",
2035
- "path": "model_output.classification.iab_content.mapping_mode"
2036
- },
2037
- {
2038
- "actual": null,
2039
  "expected": "Online Education",
2040
  "path": "model_output.classification.iab_content.tier2.label"
2041
  }
@@ -2107,7 +1906,7 @@
2107
  },
2108
  {
2109
  "actual": {
2110
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2111
  "model_output.classification.iab_content.tier1.label": "Medical Health"
2112
  },
2113
  "expected": {
@@ -2115,15 +1914,9 @@
2115
  "model_output.classification.iab_content.tier1.label": "Medical Health"
2116
  },
2117
  "id": "medical-health-easy",
2118
- "mismatches": [
2119
- {
2120
- "actual": "nearest_equivalent",
2121
- "expected": "exact",
2122
- "path": "model_output.classification.iab_content.mapping_mode"
2123
- }
2124
- ],
2125
  "notes": "Cross-vertical easy IAB mapping case for Medical Health.",
2126
- "pass": false,
2127
  "status": "must_fix",
2128
  "text": "what do these allergy symptoms mean"
2129
  },
@@ -2185,7 +1978,7 @@
2185
  },
2186
  {
2187
  "actual": {
2188
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2189
  "model_output.classification.iab_content.tier1.label": "Business and Finance",
2190
  "model_output.classification.iab_content.tier2.label": "Business"
2191
  },
@@ -2201,11 +1994,6 @@
2201
  "expected": "Careers",
2202
  "path": "model_output.classification.iab_content.tier1.label"
2203
  },
2204
- {
2205
- "actual": "nearest_equivalent",
2206
- "expected": "exact",
2207
- "path": "model_output.classification.iab_content.mapping_mode"
2208
- },
2209
  {
2210
  "actual": "Business",
2211
  "expected": "Job Search",
@@ -2219,9 +2007,9 @@
2219
  },
2220
  {
2221
  "actual": {
2222
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2223
  "model_output.classification.iab_content.tier1.label": "Genres",
2224
- "model_output.classification.iab_content.tier2.label": null
2225
  },
2226
  "expected": {
2227
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -2236,12 +2024,7 @@
2236
  "path": "model_output.classification.iab_content.tier1.label"
2237
  },
2238
  {
2239
- "actual": "nearest_equivalent",
2240
- "expected": "exact",
2241
- "path": "model_output.classification.iab_content.mapping_mode"
2242
- },
2243
- {
2244
- "actual": null,
2245
  "expected": "Job Search",
2246
  "path": "model_output.classification.iab_content.tier2.label"
2247
  }
@@ -2253,9 +2036,9 @@
2253
  },
2254
  {
2255
  "actual": {
2256
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2257
- "model_output.classification.iab_content.tier1.label": "Personal Celebrations & Life Events",
2258
- "model_output.classification.iab_content.tier2.label": null
2259
  },
2260
  "expected": {
2261
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -2265,17 +2048,12 @@
2265
  "id": "personal-finance-easy",
2266
  "mismatches": [
2267
  {
2268
- "actual": "Personal Celebrations & Life Events",
2269
  "expected": "Personal Finance",
2270
  "path": "model_output.classification.iab_content.tier1.label"
2271
  },
2272
  {
2273
- "actual": "nearest_equivalent",
2274
- "expected": "exact",
2275
- "path": "model_output.classification.iab_content.mapping_mode"
2276
- },
2277
- {
2278
- "actual": null,
2279
  "expected": "Financial Planning",
2280
  "path": "model_output.classification.iab_content.tier2.label"
2281
  }
@@ -2354,8 +2132,8 @@
2354
  {
2355
  "actual": {
2356
  "model_output.classification.iab_content.mapping_mode": "exact",
2357
- "model_output.classification.iab_content.tier1.label": "Hobbies & Interests",
2358
- "model_output.classification.iab_content.tier2.label": "Content Production"
2359
  },
2360
  "expected": {
2361
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -2365,12 +2143,12 @@
2365
  "id": "parenting-medium",
2366
  "mismatches": [
2367
  {
2368
- "actual": "Hobbies & Interests",
2369
  "expected": "Family and Relationships",
2370
  "path": "model_output.classification.iab_content.tier1.label"
2371
  },
2372
  {
2373
- "actual": "Content Production",
2374
  "expected": "Parenting",
2375
  "path": "model_output.classification.iab_content.tier2.label"
2376
  }
@@ -2418,9 +2196,9 @@
2418
  },
2419
  {
2420
  "actual": {
2421
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2422
- "model_output.classification.iab_content.tier1.label": "Food & Drink",
2423
- "model_output.classification.iab_content.tier2.label": null
2424
  },
2425
  "expected": {
2426
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -2428,25 +2206,9 @@
2428
  "model_output.classification.iab_content.tier2.label": "Gardening"
2429
  },
2430
  "id": "gardening-medium",
2431
- "mismatches": [
2432
- {
2433
- "actual": "Food & Drink",
2434
- "expected": "Home & Garden",
2435
- "path": "model_output.classification.iab_content.tier1.label"
2436
- },
2437
- {
2438
- "actual": "nearest_equivalent",
2439
- "expected": "exact",
2440
- "path": "model_output.classification.iab_content.mapping_mode"
2441
- },
2442
- {
2443
- "actual": null,
2444
- "expected": "Gardening",
2445
- "path": "model_output.classification.iab_content.tier2.label"
2446
- }
2447
- ],
2448
  "notes": "Cross-vertical medium IAB mapping case for Home & Garden > Gardening.",
2449
- "pass": false,
2450
  "status": "must_fix",
2451
  "text": "how often should i water tomato plants"
2452
  },
@@ -2489,8 +2251,8 @@
2489
  {
2490
  "actual": {
2491
  "model_output.classification.iab_content.mapping_mode": "exact",
2492
- "model_output.classification.iab_content.tier1.label": "Genres",
2493
- "model_output.classification.iab_content.tier2.label": "Horror"
2494
  },
2495
  "expected": {
2496
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -2498,20 +2260,9 @@
2498
  "model_output.classification.iab_content.tier2.label": "Movies"
2499
  },
2500
  "id": "movies-medium",
2501
- "mismatches": [
2502
- {
2503
- "actual": "Genres",
2504
- "expected": "Entertainment",
2505
- "path": "model_output.classification.iab_content.tier1.label"
2506
- },
2507
- {
2508
- "actual": "Horror",
2509
- "expected": "Movies",
2510
- "path": "model_output.classification.iab_content.tier2.label"
2511
- }
2512
- ],
2513
  "notes": "Cross-vertical medium IAB mapping case for Entertainment > Movies.",
2514
- "pass": false,
2515
  "status": "must_fix",
2516
  "text": "Best thriller movies from the last few years"
2517
  },
 
1
  {
2
  "by_status": {
3
  "must_fix": {
4
+ "failed": 49,
5
+ "passed": 41,
6
  "total": 90
7
  }
8
  },
9
  "cases_path": "/content/agentic-intent-classifier/examples/iab_cross_vertical_mapping_cases.json",
10
  "count": 90,
11
+ "failed": 49,
12
+ "passed": 41,
13
  "results": [
14
  {
15
  "actual": {
16
  "model_output.classification.iab_content.mapping_mode": "exact",
17
+ "model_output.classification.iab_content.tier1.label": "Personal Finance",
18
+ "model_output.classification.iab_content.tier2.label": "Insurance"
19
  },
20
  "expected": {
21
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
24
  },
25
  "id": "auto-buying-easy",
26
  "mismatches": [
27
+ {
28
+ "actual": "Personal Finance",
29
+ "expected": "Automotive",
30
+ "path": "model_output.classification.iab_content.tier1.label"
31
+ },
32
  {
33
  "actual": "exact",
34
  "expected": "nearest_equivalent",
35
  "path": "model_output.classification.iab_content.mapping_mode"
36
  },
37
  {
38
+ "actual": "Insurance",
39
  "expected": "Auto Buying and Selling",
40
  "path": "model_output.classification.iab_content.tier2.label"
41
  }
 
47
  },
48
  {
49
  "actual": {
50
+ "model_output.classification.iab_content.mapping_mode": "exact",
51
  "model_output.classification.iab_content.tier1.label": "Automotive",
52
  "model_output.classification.iab_content.tier2.label": "Auto Body Styles"
53
  },
 
58
  },
59
  "id": "auto-buying-medium",
60
  "mismatches": [
61
+ {
62
+ "actual": "exact",
63
+ "expected": "nearest_equivalent",
64
+ "path": "model_output.classification.iab_content.mapping_mode"
65
+ },
66
  {
67
  "actual": "Auto Body Styles",
68
  "expected": "Auto Buying and Selling",
 
76
  },
77
  {
78
  "actual": {
79
+ "model_output.classification.iab_content.mapping_mode": "exact",
80
  "model_output.classification.iab_content.tier1.label": "Automotive",
81
+ "model_output.classification.iab_content.tier2.label": "Car Culture"
82
  },
83
  "expected": {
84
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
88
  "id": "auto-buying-hard",
89
  "mismatches": [
90
  {
91
+ "actual": "exact",
92
+ "expected": "nearest_equivalent",
93
+ "path": "model_output.classification.iab_content.mapping_mode"
94
+ },
95
+ {
96
+ "actual": "Car Culture",
97
  "expected": "Auto Buying and Selling",
98
  "path": "model_output.classification.iab_content.tier2.label"
99
  }
 
105
  },
106
  {
107
  "actual": {
108
+ "model_output.classification.iab_content.mapping_mode": "exact",
109
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
110
  "model_output.classification.iab_content.tier2.label": "Computing",
111
+ "model_output.classification.iab_content.tier3.label": "Software and Applications"
112
  },
113
  "expected": {
114
  "model_output.classification.iab_content.mapping_mode": "exact",
 
123
  "expected": "Business and Finance",
124
  "path": "model_output.classification.iab_content.tier1.label"
125
  },
 
 
 
 
 
126
  {
127
  "actual": "Computing",
128
  "expected": "Business",
129
  "path": "model_output.classification.iab_content.tier2.label"
130
  },
131
  {
132
+ "actual": "Software and Applications",
133
  "expected": "Sales",
134
  "path": "model_output.classification.iab_content.tier3.label"
135
  }
 
141
  },
142
  {
143
  "actual": {
144
+ "model_output.classification.iab_content.mapping_mode": "exact",
145
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
146
+ "model_output.classification.iab_content.tier2.label": "Robotics",
147
  "model_output.classification.iab_content.tier3.label": null
148
  },
149
  "expected": {
 
160
  "path": "model_output.classification.iab_content.tier1.label"
161
  },
162
  {
163
+ "actual": "Robotics",
 
 
 
 
 
164
  "expected": "Business",
165
  "path": "model_output.classification.iab_content.tier2.label"
166
  },
 
177
  },
178
  {
179
  "actual": {
180
+ "model_output.classification.iab_content.mapping_mode": "exact",
181
  "model_output.classification.iab_content.tier1.label": "Business and Finance",
182
  "model_output.classification.iab_content.tier2.label": "Business",
183
+ "model_output.classification.iab_content.tier3.label": "Sales"
184
  },
185
  "expected": {
186
  "model_output.classification.iab_content.mapping_mode": "exact",
 
189
  "model_output.classification.iab_content.tier3.label": "Sales"
190
  },
191
  "id": "sales-crm-hard",
192
+ "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
193
  "notes": "Cross-vertical hard IAB mapping case for Business and Finance > Business > Sales.",
194
+ "pass": true,
195
  "status": "must_fix",
196
  "text": "Need software to manage leads and pipeline for a startup sales team"
197
  },
198
  {
199
  "actual": {
200
+ "model_output.classification.iab_content.mapping_mode": "exact",
201
+ "model_output.classification.iab_content.tier1.label": "Careers",
202
+ "model_output.classification.iab_content.tier2.label": "Job Search",
203
  "model_output.classification.iab_content.tier3.label": null
204
  },
205
  "expected": {
 
211
  "id": "marketing-tools-easy",
212
  "mismatches": [
213
  {
214
+ "actual": "Careers",
215
  "expected": "Business and Finance",
216
  "path": "model_output.classification.iab_content.tier1.label"
217
  },
218
  {
219
+ "actual": "Job Search",
 
 
 
 
 
220
  "expected": "Business",
221
  "path": "model_output.classification.iab_content.tier2.label"
222
  },
 
233
  },
234
  {
235
  "actual": {
236
+ "model_output.classification.iab_content.mapping_mode": "exact",
237
+ "model_output.classification.iab_content.tier1.label": "Sensitive Topics",
238
+ "model_output.classification.iab_content.tier2.label": "Terrorism",
239
  "model_output.classification.iab_content.tier3.label": null
240
  },
241
  "expected": {
 
247
  "id": "marketing-tools-medium",
248
  "mismatches": [
249
  {
250
+ "actual": "Sensitive Topics",
251
  "expected": "Business and Finance",
252
  "path": "model_output.classification.iab_content.tier1.label"
253
  },
254
  {
255
+ "actual": "Terrorism",
 
 
 
 
 
256
  "expected": "Business",
257
  "path": "model_output.classification.iab_content.tier2.label"
258
  },
 
269
  },
270
  {
271
  "actual": {
272
+ "model_output.classification.iab_content.mapping_mode": "exact",
273
  "model_output.classification.iab_content.tier1.label": "Personal Finance",
274
+ "model_output.classification.iab_content.tier2.label": "Home Utilities",
275
  "model_output.classification.iab_content.tier3.label": null
276
  },
277
  "expected": {
 
288
  "path": "model_output.classification.iab_content.tier1.label"
289
  },
290
  {
291
+ "actual": "Home Utilities",
 
 
 
 
 
292
  "expected": "Business",
293
  "path": "model_output.classification.iab_content.tier2.label"
294
  },
 
305
  },
306
  {
307
  "actual": {
308
+ "model_output.classification.iab_content.mapping_mode": "exact",
309
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
310
  "model_output.classification.iab_content.tier2.label": "Computing",
311
+ "model_output.classification.iab_content.tier3.label": "Information and Network Security"
312
  },
313
  "expected": {
314
  "model_output.classification.iab_content.mapping_mode": "exact",
 
323
  "expected": "Business and Finance",
324
  "path": "model_output.classification.iab_content.tier1.label"
325
  },
 
 
 
 
 
326
  {
327
  "actual": "Computing",
328
  "expected": "Business",
329
  "path": "model_output.classification.iab_content.tier2.label"
330
  },
331
  {
332
+ "actual": "Information and Network Security",
333
  "expected": "Business I.T.",
334
  "path": "model_output.classification.iab_content.tier3.label"
335
  }
 
341
  },
342
  {
343
  "actual": {
344
+ "model_output.classification.iab_content.mapping_mode": "exact",
345
+ "model_output.classification.iab_content.tier1.label": "Careers",
346
+ "model_output.classification.iab_content.tier2.label": "Job Search",
347
  "model_output.classification.iab_content.tier3.label": null
348
  },
349
  "expected": {
 
355
  "id": "business-it-medium",
356
  "mismatches": [
357
  {
358
+ "actual": "Careers",
359
  "expected": "Business and Finance",
360
  "path": "model_output.classification.iab_content.tier1.label"
361
  },
362
  {
363
+ "actual": "Job Search",
 
 
 
 
 
364
  "expected": "Business",
365
  "path": "model_output.classification.iab_content.tier2.label"
366
  },
 
461
  {
462
  "actual": {
463
  "model_output.classification.iab_content.mapping_mode": "exact",
464
+ "model_output.classification.iab_content.tier1.label": "Attractions",
465
+ "model_output.classification.iab_content.tier2.label": "Bars & Restaurants"
466
  },
467
  "expected": {
468
  "model_output.classification.iab_content.mapping_mode": "exact",
 
470
  "model_output.classification.iab_content.tier2.label": "Dining Out"
471
  },
472
  "id": "dining-out-hard",
473
+ "mismatches": [
474
+ {
475
+ "actual": "Attractions",
476
+ "expected": "Food & Drink",
477
+ "path": "model_output.classification.iab_content.tier1.label"
478
+ },
479
+ {
480
+ "actual": "Bars & Restaurants",
481
+ "expected": "Dining Out",
482
+ "path": "model_output.classification.iab_content.tier2.label"
483
+ }
484
+ ],
485
  "notes": "Cross-vertical hard IAB mapping case for Food & Drink > Dining Out.",
486
+ "pass": false,
487
  "status": "must_fix",
488
  "text": "Need a place to eat tonight where I can make a reservation online"
489
  },
 
527
  "actual": {
528
  "model_output.classification.iab_content.mapping_mode": "exact",
529
  "model_output.classification.iab_content.tier1.label": "Food & Drink",
530
+ "model_output.classification.iab_content.tier2.label": "Non-Alcoholic Beverages"
531
  },
532
  "expected": {
533
  "model_output.classification.iab_content.mapping_mode": "exact",
 
535
  "model_output.classification.iab_content.tier2.label": "Alcoholic Beverages"
536
  },
537
  "id": "alcoholic-beverages-hard",
538
+ "mismatches": [
539
+ {
540
+ "actual": "Non-Alcoholic Beverages",
541
+ "expected": "Alcoholic Beverages",
542
+ "path": "model_output.classification.iab_content.tier2.label"
543
+ }
544
+ ],
545
  "notes": "Cross-vertical hard IAB mapping case for Food & Drink > Alcoholic Beverages.",
546
+ "pass": false,
547
  "status": "must_fix",
548
  "text": "Want a spirit-forward drink recommendation, not a restaurant suggestion"
549
  },
550
  {
551
  "actual": {
552
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
553
+ "model_output.classification.iab_content.tier1.label": "Science",
554
  "model_output.classification.iab_content.tier2.label": null
555
  },
556
  "expected": {
 
561
  "id": "artificial-intelligence-easy",
562
  "mismatches": [
563
  {
564
+ "actual": "Science",
565
  "expected": "Technology & Computing",
566
  "path": "model_output.classification.iab_content.tier1.label"
567
  },
 
641
  },
642
  {
643
  "actual": {
644
+ "model_output.classification.iab_content.mapping_mode": "exact",
645
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
646
  "model_output.classification.iab_content.tier2.label": "Computing",
647
+ "model_output.classification.iab_content.tier3.label": "Software and Applications"
648
  },
649
  "expected": {
650
  "model_output.classification.iab_content.mapping_mode": "exact",
 
653
  "model_output.classification.iab_content.tier3.label": "Software and Applications"
654
  },
655
  "id": "software-apps-easy",
656
+ "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
657
  "notes": "Cross-vertical easy IAB mapping case for Technology & Computing > Computing > Computer Software and Applications.",
658
+ "pass": true,
659
  "status": "must_fix",
660
  "text": "Best workflow software for a small operations team"
661
  },
662
  {
663
  "actual": {
664
+ "model_output.classification.iab_content.mapping_mode": "exact",
665
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
666
  "model_output.classification.iab_content.tier2.label": "Computing",
667
+ "model_output.classification.iab_content.tier3.label": "Software and Applications"
668
  },
669
  "expected": {
670
  "model_output.classification.iab_content.mapping_mode": "exact",
 
673
  "model_output.classification.iab_content.tier3.label": "Software and Applications"
674
  },
675
  "id": "software-apps-medium",
676
+ "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
677
  "notes": "Cross-vertical medium IAB mapping case for Technology & Computing > Computing > Computer Software and Applications.",
678
+ "pass": true,
679
  "status": "must_fix",
680
  "text": "Need project management software for a distributed team"
681
  },
682
  {
683
  "actual": {
684
+ "model_output.classification.iab_content.mapping_mode": "exact",
685
+ "model_output.classification.iab_content.tier1.label": "Technology & Computing",
686
+ "model_output.classification.iab_content.tier2.label": "Virtual Reality",
687
  "model_output.classification.iab_content.tier3.label": null
688
  },
689
  "expected": {
 
695
  "id": "software-apps-hard",
696
  "mismatches": [
697
  {
698
+ "actual": "Virtual Reality",
 
 
 
 
 
 
 
 
 
 
699
  "expected": "Computing",
700
  "path": "model_output.classification.iab_content.tier2.label"
701
  },
 
755
  },
756
  {
757
  "actual": {
758
+ "model_output.classification.iab_content.mapping_mode": "exact",
759
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
760
  "model_output.classification.iab_content.tier2.label": "Computing",
761
+ "model_output.classification.iab_content.tier3.label": "Information and Network Security",
762
  "model_output.classification.iab_content.tier4.label": null
763
  },
764
  "expected": {
 
771
  "id": "communication-software-medium",
772
  "mismatches": [
773
  {
774
+ "actual": "Information and Network Security",
 
 
 
 
 
775
  "expected": "Software and Applications",
776
  "path": "model_output.classification.iab_content.tier3.label"
777
  },
 
788
  },
789
  {
790
  "actual": {
791
+ "model_output.classification.iab_content.mapping_mode": "exact",
792
+ "model_output.classification.iab_content.tier1.label": "Technology & Computing",
793
+ "model_output.classification.iab_content.tier2.label": "Virtual Reality",
794
  "model_output.classification.iab_content.tier3.label": null,
795
  "model_output.classification.iab_content.tier4.label": null
796
  },
 
804
  "id": "communication-software-hard",
805
  "mismatches": [
806
  {
807
+ "actual": "Virtual Reality",
 
 
 
 
 
 
 
 
 
 
808
  "expected": "Computing",
809
  "path": "model_output.classification.iab_content.tier2.label"
810
  },
 
826
  },
827
  {
828
  "actual": {
829
+ "model_output.classification.iab_content.mapping_mode": "exact",
830
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
831
  "model_output.classification.iab_content.tier2.label": "Computing",
832
+ "model_output.classification.iab_content.tier3.label": "Internet",
833
+ "model_output.classification.iab_content.tier4.label": "Web Hosting"
834
  },
835
  "expected": {
836
  "model_output.classification.iab_content.mapping_mode": "exact",
 
840
  "model_output.classification.iab_content.tier4.label": "Web Hosting"
841
  },
842
  "id": "web-hosting-easy",
843
+ "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
844
  "notes": "Cross-vertical easy IAB mapping case for Technology & Computing > Computing > Internet > Web Hosting.",
845
+ "pass": true,
846
  "status": "must_fix",
847
  "text": "Vercel vs Netlify for website hosting"
848
  },
849
  {
850
  "actual": {
851
+ "model_output.classification.iab_content.mapping_mode": "exact",
852
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
853
  "model_output.classification.iab_content.tier2.label": "Computing",
854
+ "model_output.classification.iab_content.tier3.label": "Internet",
855
+ "model_output.classification.iab_content.tier4.label": "Web Hosting"
856
  },
857
  "expected": {
858
  "model_output.classification.iab_content.mapping_mode": "exact",
 
862
  "model_output.classification.iab_content.tier4.label": "Web Hosting"
863
  },
864
  "id": "web-hosting-medium",
865
+ "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
866
  "notes": "Cross-vertical medium IAB mapping case for Technology & Computing > Computing > Internet > Web Hosting.",
867
+ "pass": true,
868
  "status": "must_fix",
869
  "text": "Best hosting platform for a startup website"
870
  },
871
  {
872
  "actual": {
873
+ "model_output.classification.iab_content.mapping_mode": "exact",
874
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
875
  "model_output.classification.iab_content.tier2.label": "Computing",
876
+ "model_output.classification.iab_content.tier3.label": "Internet",
877
+ "model_output.classification.iab_content.tier4.label": "Web Hosting"
878
  },
879
  "expected": {
880
  "model_output.classification.iab_content.mapping_mode": "exact",
 
884
  "model_output.classification.iab_content.tier4.label": "Web Hosting"
885
  },
886
  "id": "web-hosting-hard",
887
+ "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
888
  "notes": "Cross-vertical hard IAB mapping case for Technology & Computing > Computing > Internet > Web Hosting.",
889
+ "pass": true,
890
  "status": "must_fix",
891
  "text": "Need a managed hosting provider to deploy and run our marketing site"
892
  },
893
  {
894
  "actual": {
895
+ "model_output.classification.iab_content.mapping_mode": "exact",
896
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
897
+ "model_output.classification.iab_content.tier2.label": "Computing",
898
+ "model_output.classification.iab_content.tier3.label": "Laptops"
899
  },
900
  "expected": {
901
  "model_output.classification.iab_content.mapping_mode": "exact",
 
904
  "model_output.classification.iab_content.tier3.label": "Laptops"
905
  },
906
  "id": "laptops-easy",
907
+ "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
908
  "notes": "Cross-vertical easy IAB mapping case for Technology & Computing > Computing > Laptops.",
909
+ "pass": true,
910
  "status": "must_fix",
911
  "text": "Which laptop should I buy for college?"
912
  },
 
932
  },
933
  {
934
  "actual": {
935
+ "model_output.classification.iab_content.mapping_mode": "exact",
936
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
937
  "model_output.classification.iab_content.tier2.label": "Computing",
938
+ "model_output.classification.iab_content.tier3.label": "Laptops"
939
  },
940
  "expected": {
941
  "model_output.classification.iab_content.mapping_mode": "exact",
 
944
  "model_output.classification.iab_content.tier3.label": "Laptops"
945
  },
946
  "id": "laptops-hard",
947
+ "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
948
  "notes": "Cross-vertical hard IAB mapping case for Technology & Computing > Computing > Laptops.",
949
+ "pass": true,
950
  "status": "must_fix",
951
  "text": "Need a portable computer with good battery life for everyday work"
952
  },
953
  {
954
  "actual": {
955
+ "model_output.classification.iab_content.mapping_mode": "exact",
956
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
957
  "model_output.classification.iab_content.tier2.label": "Computing",
958
  "model_output.classification.iab_content.tier3.label": "Software and Applications"
 
965
  },
966
  "id": "desktops-easy",
967
  "mismatches": [
 
 
 
 
 
968
  {
969
  "actual": "Software and Applications",
970
  "expected": "Desktops",
 
978
  },
979
  {
980
  "actual": {
981
+ "model_output.classification.iab_content.mapping_mode": "exact",
982
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
983
  "model_output.classification.iab_content.tier2.label": "Computing",
984
+ "model_output.classification.iab_content.tier3.label": "Desktops"
985
  },
986
  "expected": {
987
  "model_output.classification.iab_content.mapping_mode": "exact",
 
990
  "model_output.classification.iab_content.tier3.label": "Desktops"
991
  },
992
  "id": "desktops-medium",
993
+ "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
994
  "notes": "Cross-vertical medium IAB mapping case for Technology & Computing > Computing > Desktops.",
995
+ "pass": true,
996
  "status": "must_fix",
997
  "text": "Which desktop computer should I buy for a home office?"
998
  },
 
1021
  "model_output.classification.iab_content.mapping_mode": "exact",
1022
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1023
  "model_output.classification.iab_content.tier2.label": "Consumer Electronics",
1024
+ "model_output.classification.iab_content.tier3.label": "Smartphones"
1025
  },
1026
  "expected": {
1027
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1030
  "model_output.classification.iab_content.tier3.label": "Smartphones"
1031
  },
1032
  "id": "smartphones-easy",
1033
+ "mismatches": [],
 
 
 
 
 
 
1034
  "notes": "Cross-vertical easy IAB mapping case for Technology & Computing > Consumer Electronics > Smartphones.",
1035
+ "pass": true,
1036
  "status": "must_fix",
1037
  "text": "Best phone with a good camera under 700"
1038
  },
 
1041
  "model_output.classification.iab_content.mapping_mode": "exact",
1042
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1043
  "model_output.classification.iab_content.tier2.label": "Consumer Electronics",
1044
+ "model_output.classification.iab_content.tier3.label": "Smartphones"
1045
  },
1046
  "expected": {
1047
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1050
  "model_output.classification.iab_content.tier3.label": "Smartphones"
1051
  },
1052
  "id": "smartphones-medium",
1053
+ "mismatches": [],
 
 
 
 
 
 
1054
  "notes": "Cross-vertical medium IAB mapping case for Technology & Computing > Consumer Electronics > Smartphones.",
1055
+ "pass": true,
1056
  "status": "must_fix",
1057
  "text": "Should I buy an iPhone or Pixel this year?"
1058
  },
 
1061
  "model_output.classification.iab_content.mapping_mode": "exact",
1062
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1063
  "model_output.classification.iab_content.tier2.label": "Consumer Electronics",
1064
+ "model_output.classification.iab_content.tier3.label": "Smartphones"
1065
  },
1066
  "expected": {
1067
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1070
  "model_output.classification.iab_content.tier3.label": "Smartphones"
1071
  },
1072
  "id": "smartphones-hard",
1073
+ "mismatches": [],
 
 
 
 
 
 
1074
  "notes": "Cross-vertical hard IAB mapping case for Technology & Computing > Consumer Electronics > Smartphones.",
1075
+ "pass": true,
1076
  "status": "must_fix",
1077
  "text": "Need a new smartphone with strong battery life and a clean software experience"
1078
  },
 
1144
  },
1145
  {
1146
  "actual": {
1147
+ "model_output.classification.iab_content.mapping_mode": "exact",
1148
  "model_output.classification.iab_content.tier1.label": "Sports",
1149
+ "model_output.classification.iab_content.tier2.label": "Bodybuilding",
1150
  "model_output.classification.iab_content.tier3.label": null
1151
  },
1152
  "expected": {
 
1163
  "path": "model_output.classification.iab_content.tier1.label"
1164
  },
1165
  {
1166
+ "actual": "Bodybuilding",
 
 
 
 
 
1167
  "expected": "Women's Fashion",
1168
  "path": "model_output.classification.iab_content.tier2.label"
1169
  },
 
1236
  },
1237
  {
1238
  "actual": {
1239
+ "model_output.classification.iab_content.mapping_mode": "exact",
1240
  "model_output.classification.iab_content.tier1.label": "Style & Fashion",
1241
+ "model_output.classification.iab_content.tier2.label": "Children's Clothing",
1242
  "model_output.classification.iab_content.tier3.label": null
1243
  },
1244
  "expected": {
 
1250
  "id": "mens-shoes-easy",
1251
  "mismatches": [
1252
  {
1253
+ "actual": "Children's Clothing",
 
 
 
 
 
1254
  "expected": "Men's Fashion",
1255
  "path": "model_output.classification.iab_content.tier2.label"
1256
  },
 
1287
  },
1288
  {
1289
  "actual": {
1290
+ "model_output.classification.iab_content.mapping_mode": "exact",
1291
  "model_output.classification.iab_content.tier1.label": "Style & Fashion",
1292
+ "model_output.classification.iab_content.tier2.label": "Children's Clothing",
1293
  "model_output.classification.iab_content.tier3.label": null
1294
  },
1295
  "expected": {
 
1301
  "id": "mens-shoes-hard",
1302
  "mismatches": [
1303
  {
1304
+ "actual": "Children's Clothing",
 
 
 
 
 
1305
  "expected": "Men's Fashion",
1306
  "path": "model_output.classification.iab_content.tier2.label"
1307
  },
 
1360
  "actual": {
1361
  "model_output.classification.iab_content.mapping_mode": "exact",
1362
  "model_output.classification.iab_content.tier1.label": "Travel",
1363
+ "model_output.classification.iab_content.tier2.label": "Travel Type",
1364
  "model_output.classification.iab_content.tier3.label": null
1365
  },
1366
  "expected": {
 
1371
  },
1372
  "id": "hotels-hard",
1373
  "mismatches": [
 
 
 
 
 
1374
  {
1375
  "actual": null,
1376
  "expected": "Hotels and Motels",
 
1455
  },
1456
  {
1457
  "actual": {
1458
+ "model_output.classification.iab_content.mapping_mode": "exact",
1459
  "model_output.classification.iab_content.tier1.label": "Healthy Living",
1460
+ "model_output.classification.iab_content.tier2.label": "Fitness and Exercise",
1461
+ "model_output.classification.iab_content.tier3.label": "Running and Jogging"
1462
  },
1463
  "expected": {
1464
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1467
  "model_output.classification.iab_content.tier3.label": "Running and Jogging"
1468
  },
1469
  "id": "running-and-jogging-easy",
1470
+ "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1471
  "notes": "Cross-vertical easy IAB mapping case for Healthy Living > Fitness and Exercise > Running and Jogging.",
1472
+ "pass": true,
1473
  "status": "must_fix",
1474
  "text": "Best running plan for a first 10k"
1475
  },
 
1512
  {
1513
  "actual": {
1514
  "model_output.classification.iab_content.mapping_mode": "exact",
1515
+ "model_output.classification.iab_content.tier1.label": "Sports",
1516
+ "model_output.classification.iab_content.tier2.label": "Walking",
1517
  "model_output.classification.iab_content.tier3.label": null
1518
  },
1519
  "expected": {
 
1524
  },
1525
  "id": "running-and-jogging-hard",
1526
  "mismatches": [
1527
+ {
1528
+ "actual": "Sports",
1529
+ "expected": "Healthy Living",
1530
+ "path": "model_output.classification.iab_content.tier1.label"
1531
+ },
1532
+ {
1533
+ "actual": "Walking",
1534
+ "expected": "Fitness and Exercise",
1535
+ "path": "model_output.classification.iab_content.tier2.label"
1536
+ },
1537
  {
1538
  "actual": null,
1539
  "expected": "Running and Jogging",
 
1631
  {
1632
  "actual": {
1633
  "model_output.classification.iab_content.mapping_mode": "exact",
1634
+ "model_output.classification.iab_content.tier1.label": "Travel",
1635
+ "model_output.classification.iab_content.tier2.label": "Travel Type"
1636
  },
1637
  "expected": {
1638
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
1641
  },
1642
  "id": "fiction-medium",
1643
  "mismatches": [
1644
+ {
1645
+ "actual": "Travel",
1646
+ "expected": "Books and Literature",
1647
+ "path": "model_output.classification.iab_content.tier1.label"
1648
+ },
1649
  {
1650
  "actual": "exact",
1651
  "expected": "nearest_equivalent",
1652
  "path": "model_output.classification.iab_content.mapping_mode"
1653
+ },
1654
+ {
1655
+ "actual": "Travel Type",
1656
+ "expected": "Fiction",
1657
+ "path": "model_output.classification.iab_content.tier2.label"
1658
  }
1659
  ],
1660
  "notes": "Cross-vertical medium IAB mapping case for Books and Literature > Fiction.",
 
1684
  "actual": {
1685
  "model_output.classification.iab_content.mapping_mode": "exact",
1686
  "model_output.classification.iab_content.tier1.label": "Home & Garden",
1687
+ "model_output.classification.iab_content.tier2.label": "Interior Decorating"
1688
  },
1689
  "expected": {
1690
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1694
  "id": "home-improvement-easy",
1695
  "mismatches": [
1696
  {
1697
+ "actual": "Interior Decorating",
1698
  "expected": "Home Improvement",
1699
  "path": "model_output.classification.iab_content.tier2.label"
1700
  }
 
1706
  },
1707
  {
1708
  "actual": {
1709
+ "model_output.classification.iab_content.mapping_mode": "exact",
1710
  "model_output.classification.iab_content.tier1.label": "Home & Garden",
1711
+ "model_output.classification.iab_content.tier2.label": "Interior Decorating"
1712
  },
1713
  "expected": {
1714
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1718
  "id": "home-improvement-medium",
1719
  "mismatches": [
1720
  {
1721
+ "actual": "Interior Decorating",
 
 
 
 
 
1722
  "expected": "Home Improvement",
1723
  "path": "model_output.classification.iab_content.tier2.label"
1724
  }
 
1759
  },
1760
  {
1761
  "actual": {
1762
+ "model_output.classification.iab_content.mapping_mode": "exact",
1763
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1764
+ "model_output.classification.iab_content.tier2.label": "Augmented Reality"
1765
  },
1766
  "expected": {
1767
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1776
  "path": "model_output.classification.iab_content.tier1.label"
1777
  },
1778
  {
1779
+ "actual": "Augmented Reality",
 
 
 
 
 
1780
  "expected": "Online Education",
1781
  "path": "model_output.classification.iab_content.tier2.label"
1782
  }
 
1817
  },
1818
  {
1819
  "actual": {
1820
+ "model_output.classification.iab_content.mapping_mode": "exact",
1821
+ "model_output.classification.iab_content.tier1.label": "Careers",
1822
+ "model_output.classification.iab_content.tier2.label": "Vocational Training"
1823
  },
1824
  "expected": {
1825
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1829
  "id": "online-education-hard",
1830
  "mismatches": [
1831
  {
1832
+ "actual": "Careers",
1833
  "expected": "Education",
1834
  "path": "model_output.classification.iab_content.tier1.label"
1835
  },
1836
  {
1837
+ "actual": "Vocational Training",
 
 
 
 
 
1838
  "expected": "Online Education",
1839
  "path": "model_output.classification.iab_content.tier2.label"
1840
  }
 
1906
  },
1907
  {
1908
  "actual": {
1909
+ "model_output.classification.iab_content.mapping_mode": "exact",
1910
  "model_output.classification.iab_content.tier1.label": "Medical Health"
1911
  },
1912
  "expected": {
 
1914
  "model_output.classification.iab_content.tier1.label": "Medical Health"
1915
  },
1916
  "id": "medical-health-easy",
1917
+ "mismatches": [],
 
 
 
 
 
 
1918
  "notes": "Cross-vertical easy IAB mapping case for Medical Health.",
1919
+ "pass": true,
1920
  "status": "must_fix",
1921
  "text": "what do these allergy symptoms mean"
1922
  },
 
1978
  },
1979
  {
1980
  "actual": {
1981
+ "model_output.classification.iab_content.mapping_mode": "exact",
1982
  "model_output.classification.iab_content.tier1.label": "Business and Finance",
1983
  "model_output.classification.iab_content.tier2.label": "Business"
1984
  },
 
1994
  "expected": "Careers",
1995
  "path": "model_output.classification.iab_content.tier1.label"
1996
  },
 
 
 
 
 
1997
  {
1998
  "actual": "Business",
1999
  "expected": "Job Search",
 
2007
  },
2008
  {
2009
  "actual": {
2010
+ "model_output.classification.iab_content.mapping_mode": "exact",
2011
  "model_output.classification.iab_content.tier1.label": "Genres",
2012
+ "model_output.classification.iab_content.tier2.label": "Talk Show"
2013
  },
2014
  "expected": {
2015
  "model_output.classification.iab_content.mapping_mode": "exact",
 
2024
  "path": "model_output.classification.iab_content.tier1.label"
2025
  },
2026
  {
2027
+ "actual": "Talk Show",
 
 
 
 
 
2028
  "expected": "Job Search",
2029
  "path": "model_output.classification.iab_content.tier2.label"
2030
  }
 
2036
  },
2037
  {
2038
  "actual": {
2039
+ "model_output.classification.iab_content.mapping_mode": "exact",
2040
+ "model_output.classification.iab_content.tier1.label": "Holidays",
2041
+ "model_output.classification.iab_content.tier2.label": "National & Civic Holidays"
2042
  },
2043
  "expected": {
2044
  "model_output.classification.iab_content.mapping_mode": "exact",
 
2048
  "id": "personal-finance-easy",
2049
  "mismatches": [
2050
  {
2051
+ "actual": "Holidays",
2052
  "expected": "Personal Finance",
2053
  "path": "model_output.classification.iab_content.tier1.label"
2054
  },
2055
  {
2056
+ "actual": "National & Civic Holidays",
 
 
 
 
 
2057
  "expected": "Financial Planning",
2058
  "path": "model_output.classification.iab_content.tier2.label"
2059
  }
 
2132
  {
2133
  "actual": {
2134
  "model_output.classification.iab_content.mapping_mode": "exact",
2135
+ "model_output.classification.iab_content.tier1.label": "Genres",
2136
+ "model_output.classification.iab_content.tier2.label": "Family/Children"
2137
  },
2138
  "expected": {
2139
  "model_output.classification.iab_content.mapping_mode": "exact",
 
2143
  "id": "parenting-medium",
2144
  "mismatches": [
2145
  {
2146
+ "actual": "Genres",
2147
  "expected": "Family and Relationships",
2148
  "path": "model_output.classification.iab_content.tier1.label"
2149
  },
2150
  {
2151
+ "actual": "Family/Children",
2152
  "expected": "Parenting",
2153
  "path": "model_output.classification.iab_content.tier2.label"
2154
  }
 
2196
  },
2197
  {
2198
  "actual": {
2199
+ "model_output.classification.iab_content.mapping_mode": "exact",
2200
+ "model_output.classification.iab_content.tier1.label": "Home & Garden",
2201
+ "model_output.classification.iab_content.tier2.label": "Gardening"
2202
  },
2203
  "expected": {
2204
  "model_output.classification.iab_content.mapping_mode": "exact",
 
2206
  "model_output.classification.iab_content.tier2.label": "Gardening"
2207
  },
2208
  "id": "gardening-medium",
2209
+ "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2210
  "notes": "Cross-vertical medium IAB mapping case for Home & Garden > Gardening.",
2211
+ "pass": true,
2212
  "status": "must_fix",
2213
  "text": "how often should i water tomato plants"
2214
  },
 
2251
  {
2252
  "actual": {
2253
  "model_output.classification.iab_content.mapping_mode": "exact",
2254
+ "model_output.classification.iab_content.tier1.label": "Entertainment",
2255
+ "model_output.classification.iab_content.tier2.label": "Movies"
2256
  },
2257
  "expected": {
2258
  "model_output.classification.iab_content.mapping_mode": "exact",
 
2260
  "model_output.classification.iab_content.tier2.label": "Movies"
2261
  },
2262
  "id": "movies-medium",
2263
+ "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
2264
  "notes": "Cross-vertical medium IAB mapping case for Entertainment > Movies.",
2265
+ "pass": true,
2266
  "status": "must_fix",
2267
  "text": "Best thriller movies from the last few years"
2268
  },
artifacts/evaluation/latest/iab_quality_target_eval.json CHANGED
@@ -13,7 +13,7 @@
13
  "results": [
14
  {
15
  "actual": {
16
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
17
  "model_output.classification.iab_content.tier1.label": "Automotive",
18
  "model_output.classification.iab_content.tier2.label": null
19
  },
@@ -28,11 +28,6 @@
28
  "actual": null,
29
  "expected": "Auto Buying and Selling",
30
  "path": "model_output.classification.iab_content.tier2.label"
31
- },
32
- {
33
- "actual": "nearest_equivalent",
34
- "expected": "exact",
35
- "path": "model_output.classification.iab_content.mapping_mode"
36
  }
37
  ],
38
  "notes": "Vehicle shopping queries should map into the automotive buying branch, not business sales.",
@@ -62,7 +57,7 @@
62
  },
63
  {
64
  "actual": {
65
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
66
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
67
  "model_output.classification.iab_content.tier2.label": null,
68
  "model_output.classification.iab_content.tier3.label": null
@@ -84,11 +79,6 @@
84
  "actual": null,
85
  "expected": "Laptops",
86
  "path": "model_output.classification.iab_content.tier3.label"
87
- },
88
- {
89
- "actual": "nearest_equivalent",
90
- "expected": "exact",
91
- "path": "model_output.classification.iab_content.mapping_mode"
92
  }
93
  ],
94
  "notes": "Common typo handling should still land in the laptops branch.",
@@ -98,10 +88,10 @@
98
  },
99
  {
100
  "actual": {
101
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
102
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
103
  "model_output.classification.iab_content.tier2.label": "Computing",
104
- "model_output.classification.iab_content.tier3.label": null
105
  },
106
  "expected": {
107
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -122,9 +112,14 @@
122
  "path": "model_output.classification.iab_content.tier2.label"
123
  },
124
  {
125
- "actual": null,
126
  "expected": "Sales",
127
  "path": "model_output.classification.iab_content.tier3.label"
 
 
 
 
 
128
  }
129
  ],
130
  "notes": "CRM education should resolve to the closest business/sales path, not generic software.",
@@ -134,9 +129,9 @@
134
  },
135
  {
136
  "actual": {
137
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
138
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
139
- "model_output.classification.iab_content.tier2.label": null,
140
  "model_output.classification.iab_content.tier3.label": null
141
  },
142
  "expected": {
@@ -153,7 +148,7 @@
153
  "path": "model_output.classification.iab_content.tier1.label"
154
  },
155
  {
156
- "actual": null,
157
  "expected": "Business",
158
  "path": "model_output.classification.iab_content.tier2.label"
159
  },
@@ -161,11 +156,6 @@
161
  "actual": null,
162
  "expected": "Sales",
163
  "path": "model_output.classification.iab_content.tier3.label"
164
- },
165
- {
166
- "actual": "nearest_equivalent",
167
- "expected": "exact",
168
- "path": "model_output.classification.iab_content.mapping_mode"
169
  }
170
  ],
171
  "notes": "Direct CRM vendor comparison should map cleanly into the sales domain.",
@@ -175,9 +165,9 @@
175
  },
176
  {
177
  "actual": {
178
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
179
  "model_output.classification.iab_content.tier1.label": "Careers",
180
- "model_output.classification.iab_content.tier2.label": null,
181
  "model_output.classification.iab_content.tier3.label": null
182
  },
183
  "expected": {
@@ -194,7 +184,7 @@
194
  "path": "model_output.classification.iab_content.tier1.label"
195
  },
196
  {
197
- "actual": null,
198
  "expected": "Business",
199
  "path": "model_output.classification.iab_content.tier2.label"
200
  },
@@ -202,11 +192,6 @@
202
  "actual": null,
203
  "expected": "Marketing and Advertising",
204
  "path": "model_output.classification.iab_content.tier3.label"
205
- },
206
- {
207
- "actual": "nearest_equivalent",
208
- "expected": "exact",
209
- "path": "model_output.classification.iab_content.mapping_mode"
210
  }
211
  ],
212
  "notes": "Marketing tool discovery should map to the marketing and advertising branch.",
@@ -217,7 +202,7 @@
217
  {
218
  "actual": {
219
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
220
- "model_output.classification.iab_content.tier1.label": "Sports",
221
  "model_output.classification.iab_content.tier2.label": null
222
  },
223
  "expected": {
@@ -228,7 +213,7 @@
228
  "id": "ml-explanation-maps-to-ai",
229
  "mismatches": [
230
  {
231
- "actual": "Sports",
232
  "expected": "Technology & Computing",
233
  "path": "model_output.classification.iab_content.tier1.label"
234
  },
@@ -250,10 +235,10 @@
250
  },
251
  {
252
  "actual": {
253
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
254
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
255
  "model_output.classification.iab_content.tier2.label": "Computing",
256
- "model_output.classification.iab_content.tier3.label": null
257
  },
258
  "expected": {
259
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -274,9 +259,14 @@
274
  "path": "model_output.classification.iab_content.tier2.label"
275
  },
276
  {
277
- "actual": null,
278
  "expected": "Business I.T.",
279
  "path": "model_output.classification.iab_content.tier3.label"
 
 
 
 
 
280
  }
281
  ],
282
  "notes": "Credential and account help should map to business IT rather than generic business.",
@@ -304,9 +294,9 @@
304
  },
305
  {
306
  "actual": {
307
- "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
308
- "model_output.classification.iab_content.tier1.label": "Sports",
309
- "model_output.classification.iab_content.tier2.label": null,
310
  "model_output.classification.iab_content.tier3.label": null
311
  },
312
  "expected": {
@@ -318,12 +308,12 @@
318
  "id": "trial-signup-maps-to-software",
319
  "mismatches": [
320
  {
321
- "actual": "Sports",
322
  "expected": "Technology & Computing",
323
  "path": "model_output.classification.iab_content.tier1.label"
324
  },
325
  {
326
- "actual": null,
327
  "expected": "Computing",
328
  "path": "model_output.classification.iab_content.tier2.label"
329
  },
@@ -331,6 +321,11 @@
331
  "actual": null,
332
  "expected": "Software and Applications",
333
  "path": "model_output.classification.iab_content.tier3.label"
 
 
 
 
 
334
  }
335
  ],
336
  "notes": "Software action queries should map to the software/application branch.",
 
13
  "results": [
14
  {
15
  "actual": {
16
+ "model_output.classification.iab_content.mapping_mode": "exact",
17
  "model_output.classification.iab_content.tier1.label": "Automotive",
18
  "model_output.classification.iab_content.tier2.label": null
19
  },
 
28
  "actual": null,
29
  "expected": "Auto Buying and Selling",
30
  "path": "model_output.classification.iab_content.tier2.label"
 
 
 
 
 
31
  }
32
  ],
33
  "notes": "Vehicle shopping queries should map into the automotive buying branch, not business sales.",
 
57
  },
58
  {
59
  "actual": {
60
+ "model_output.classification.iab_content.mapping_mode": "exact",
61
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
62
  "model_output.classification.iab_content.tier2.label": null,
63
  "model_output.classification.iab_content.tier3.label": null
 
79
  "actual": null,
80
  "expected": "Laptops",
81
  "path": "model_output.classification.iab_content.tier3.label"
 
 
 
 
 
82
  }
83
  ],
84
  "notes": "Common typo handling should still land in the laptops branch.",
 
88
  },
89
  {
90
  "actual": {
91
+ "model_output.classification.iab_content.mapping_mode": "exact",
92
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
93
  "model_output.classification.iab_content.tier2.label": "Computing",
94
+ "model_output.classification.iab_content.tier3.label": "Software and Applications"
95
  },
96
  "expected": {
97
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
112
  "path": "model_output.classification.iab_content.tier2.label"
113
  },
114
  {
115
+ "actual": "Software and Applications",
116
  "expected": "Sales",
117
  "path": "model_output.classification.iab_content.tier3.label"
118
+ },
119
+ {
120
+ "actual": "exact",
121
+ "expected": "nearest_equivalent",
122
+ "path": "model_output.classification.iab_content.mapping_mode"
123
  }
124
  ],
125
  "notes": "CRM education should resolve to the closest business/sales path, not generic software.",
 
129
  },
130
  {
131
  "actual": {
132
+ "model_output.classification.iab_content.mapping_mode": "exact",
133
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
134
+ "model_output.classification.iab_content.tier2.label": "Robotics",
135
  "model_output.classification.iab_content.tier3.label": null
136
  },
137
  "expected": {
 
148
  "path": "model_output.classification.iab_content.tier1.label"
149
  },
150
  {
151
+ "actual": "Robotics",
152
  "expected": "Business",
153
  "path": "model_output.classification.iab_content.tier2.label"
154
  },
 
156
  "actual": null,
157
  "expected": "Sales",
158
  "path": "model_output.classification.iab_content.tier3.label"
 
 
 
 
 
159
  }
160
  ],
161
  "notes": "Direct CRM vendor comparison should map cleanly into the sales domain.",
 
165
  },
166
  {
167
  "actual": {
168
+ "model_output.classification.iab_content.mapping_mode": "exact",
169
  "model_output.classification.iab_content.tier1.label": "Careers",
170
+ "model_output.classification.iab_content.tier2.label": "Job Search",
171
  "model_output.classification.iab_content.tier3.label": null
172
  },
173
  "expected": {
 
184
  "path": "model_output.classification.iab_content.tier1.label"
185
  },
186
  {
187
+ "actual": "Job Search",
188
  "expected": "Business",
189
  "path": "model_output.classification.iab_content.tier2.label"
190
  },
 
192
  "actual": null,
193
  "expected": "Marketing and Advertising",
194
  "path": "model_output.classification.iab_content.tier3.label"
 
 
 
 
 
195
  }
196
  ],
197
  "notes": "Marketing tool discovery should map to the marketing and advertising branch.",
 
202
  {
203
  "actual": {
204
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
205
+ "model_output.classification.iab_content.tier1.label": "Science",
206
  "model_output.classification.iab_content.tier2.label": null
207
  },
208
  "expected": {
 
213
  "id": "ml-explanation-maps-to-ai",
214
  "mismatches": [
215
  {
216
+ "actual": "Science",
217
  "expected": "Technology & Computing",
218
  "path": "model_output.classification.iab_content.tier1.label"
219
  },
 
235
  },
236
  {
237
  "actual": {
238
+ "model_output.classification.iab_content.mapping_mode": "exact",
239
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
240
  "model_output.classification.iab_content.tier2.label": "Computing",
241
+ "model_output.classification.iab_content.tier3.label": "Information and Network Security"
242
  },
243
  "expected": {
244
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
259
  "path": "model_output.classification.iab_content.tier2.label"
260
  },
261
  {
262
+ "actual": "Information and Network Security",
263
  "expected": "Business I.T.",
264
  "path": "model_output.classification.iab_content.tier3.label"
265
+ },
266
+ {
267
+ "actual": "exact",
268
+ "expected": "nearest_equivalent",
269
+ "path": "model_output.classification.iab_content.mapping_mode"
270
  }
271
  ],
272
  "notes": "Credential and account help should map to business IT rather than generic business.",
 
294
  },
295
  {
296
  "actual": {
297
+ "model_output.classification.iab_content.mapping_mode": "exact",
298
+ "model_output.classification.iab_content.tier1.label": "Sensitive Topics",
299
+ "model_output.classification.iab_content.tier2.label": "Crime & Harmful Acts to Individuals, Society & Human Right Violations",
300
  "model_output.classification.iab_content.tier3.label": null
301
  },
302
  "expected": {
 
308
  "id": "trial-signup-maps-to-software",
309
  "mismatches": [
310
  {
311
+ "actual": "Sensitive Topics",
312
  "expected": "Technology & Computing",
313
  "path": "model_output.classification.iab_content.tier1.label"
314
  },
315
  {
316
+ "actual": "Crime & Harmful Acts to Individuals, Society & Human Right Violations",
317
  "expected": "Computing",
318
  "path": "model_output.classification.iab_content.tier2.label"
319
  },
 
321
  "actual": null,
322
  "expected": "Software and Applications",
323
  "path": "model_output.classification.iab_content.tier3.label"
324
+ },
325
+ {
326
+ "actual": "exact",
327
+ "expected": "nearest_equivalent",
328
+ "path": "model_output.classification.iab_content.mapping_mode"
329
  }
330
  ],
331
  "notes": "Software action queries should map to the software/application branch.",
artifacts/evaluation/latest/intent_subtype_difficulty_benchmark_confusion_matrix.csv CHANGED
@@ -1,19 +1,19 @@
1
  ,education,product_discovery,comparison,evaluation,deal_seeking,provider_selection,signup,purchase,booking,download,contact_sales,task_execution,onboarding_setup,troubleshooting,account_help,billing_help,follow_up,emotional_reflection
2
  education,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3
- product_discovery,0,13,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0
4
- comparison,1,0,11,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0
5
- evaluation,1,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6
- deal_seeking,0,1,1,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0
7
  provider_selection,0,0,0,0,0,16,0,0,0,0,0,0,0,0,0,0,0,0
8
- signup,0,0,0,0,0,0,14,0,0,1,0,0,0,0,1,0,0,0
9
- purchase,0,0,0,0,0,0,1,13,0,0,0,1,0,0,0,0,0,0
10
- booking,0,0,0,0,0,0,5,0,8,0,1,1,0,0,0,0,0,0
11
- download,0,0,0,0,0,0,0,0,0,13,0,1,0,0,1,0,0,0
12
- contact_sales,0,0,0,1,0,0,0,0,0,0,14,0,0,0,0,0,0,0
13
- task_execution,0,0,0,0,0,0,0,0,0,0,0,17,1,0,0,0,0,0
14
- onboarding_setup,0,0,0,0,0,0,0,0,0,0,0,1,15,1,0,0,0,0
15
- troubleshooting,0,0,0,0,0,0,0,0,0,0,0,0,0,15,0,0,0,0
16
- account_help,0,0,0,0,0,0,0,0,0,0,0,0,0,1,12,2,0,0
17
- billing_help,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,12,0,0
18
- follow_up,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,13,0
19
  emotional_reflection,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15
 
1
  ,education,product_discovery,comparison,evaluation,deal_seeking,provider_selection,signup,purchase,booking,download,contact_sales,task_execution,onboarding_setup,troubleshooting,account_help,billing_help,follow_up,emotional_reflection
2
  education,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3
+ product_discovery,0,14,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4
+ comparison,2,1,11,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
5
+ evaluation,1,0,0,13,0,0,0,0,0,0,0,0,1,0,0,0,0,0
6
+ deal_seeking,0,1,0,0,13,1,0,0,0,0,0,0,0,0,0,0,0,0
7
  provider_selection,0,0,0,0,0,16,0,0,0,0,0,0,0,0,0,0,0,0
8
+ signup,0,0,0,0,0,0,15,1,0,0,0,0,0,0,0,0,0,0
9
+ purchase,0,0,0,0,0,0,0,13,0,0,0,0,2,0,0,0,0,0
10
+ booking,0,0,0,0,0,0,1,0,13,0,1,0,0,0,0,0,0,0
11
+ download,0,0,0,0,0,0,0,0,0,13,1,1,0,0,0,0,0,0
12
+ contact_sales,0,0,0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0
13
+ task_execution,0,0,0,0,0,0,0,0,0,0,0,18,0,0,0,0,0,0
14
+ onboarding_setup,0,0,0,0,0,0,0,0,0,0,0,1,16,0,0,0,0,0
15
+ troubleshooting,0,0,0,0,0,0,0,0,0,0,0,0,0,12,1,2,0,0
16
+ account_help,0,0,0,0,0,0,2,0,0,0,0,1,0,3,8,1,0,0
17
+ billing_help,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,13,0,0
18
+ follow_up,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,14,0
19
  emotional_reflection,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15
artifacts/evaluation/latest/intent_subtype_difficulty_benchmark_report.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "accepted_accuracy": 0.8967,
3
- "accepted_coverage": 0.9783,
4
- "accuracy": 0.8773,
5
  "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/intent_subtype_difficulty_benchmark_confusion_matrix.csv",
6
  "count": 277,
7
  "dataset_path": "/content/agentic-intent-classifier/data/subtype_benchmark.jsonl",
@@ -12,15 +12,15 @@
12
  "accuracy": 0.913,
13
  "count": 92,
14
  "fallback_rate": 0.0,
15
- "macro_f1": 0.9111
16
  },
17
  "hard": {
18
- "accepted_accuracy": 0.814,
19
- "accepted_coverage": 0.9451,
20
- "accuracy": 0.7692,
21
  "count": 91,
22
- "fallback_rate": 0.0549,
23
- "macro_f1": 0.7704
24
  },
25
  "medium": {
26
  "accepted_accuracy": 0.957,
@@ -28,30 +28,30 @@
28
  "accuracy": 0.9468,
29
  "count": 94,
30
  "fallback_rate": 0.0106,
31
- "macro_f1": 0.9453
32
  }
33
  },
34
- "fallback_rate": 0.0217,
35
  "head": "intent_subtype",
36
- "macro_f1": 0.8767,
37
  "per_class_metrics": {
38
  "account_help": {
39
- "f1-score": 0.7741935483870968,
40
- "precision": 0.75,
41
- "recall": 0.8,
42
  "support": 15.0
43
  },
44
- "accuracy": 0.8772563176895307,
45
  "billing_help": {
46
- "f1-score": 0.8,
47
- "precision": 0.8,
48
- "recall": 0.8,
49
  "support": 15.0
50
  },
51
  "booking": {
52
- "f1-score": 0.6956521739130435,
53
  "precision": 1.0,
54
- "recall": 0.5333333333333333,
55
  "support": 15.0
56
  },
57
  "comparison": {
@@ -61,26 +61,26 @@
61
  "support": 15.0
62
  },
63
  "contact_sales": {
64
- "f1-score": 0.9333333333333333,
65
- "precision": 0.9333333333333333,
66
- "recall": 0.9333333333333333,
67
  "support": 15.0
68
  },
69
  "deal_seeking": {
70
- "f1-score": 0.9285714285714286,
71
- "precision": 1.0,
72
  "recall": 0.8666666666666667,
73
  "support": 15.0
74
  },
75
  "download": {
76
- "f1-score": 0.896551724137931,
77
- "precision": 0.9285714285714286,
78
  "recall": 0.8666666666666667,
79
  "support": 15.0
80
  },
81
  "education": {
82
- "f1-score": 0.9375,
83
- "precision": 0.8823529411764706,
84
  "recall": 1.0,
85
  "support": 15.0
86
  },
@@ -91,69 +91,69 @@
91
  "support": 15.0
92
  },
93
  "evaluation": {
94
- "f1-score": 0.8484848484848485,
95
- "precision": 0.7777777777777778,
96
- "recall": 0.9333333333333333,
97
  "support": 15.0
98
  },
99
  "follow_up": {
100
- "f1-score": 0.9285714285714286,
101
  "precision": 1.0,
102
- "recall": 0.8666666666666667,
103
  "support": 15.0
104
  },
105
  "macro avg": {
106
- "f1-score": 0.8767381318695205,
107
- "precision": 0.893520983060569,
108
- "recall": 0.8760257806826435,
109
  "support": 277.0
110
  },
111
  "onboarding_setup": {
112
- "f1-score": 0.9090909090909091,
113
- "precision": 0.9375,
114
- "recall": 0.8823529411764706,
115
  "support": 17.0
116
  },
117
  "product_discovery": {
118
- "f1-score": 0.896551724137931,
119
- "precision": 0.9285714285714286,
120
- "recall": 0.8666666666666667,
121
  "support": 15.0
122
  },
123
  "provider_selection": {
124
- "f1-score": 1.0,
125
- "precision": 1.0,
126
  "recall": 1.0,
127
  "support": 16.0
128
  },
129
  "purchase": {
130
- "f1-score": 0.9285714285714286,
131
- "precision": 1.0,
132
  "recall": 0.8666666666666667,
133
  "support": 15.0
134
  },
135
  "signup": {
136
- "f1-score": 0.7777777777777778,
137
- "precision": 0.7,
138
- "recall": 0.875,
139
  "support": 16.0
140
  },
141
  "task_execution": {
142
- "f1-score": 0.8292682926829268,
143
- "precision": 0.7391304347826086,
144
- "recall": 0.9444444444444444,
145
  "support": 18.0
146
  },
147
  "troubleshooting": {
148
- "f1-score": 0.8823529411764706,
149
- "precision": 0.7894736842105263,
150
- "recall": 1.0,
151
  "support": 15.0
152
  },
153
  "weighted avg": {
154
- "f1-score": 0.8765453432446891,
155
- "precision": 0.8918521903635431,
156
- "recall": 0.8772563176895307,
157
  "support": 277.0
158
  }
159
  },
 
1
  {
2
+ "accepted_accuracy": 0.9104,
3
+ "accepted_coverage": 0.9675,
4
+ "accuracy": 0.8917,
5
  "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/intent_subtype_difficulty_benchmark_confusion_matrix.csv",
6
  "count": 277,
7
  "dataset_path": "/content/agentic-intent-classifier/data/subtype_benchmark.jsonl",
 
12
  "accuracy": 0.913,
13
  "count": 92,
14
  "fallback_rate": 0.0,
15
+ "macro_f1": 0.9109
16
  },
17
  "hard": {
18
+ "accepted_accuracy": 0.8554,
19
+ "accepted_coverage": 0.9121,
20
+ "accuracy": 0.8132,
21
  "count": 91,
22
+ "fallback_rate": 0.0879,
23
+ "macro_f1": 0.8025
24
  },
25
  "medium": {
26
  "accepted_accuracy": 0.957,
 
28
  "accuracy": 0.9468,
29
  "count": 94,
30
  "fallback_rate": 0.0106,
31
+ "macro_f1": 0.9469
32
  }
33
  },
34
+ "fallback_rate": 0.0325,
35
  "head": "intent_subtype",
36
+ "macro_f1": 0.8886,
37
  "per_class_metrics": {
38
  "account_help": {
39
+ "f1-score": 0.64,
40
+ "precision": 0.8,
41
+ "recall": 0.5333333333333333,
42
  "support": 15.0
43
  },
44
+ "accuracy": 0.8916967509025271,
45
  "billing_help": {
46
+ "f1-score": 0.8387096774193549,
47
+ "precision": 0.8125,
48
+ "recall": 0.8666666666666667,
49
  "support": 15.0
50
  },
51
  "booking": {
52
+ "f1-score": 0.9285714285714286,
53
  "precision": 1.0,
54
+ "recall": 0.8666666666666667,
55
  "support": 15.0
56
  },
57
  "comparison": {
 
61
  "support": 15.0
62
  },
63
  "contact_sales": {
64
+ "f1-score": 0.9375,
65
+ "precision": 0.8823529411764706,
66
+ "recall": 1.0,
67
  "support": 15.0
68
  },
69
  "deal_seeking": {
70
+ "f1-score": 0.896551724137931,
71
+ "precision": 0.9285714285714286,
72
  "recall": 0.8666666666666667,
73
  "support": 15.0
74
  },
75
  "download": {
76
+ "f1-score": 0.9285714285714286,
77
+ "precision": 1.0,
78
  "recall": 0.8666666666666667,
79
  "support": 15.0
80
  },
81
  "education": {
82
+ "f1-score": 0.9090909090909091,
83
+ "precision": 0.8333333333333334,
84
  "recall": 1.0,
85
  "support": 15.0
86
  },
 
91
  "support": 15.0
92
  },
93
  "evaluation": {
94
+ "f1-score": 0.896551724137931,
95
+ "precision": 0.9285714285714286,
96
+ "recall": 0.8666666666666667,
97
  "support": 15.0
98
  },
99
  "follow_up": {
100
+ "f1-score": 0.9655172413793104,
101
  "precision": 1.0,
102
+ "recall": 0.9333333333333333,
103
  "support": 15.0
104
  },
105
  "macro avg": {
106
+ "f1-score": 0.8886471209737711,
107
+ "precision": 0.8965122159975102,
108
+ "recall": 0.8895561002178651,
109
  "support": 277.0
110
  },
111
  "onboarding_setup": {
112
+ "f1-score": 0.8648648648648649,
113
+ "precision": 0.8,
114
+ "recall": 0.9411764705882353,
115
  "support": 17.0
116
  },
117
  "product_discovery": {
118
+ "f1-score": 0.9032258064516129,
119
+ "precision": 0.875,
120
+ "recall": 0.9333333333333333,
121
  "support": 15.0
122
  },
123
  "provider_selection": {
124
+ "f1-score": 0.9696969696969697,
125
+ "precision": 0.9411764705882353,
126
  "recall": 1.0,
127
  "support": 16.0
128
  },
129
  "purchase": {
130
+ "f1-score": 0.896551724137931,
131
+ "precision": 0.9285714285714286,
132
  "recall": 0.8666666666666667,
133
  "support": 15.0
134
  },
135
  "signup": {
136
+ "f1-score": 0.8823529411764706,
137
+ "precision": 0.8333333333333334,
138
+ "recall": 0.9375,
139
  "support": 16.0
140
  },
141
  "task_execution": {
142
+ "f1-score": 0.9230769230769231,
143
+ "precision": 0.8571428571428571,
144
+ "recall": 1.0,
145
  "support": 18.0
146
  },
147
  "troubleshooting": {
148
+ "f1-score": 0.8,
149
+ "precision": 0.8,
150
+ "recall": 0.8,
151
  "support": 15.0
152
  },
153
  "weighted avg": {
154
+ "f1-score": 0.8891181699377334,
155
+ "precision": 0.8953221541324111,
156
+ "recall": 0.8916967509025271,
157
  "support": 277.0
158
  }
159
  },
artifacts/evaluation/latest/intent_subtype_extended_cases_confusion_matrix.csv CHANGED
@@ -15,5 +15,5 @@ onboarding_setup,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0
15
  troubleshooting,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
16
  account_help,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,0,0,0
17
  billing_help,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
18
- follow_up,1,0,0,0,2,0,0,0,0,0,0,1,0,0,0,0,8,0
19
  emotional_reflection,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
 
15
  troubleshooting,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
16
  account_help,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,0,0,0
17
  billing_help,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
18
+ follow_up,1,0,0,0,3,0,0,0,0,0,0,1,0,0,0,0,7,0
19
  emotional_reflection,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
artifacts/evaluation/latest/intent_subtype_extended_cases_report.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "accepted_accuracy": 0.8491,
3
  "accepted_coverage": 1.0,
4
- "accuracy": 0.8491,
5
  "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/intent_subtype_extended_cases_confusion_matrix.csv",
6
  "count": 53,
7
  "dataset_path": "/content/agentic-intent-classifier/data/subtype/extended_cases.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "intent_subtype",
10
- "macro_f1": 0.7764,
11
  "per_class_metrics": {
12
  "account_help": {
13
  "f1-score": 0.8,
@@ -15,7 +15,7 @@
15
  "recall": 0.6666666666666666,
16
  "support": 3.0
17
  },
18
- "accuracy": 0.8490566037735849,
19
  "billing_help": {
20
  "f1-score": 0.0,
21
  "precision": 0.0,
@@ -41,8 +41,8 @@
41
  "support": 0.0
42
  },
43
  "deal_seeking": {
44
- "f1-score": 0.9,
45
- "precision": 0.8181818181818182,
46
  "recall": 1.0,
47
  "support": 9.0
48
  },
@@ -71,15 +71,15 @@
71
  "support": 3.0
72
  },
73
  "follow_up": {
74
- "f1-score": 0.8,
75
  "precision": 1.0,
76
- "recall": 0.6666666666666666,
77
  "support": 12.0
78
  },
79
  "macro avg": {
80
- "f1-score": 0.474472286972287,
81
- "precision": 0.46035754369087706,
82
- "recall": 0.5185185185185186,
83
  "support": 53.0
84
  },
85
  "onboarding_setup": {
@@ -125,9 +125,9 @@
125
  "support": 1.0
126
  },
127
  "weighted avg": {
128
- "f1-score": 0.823438668249989,
129
- "precision": 0.8324076342944268,
130
- "recall": 0.8490566037735849,
131
  "support": 53.0
132
  }
133
  },
 
1
  {
2
+ "accepted_accuracy": 0.8302,
3
  "accepted_coverage": 1.0,
4
+ "accuracy": 0.8302,
5
  "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/intent_subtype_extended_cases_confusion_matrix.csv",
6
  "count": 53,
7
  "dataset_path": "/content/agentic-intent-classifier/data/subtype/extended_cases.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "intent_subtype",
10
+ "macro_f1": 0.7668,
11
  "per_class_metrics": {
12
  "account_help": {
13
  "f1-score": 0.8,
 
15
  "recall": 0.6666666666666666,
16
  "support": 3.0
17
  },
18
+ "accuracy": 0.8301886792452831,
19
  "billing_help": {
20
  "f1-score": 0.0,
21
  "precision": 0.0,
 
41
  "support": 0.0
42
  },
43
  "deal_seeking": {
44
+ "f1-score": 0.8571428571428571,
45
+ "precision": 0.75,
46
  "recall": 1.0,
47
  "support": 9.0
48
  },
 
71
  "support": 3.0
72
  },
73
  "follow_up": {
74
+ "f1-score": 0.7368421052631579,
75
  "precision": 1.0,
76
+ "recall": 0.5833333333333334,
77
  "support": 12.0
78
  },
79
  "macro avg": {
80
+ "f1-score": 0.46858256266151,
81
+ "precision": 0.4565696649029982,
82
+ "recall": 0.513888888888889,
83
  "support": 53.0
84
  },
85
  "onboarding_setup": {
 
125
  "support": 1.0
126
  },
127
  "weighted avg": {
128
+ "f1-score": 0.8018611395225099,
129
+ "precision": 0.8208295896975142,
130
+ "recall": 0.8301886792452831,
131
  "support": 53.0
132
  }
133
  },
artifacts/evaluation/latest/intent_subtype_hard_cases_confusion_matrix.csv CHANGED
@@ -2,11 +2,11 @@
2
  education,29,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3
  product_discovery,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4
  comparison,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5
- evaluation,2,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6
  deal_seeking,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0
7
  provider_selection,0,0,0,1,0,9,0,0,0,0,0,0,0,0,0,0,0,0
8
  signup,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0
9
- purchase,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0
10
  booking,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0
11
  download,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
12
  contact_sales,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
 
2
  education,29,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3
  product_discovery,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4
  comparison,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5
+ evaluation,3,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6
  deal_seeking,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0
7
  provider_selection,0,0,0,1,0,9,0,0,0,0,0,0,0,0,0,0,0,0
8
  signup,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0
9
+ purchase,0,0,0,0,0,0,1,2,0,0,0,0,0,0,0,0,0,0
10
  booking,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0
11
  download,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
12
  contact_sales,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
artifacts/evaluation/latest/intent_subtype_hard_cases_report.json CHANGED
@@ -7,7 +7,7 @@
7
  "dataset_path": "/content/agentic-intent-classifier/data/subtype/hard_cases.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "intent_subtype",
10
- "macro_f1": 0.846,
11
  "per_class_metrics": {
12
  "account_help": {
13
  "f1-score": 0.8,
@@ -53,8 +53,8 @@
53
  "support": 0.0
54
  },
55
  "education": {
56
- "f1-score": 0.9666666666666667,
57
- "precision": 0.9354838709677419,
58
  "recall": 1.0,
59
  "support": 29.0
60
  },
@@ -65,9 +65,9 @@
65
  "support": 0.0
66
  },
67
  "evaluation": {
68
- "f1-score": 0.0,
69
- "precision": 0.0,
70
- "recall": 0.0,
71
  "support": 6.0
72
  },
73
  "follow_up": {
@@ -77,9 +77,9 @@
77
  "support": 12.0
78
  },
79
  "macro avg": {
80
- "f1-score": 0.7049919484702094,
81
- "precision": 0.7038231780167263,
82
- "recall": 0.7305555555555556,
83
  "support": 94.0
84
  },
85
  "onboarding_setup": {
@@ -89,8 +89,8 @@
89
  "support": 6.0
90
  },
91
  "product_discovery": {
92
- "f1-score": 0.8,
93
- "precision": 0.6666666666666666,
94
  "recall": 1.0,
95
  "support": 8.0
96
  },
@@ -101,14 +101,14 @@
101
  "support": 10.0
102
  },
103
  "purchase": {
104
- "f1-score": 1.0,
105
  "precision": 1.0,
106
- "recall": 1.0,
107
  "support": 3.0
108
  },
109
  "signup": {
110
- "f1-score": 1.0,
111
- "precision": 1.0,
112
  "recall": 1.0,
113
  "support": 3.0
114
  },
@@ -125,8 +125,8 @@
125
  "support": 3.0
126
  },
127
  "weighted avg": {
128
- "f1-score": 0.8721091581868641,
129
- "precision": 0.8648478609013955,
130
  "recall": 0.8936170212765957,
131
  "support": 94.0
132
  }
 
7
  "dataset_path": "/content/agentic-intent-classifier/data/subtype/hard_cases.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "intent_subtype",
10
+ "macro_f1": 0.8447,
11
  "per_class_metrics": {
12
  "account_help": {
13
  "f1-score": 0.8,
 
53
  "support": 0.0
54
  },
55
  "education": {
56
+ "f1-score": 0.9508196721311475,
57
+ "precision": 0.90625,
58
  "recall": 1.0,
59
  "support": 29.0
60
  },
 
65
  "support": 0.0
66
  },
67
  "evaluation": {
68
+ "f1-score": 0.25,
69
+ "precision": 0.5,
70
+ "recall": 0.16666666666666666,
71
  "support": 6.0
72
  },
73
  "follow_up": {
 
77
  "support": 12.0
78
  },
79
  "macro avg": {
80
+ "f1-score": 0.7038911013311109,
81
+ "precision": 0.7234953703703704,
82
+ "recall": 0.7212962962962962,
83
  "support": 94.0
84
  },
85
  "onboarding_setup": {
 
89
  "support": 6.0
90
  },
91
  "product_discovery": {
92
+ "f1-score": 0.8888888888888888,
93
+ "precision": 0.8,
94
  "recall": 1.0,
95
  "support": 8.0
96
  },
 
101
  "support": 10.0
102
  },
103
  "purchase": {
104
+ "f1-score": 0.8,
105
  "precision": 1.0,
106
+ "recall": 0.6666666666666666,
107
  "support": 3.0
108
  },
109
  "signup": {
110
+ "f1-score": 0.8571428571428571,
111
+ "precision": 0.75,
112
  "recall": 1.0,
113
  "support": 3.0
114
  },
 
125
  "support": 3.0
126
  },
127
  "weighted avg": {
128
+ "f1-score": 0.8798004011763282,
129
+ "precision": 0.8911125886524823,
130
  "recall": 0.8936170212765957,
131
  "support": 94.0
132
  }
artifacts/evaluation/latest/intent_subtype_test_confusion_matrix.csv CHANGED
@@ -2,9 +2,9 @@
2
  education,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3
  product_discovery,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4
  comparison,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5
- evaluation,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6
  deal_seeking,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0
7
- provider_selection,0,0,0,1,0,5,0,0,0,0,0,0,0,0,0,0,0,0
8
  signup,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0
9
  purchase,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
10
  booking,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0
@@ -12,7 +12,7 @@ download,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
12
  contact_sales,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
13
  task_execution,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0
14
  onboarding_setup,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0
15
- troubleshooting,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0
16
  account_help,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0
17
  billing_help,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
18
  follow_up,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,8,0
 
2
  education,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3
  product_discovery,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4
  comparison,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5
+ evaluation,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
6
  deal_seeking,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0
7
+ provider_selection,0,0,0,1,0,4,0,0,0,0,0,0,1,0,0,0,0,0
8
  signup,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0
9
  purchase,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
10
  booking,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0
 
12
  contact_sales,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
13
  task_execution,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0
14
  onboarding_setup,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0
15
+ troubleshooting,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0
16
  account_help,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0
17
  billing_help,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
18
  follow_up,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,8,0
artifacts/evaluation/latest/intent_subtype_test_report.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "accepted_accuracy": 0.9143,
3
  "accepted_coverage": 1.0,
4
- "accuracy": 0.9143,
5
  "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/intent_subtype_test_confusion_matrix.csv",
6
  "count": 70,
7
  "dataset_path": "/content/agentic-intent-classifier/data/subtype/test.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "intent_subtype",
10
- "macro_f1": 0.8855,
11
  "per_class_metrics": {
12
  "account_help": {
13
  "f1-score": 1.0,
@@ -15,7 +15,7 @@
15
  "recall": 1.0,
16
  "support": 2.0
17
  },
18
- "accuracy": 0.9142857142857143,
19
  "billing_help": {
20
  "f1-score": 0.0,
21
  "precision": 0.0,
@@ -41,8 +41,8 @@
41
  "support": 0.0
42
  },
43
  "deal_seeking": {
44
- "f1-score": 0.8,
45
- "precision": 0.6666666666666666,
46
  "recall": 1.0,
47
  "support": 2.0
48
  },
@@ -65,9 +65,9 @@
65
  "support": 5.0
66
  },
67
  "evaluation": {
68
- "f1-score": 0.5,
69
- "precision": 0.5,
70
- "recall": 0.5,
71
  "support": 2.0
72
  },
73
  "follow_up": {
@@ -77,14 +77,14 @@
77
  "support": 11.0
78
  },
79
  "macro avg": {
80
- "f1-score": 0.6887592108100274,
81
- "precision": 0.7,
82
- "recall": 0.6978114478114478,
83
  "support": 70.0
84
  },
85
  "onboarding_setup": {
86
- "f1-score": 1.0,
87
- "precision": 1.0,
88
  "recall": 1.0,
89
  "support": 4.0
90
  },
@@ -95,9 +95,9 @@
95
  "support": 8.0
96
  },
97
  "provider_selection": {
98
- "f1-score": 0.9090909090909091,
99
  "precision": 1.0,
100
- "recall": 0.8333333333333334,
101
  "support": 6.0
102
  },
103
  "purchase": {
@@ -113,21 +113,21 @@
113
  "support": 2.0
114
  },
115
  "task_execution": {
116
- "f1-score": 0.8571428571428571,
117
- "precision": 0.75,
118
  "recall": 1.0,
119
  "support": 6.0
120
  },
121
  "troubleshooting": {
122
- "f1-score": 0.6666666666666666,
123
  "precision": 1.0,
124
- "recall": 0.5,
125
  "support": 2.0
126
  },
127
  "weighted avg": {
128
- "f1-score": 0.9126080539458813,
129
- "precision": 0.9307142857142858,
130
- "recall": 0.9142857142857143,
131
  "support": 70.0
132
  }
133
  },
 
1
  {
2
+ "accepted_accuracy": 0.9,
3
  "accepted_coverage": 1.0,
4
+ "accuracy": 0.9,
5
  "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/intent_subtype_test_confusion_matrix.csv",
6
  "count": 70,
7
  "dataset_path": "/content/agentic-intent-classifier/data/subtype/test.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "intent_subtype",
10
+ "macro_f1": 0.8531,
11
  "per_class_metrics": {
12
  "account_help": {
13
  "f1-score": 1.0,
 
15
  "recall": 1.0,
16
  "support": 2.0
17
  },
18
+ "accuracy": 0.9,
19
  "billing_help": {
20
  "f1-score": 0.0,
21
  "precision": 0.0,
 
41
  "support": 0.0
42
  },
43
  "deal_seeking": {
44
+ "f1-score": 0.6666666666666666,
45
+ "precision": 0.5,
46
  "recall": 1.0,
47
  "support": 2.0
48
  },
 
65
  "support": 5.0
66
  },
67
  "evaluation": {
68
+ "f1-score": 0.0,
69
+ "precision": 0.0,
70
+ "recall": 0.0,
71
  "support": 2.0
72
  },
73
  "follow_up": {
 
77
  "support": 11.0
78
  },
79
  "macro avg": {
80
+ "f1-score": 0.6635221022395855,
81
+ "precision": 0.6578042328042328,
82
+ "recall": 0.6885521885521885,
83
  "support": 70.0
84
  },
85
  "onboarding_setup": {
86
+ "f1-score": 0.8888888888888888,
87
+ "precision": 0.8,
88
  "recall": 1.0,
89
  "support": 4.0
90
  },
 
95
  "support": 8.0
96
  },
97
  "provider_selection": {
98
+ "f1-score": 0.8,
99
  "precision": 1.0,
100
+ "recall": 0.6666666666666666,
101
  "support": 6.0
102
  },
103
  "purchase": {
 
113
  "support": 2.0
114
  },
115
  "task_execution": {
116
+ "f1-score": 0.9230769230769231,
117
+ "precision": 0.8571428571428571,
118
  "recall": 1.0,
119
  "support": 6.0
120
  },
121
  "troubleshooting": {
122
+ "f1-score": 1.0,
123
  "precision": 1.0,
124
+ "recall": 1.0,
125
  "support": 2.0
126
  },
127
  "weighted avg": {
128
+ "f1-score": 0.8939882610403741,
129
+ "precision": 0.9094217687074829,
130
+ "recall": 0.9,
131
  "support": 70.0
132
  }
133
  },
artifacts/evaluation/latest/intent_subtype_train_confusion_matrix.csv CHANGED
@@ -1,19 +1,19 @@
1
  ,education,product_discovery,comparison,evaluation,deal_seeking,provider_selection,signup,purchase,booking,download,contact_sales,task_execution,onboarding_setup,troubleshooting,account_help,billing_help,follow_up,emotional_reflection
2
  education,52,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3
- product_discovery,0,30,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
4
  comparison,0,0,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5
- evaluation,3,6,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6
  deal_seeking,0,0,0,0,10,1,0,0,0,0,0,0,0,0,0,0,0,0
7
  provider_selection,0,0,0,0,0,23,0,0,0,0,0,0,0,0,0,0,2,0
8
  signup,0,0,0,0,0,0,16,0,0,0,0,0,0,0,0,0,0,0
9
- purchase,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0
10
- booking,0,0,0,0,0,0,2,0,3,0,0,0,0,0,0,0,0,0
11
  download,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0
12
  contact_sales,0,0,0,0,0,0,2,0,1,0,6,0,0,0,0,0,0,0
13
- task_execution,0,0,0,0,0,0,0,0,0,0,0,19,0,0,0,0,0,0
14
- onboarding_setup,0,0,0,0,0,0,0,0,1,1,0,0,15,0,0,0,0,0
15
- troubleshooting,0,0,0,0,0,0,0,0,0,0,0,0,0,10,1,0,2,0
16
- account_help,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0
17
- billing_help,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,4,0,0
18
- follow_up,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,31,0
19
  emotional_reflection,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,20
 
1
  ,education,product_discovery,comparison,evaluation,deal_seeking,provider_selection,signup,purchase,booking,download,contact_sales,task_execution,onboarding_setup,troubleshooting,account_help,billing_help,follow_up,emotional_reflection
2
  education,52,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3
+ product_discovery,0,29,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
4
  comparison,0,0,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5
+ evaluation,4,4,0,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6
  deal_seeking,0,0,0,0,10,1,0,0,0,0,0,0,0,0,0,0,0,0
7
  provider_selection,0,0,0,0,0,23,0,0,0,0,0,0,0,0,0,0,2,0
8
  signup,0,0,0,0,0,0,16,0,0,0,0,0,0,0,0,0,0,0
9
+ purchase,0,0,0,0,0,0,2,4,0,0,0,0,0,0,0,0,0,0
10
+ booking,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0
11
  download,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0
12
  contact_sales,0,0,0,0,0,0,2,0,1,0,6,0,0,0,0,0,0,0
13
+ task_execution,0,0,0,0,0,0,1,0,0,1,0,17,0,0,0,0,0,0
14
+ onboarding_setup,0,0,0,0,0,0,0,0,1,0,0,0,16,0,0,0,0,0
15
+ troubleshooting,0,0,0,0,0,0,0,0,0,0,0,0,0,10,2,0,1,0
16
+ account_help,0,0,0,0,0,0,0,0,0,0,0,0,0,2,5,0,0,0
17
+ billing_help,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0
18
+ follow_up,0,0,0,0,1,0,0,0,0,0,0,5,0,0,0,0,30,0
19
  emotional_reflection,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,20
artifacts/evaluation/latest/intent_subtype_train_report.json CHANGED
@@ -1,36 +1,36 @@
1
  {
2
- "accepted_accuracy": 0.9042,
3
  "accepted_coverage": 1.0,
4
- "accuracy": 0.9042,
5
  "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/intent_subtype_train_confusion_matrix.csv",
6
  "count": 313,
7
  "dataset_path": "/content/agentic-intent-classifier/data/subtype/train.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "intent_subtype",
10
- "macro_f1": 0.8789,
11
  "per_class_metrics": {
12
  "account_help": {
13
- "f1-score": 0.875,
14
- "precision": 0.7777777777777778,
15
- "recall": 1.0,
16
  "support": 7.0
17
  },
18
- "accuracy": 0.9041533546325878,
19
  "billing_help": {
20
- "f1-score": 0.8,
21
  "precision": 1.0,
22
- "recall": 0.6666666666666666,
23
  "support": 6.0
24
  },
25
  "booking": {
26
- "f1-score": 0.6,
27
- "precision": 0.6,
28
- "recall": 0.6,
29
  "support": 5.0
30
  },
31
  "comparison": {
32
- "f1-score": 1.0,
33
- "precision": 1.0,
34
  "recall": 1.0,
35
  "support": 15.0
36
  },
@@ -41,8 +41,8 @@
41
  "support": 9.0
42
  },
43
  "deal_seeking": {
44
- "f1-score": 0.9523809523809523,
45
- "precision": 1.0,
46
  "recall": 0.9090909090909091,
47
  "support": 11.0
48
  },
@@ -53,8 +53,8 @@
53
  "support": 8.0
54
  },
55
  "education": {
56
- "f1-score": 0.9719626168224299,
57
- "precision": 0.9454545454545454,
58
  "recall": 1.0,
59
  "support": 52.0
60
  },
@@ -65,33 +65,33 @@
65
  "support": 20.0
66
  },
67
  "evaluation": {
68
- "f1-score": 0.64,
69
  "precision": 1.0,
70
- "recall": 0.47058823529411764,
71
  "support": 17.0
72
  },
73
  "follow_up": {
74
- "f1-score": 0.8611111111111112,
75
- "precision": 0.8611111111111112,
76
- "recall": 0.8611111111111112,
77
  "support": 36.0
78
  },
79
  "macro avg": {
80
- "f1-score": 0.8788540112011829,
81
- "precision": 0.9092031425364758,
82
- "recall": 0.8729694019289211,
83
  "support": 313.0
84
  },
85
  "onboarding_setup": {
86
- "f1-score": 0.9375,
87
  "precision": 1.0,
88
- "recall": 0.8823529411764706,
89
  "support": 17.0
90
  },
91
  "product_discovery": {
92
- "f1-score": 0.8955223880597015,
93
- "precision": 0.8333333333333334,
94
- "recall": 0.967741935483871,
95
  "support": 31.0
96
  },
97
  "provider_selection": {
@@ -101,33 +101,33 @@
101
  "support": 25.0
102
  },
103
  "purchase": {
104
- "f1-score": 1.0,
105
  "precision": 1.0,
106
- "recall": 1.0,
107
  "support": 6.0
108
  },
109
  "signup": {
110
- "f1-score": 0.8888888888888888,
111
- "precision": 0.8,
112
  "recall": 1.0,
113
  "support": 16.0
114
  },
115
  "task_execution": {
116
- "f1-score": 0.8837209302325582,
117
- "precision": 0.7916666666666666,
118
- "recall": 1.0,
119
  "support": 19.0
120
  },
121
  "troubleshooting": {
122
- "f1-score": 0.8333333333333334,
123
- "precision": 0.9090909090909091,
124
  "recall": 0.7692307692307693,
125
  "support": 13.0
126
  },
127
  "weighted avg": {
128
- "f1-score": 0.8995750585641842,
129
- "precision": 0.9142834091715881,
130
- "recall": 0.9041533546325878,
131
  "support": 313.0
132
  }
133
  },
 
1
  {
2
+ "accepted_accuracy": 0.8978,
3
  "accepted_coverage": 1.0,
4
+ "accuracy": 0.8978,
5
  "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/intent_subtype_train_confusion_matrix.csv",
6
  "count": 313,
7
  "dataset_path": "/content/agentic-intent-classifier/data/subtype/train.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "intent_subtype",
10
+ "macro_f1": 0.877,
11
  "per_class_metrics": {
12
  "account_help": {
13
+ "f1-score": 0.7142857142857143,
14
+ "precision": 0.7142857142857143,
15
+ "recall": 0.7142857142857143,
16
  "support": 7.0
17
  },
18
+ "accuracy": 0.8977635782747604,
19
  "billing_help": {
20
+ "f1-score": 1.0,
21
  "precision": 1.0,
22
+ "recall": 1.0,
23
  "support": 6.0
24
  },
25
  "booking": {
26
+ "f1-score": 0.8333333333333334,
27
+ "precision": 0.7142857142857143,
28
+ "recall": 1.0,
29
  "support": 5.0
30
  },
31
  "comparison": {
32
+ "f1-score": 0.967741935483871,
33
+ "precision": 0.9375,
34
  "recall": 1.0,
35
  "support": 15.0
36
  },
 
41
  "support": 9.0
42
  },
43
  "deal_seeking": {
44
+ "f1-score": 0.9090909090909091,
45
+ "precision": 0.9090909090909091,
46
  "recall": 0.9090909090909091,
47
  "support": 11.0
48
  },
 
53
  "support": 8.0
54
  },
55
  "education": {
56
+ "f1-score": 0.9629629629629629,
57
+ "precision": 0.9285714285714286,
58
  "recall": 1.0,
59
  "support": 52.0
60
  },
 
65
  "support": 20.0
66
  },
67
  "evaluation": {
68
+ "f1-score": 0.6923076923076923,
69
  "precision": 1.0,
70
+ "recall": 0.5294117647058824,
71
  "support": 17.0
72
  },
73
  "follow_up": {
74
+ "f1-score": 0.8571428571428571,
75
+ "precision": 0.8823529411764706,
76
+ "recall": 0.8333333333333334,
77
  "support": 36.0
78
  },
79
  "macro avg": {
80
+ "f1-score": 0.8770498618135788,
81
+ "precision": 0.8988923431325393,
82
+ "recall": 0.876671278202288,
83
  "support": 313.0
84
  },
85
  "onboarding_setup": {
86
+ "f1-score": 0.9696969696969697,
87
  "precision": 1.0,
88
+ "recall": 0.9411764705882353,
89
  "support": 17.0
90
  },
91
  "product_discovery": {
92
+ "f1-score": 0.90625,
93
+ "precision": 0.8787878787878788,
94
+ "recall": 0.9354838709677419,
95
  "support": 31.0
96
  },
97
  "provider_selection": {
 
101
  "support": 25.0
102
  },
103
  "purchase": {
104
+ "f1-score": 0.8,
105
  "precision": 1.0,
106
+ "recall": 0.6666666666666666,
107
  "support": 6.0
108
  },
109
  "signup": {
110
+ "f1-score": 0.8648648648648649,
111
+ "precision": 0.7619047619047619,
112
  "recall": 1.0,
113
  "support": 16.0
114
  },
115
  "task_execution": {
116
+ "f1-score": 0.8292682926829268,
117
+ "precision": 0.7727272727272727,
118
+ "recall": 0.8947368421052632,
119
  "support": 19.0
120
  },
121
  "troubleshooting": {
122
+ "f1-score": 0.8,
123
+ "precision": 0.8333333333333334,
124
  "recall": 0.7692307692307693,
125
  "support": 13.0
126
  },
127
  "weighted avg": {
128
+ "f1-score": 0.894423568060199,
129
+ "precision": 0.9063956713482179,
130
+ "recall": 0.8977635782747604,
131
  "support": 313.0
132
  }
133
  },
artifacts/evaluation/latest/intent_subtype_val_confusion_matrix.csv CHANGED
@@ -1,6 +1,6 @@
1
  ,education,product_discovery,comparison,evaluation,deal_seeking,provider_selection,signup,purchase,booking,download,contact_sales,task_execution,onboarding_setup,troubleshooting,account_help,billing_help,follow_up,emotional_reflection
2
  education,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3
- product_discovery,0,10,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
4
  comparison,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5
  evaluation,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6
  deal_seeking,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0
@@ -14,6 +14,6 @@ task_execution,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0
14
  onboarding_setup,0,1,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0
15
  troubleshooting,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
16
  account_help,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0
17
- billing_help,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
18
  follow_up,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,9,0
19
  emotional_reflection,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5
 
1
  ,education,product_discovery,comparison,evaluation,deal_seeking,provider_selection,signup,purchase,booking,download,contact_sales,task_execution,onboarding_setup,troubleshooting,account_help,billing_help,follow_up,emotional_reflection
2
  education,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3
+ product_discovery,0,9,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4
  comparison,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5
  evaluation,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6
  deal_seeking,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0
 
14
  onboarding_setup,0,1,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0
15
  troubleshooting,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
16
  account_help,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0
17
+ billing_help,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
18
  follow_up,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,9,0
19
  emotional_reflection,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5
artifacts/evaluation/latest/intent_subtype_val_report.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "accepted_accuracy": 0.8734,
3
  "accepted_coverage": 0.9875,
4
- "accuracy": 0.875,
5
  "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/intent_subtype_val_confusion_matrix.csv",
6
  "count": 80,
7
  "dataset_path": "/content/agentic-intent-classifier/data/subtype/val.jsonl",
8
  "fallback_rate": 0.0125,
9
  "head": "intent_subtype",
10
- "macro_f1": 0.7429,
11
  "per_class_metrics": {
12
  "account_help": {
13
  "f1-score": 0.5,
@@ -15,11 +15,11 @@
15
  "recall": 0.5,
16
  "support": 2.0
17
  },
18
- "accuracy": 0.875,
19
  "billing_help": {
20
- "f1-score": 1.0,
21
- "precision": 1.0,
22
- "recall": 1.0,
23
  "support": 1.0
24
  },
25
  "booking": {
@@ -29,8 +29,8 @@
29
  "support": 3.0
30
  },
31
  "comparison": {
32
- "f1-score": 0.6666666666666666,
33
- "precision": 1.0,
34
  "recall": 0.5,
35
  "support": 4.0
36
  },
@@ -41,8 +41,8 @@
41
  "support": 0.0
42
  },
43
  "deal_seeking": {
44
- "f1-score": 0.6666666666666666,
45
- "precision": 0.5,
46
  "recall": 1.0,
47
  "support": 2.0
48
  },
@@ -77,9 +77,9 @@
77
  "support": 11.0
78
  },
79
  "macro avg": {
80
- "f1-score": 0.660381593714927,
81
- "precision": 0.6597643097643098,
82
- "recall": 0.695959595959596,
83
  "support": 80.0
84
  },
85
  "onboarding_setup": {
@@ -89,9 +89,9 @@
89
  "support": 5.0
90
  },
91
  "product_discovery": {
92
- "f1-score": 0.9090909090909091,
93
- "precision": 0.9090909090909091,
94
- "recall": 0.9090909090909091,
95
  "support": 11.0
96
  },
97
  "provider_selection": {
@@ -113,8 +113,8 @@
113
  "support": 2.0
114
  },
115
  "task_execution": {
116
- "f1-score": 0.8888888888888888,
117
- "precision": 0.8,
118
  "recall": 1.0,
119
  "support": 8.0
120
  },
@@ -125,9 +125,9 @@
125
  "support": 1.0
126
  },
127
  "weighted avg": {
128
- "f1-score": 0.8673611111111112,
129
- "precision": 0.8841666666666667,
130
- "recall": 0.875,
131
  "support": 80.0
132
  }
133
  },
 
1
  {
2
+ "accepted_accuracy": 0.8608,
3
  "accepted_coverage": 0.9875,
4
+ "accuracy": 0.85,
5
  "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/intent_subtype_val_confusion_matrix.csv",
6
  "count": 80,
7
  "dataset_path": "/content/agentic-intent-classifier/data/subtype/val.jsonl",
8
  "fallback_rate": 0.0125,
9
  "head": "intent_subtype",
10
+ "macro_f1": 0.6722,
11
  "per_class_metrics": {
12
  "account_help": {
13
  "f1-score": 0.5,
 
15
  "recall": 0.5,
16
  "support": 2.0
17
  },
18
+ "accuracy": 0.85,
19
  "billing_help": {
20
+ "f1-score": 0.0,
21
+ "precision": 0.0,
22
+ "recall": 0.0,
23
  "support": 1.0
24
  },
25
  "booking": {
 
29
  "support": 3.0
30
  },
31
  "comparison": {
32
+ "f1-score": 0.5,
33
+ "precision": 0.5,
34
  "recall": 0.5,
35
  "support": 4.0
36
  },
 
41
  "support": 0.0
42
  },
43
  "deal_seeking": {
44
+ "f1-score": 0.8,
45
+ "precision": 0.6666666666666666,
46
  "recall": 1.0,
47
  "support": 2.0
48
  },
 
77
  "support": 11.0
78
  },
79
  "macro avg": {
80
+ "f1-score": 0.5974890931031281,
81
+ "precision": 0.5811447811447812,
82
+ "recall": 0.6353535353535353,
83
  "support": 80.0
84
  },
85
  "onboarding_setup": {
 
89
  "support": 5.0
90
  },
91
  "product_discovery": {
92
+ "f1-score": 0.8571428571428571,
93
+ "precision": 0.9,
94
+ "recall": 0.8181818181818182,
95
  "support": 11.0
96
  },
97
  "provider_selection": {
 
113
  "support": 2.0
114
  },
115
  "task_execution": {
116
+ "f1-score": 0.8421052631578947,
117
+ "precision": 0.7272727272727273,
118
  "recall": 1.0,
119
  "support": 8.0
120
  },
 
125
  "support": 1.0
126
  },
127
  "weighted avg": {
128
+ "f1-score": 0.8380398913951546,
129
+ "precision": 0.8423106060606059,
130
+ "recall": 0.85,
131
  "support": 80.0
132
  }
133
  },
artifacts/evaluation/latest/intent_type_hard_cases_report.json CHANGED
@@ -1,11 +1,11 @@
1
  {
2
  "accepted_accuracy": 1.0,
3
- "accepted_coverage": 1.0,
4
  "accuracy": 1.0,
5
  "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/intent_type_hard_cases_confusion_matrix.csv",
6
  "count": 61,
7
  "dataset_path": "/content/agentic-intent-classifier/data/hard_cases.jsonl",
8
- "fallback_rate": 0.0,
9
  "head": "intent_type",
10
  "macro_f1": 1.0,
11
  "per_class_metrics": {
 
1
  {
2
  "accepted_accuracy": 1.0,
3
+ "accepted_coverage": 0.9836,
4
  "accuracy": 1.0,
5
  "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/intent_type_hard_cases_confusion_matrix.csv",
6
  "count": 61,
7
  "dataset_path": "/content/agentic-intent-classifier/data/hard_cases.jsonl",
8
+ "fallback_rate": 0.0164,
9
  "head": "intent_type",
10
  "macro_f1": 1.0,
11
  "per_class_metrics": {
artifacts/evaluation/latest/summary.json CHANGED
The diff for this file is too large to render. See raw diff
 
training/run_full_training_pipeline.py CHANGED
@@ -3,17 +3,27 @@ from __future__ import annotations
3
  import argparse
4
  import subprocess
5
  import sys
 
 
6
  from pathlib import Path
7
 
8
  BASE_DIR = Path(__file__).resolve().parent.parent
9
 
10
 
11
  def run_step(args: list[str]) -> None:
12
- print(f"\n==> Running: {' '.join(args)}")
 
 
 
13
  subprocess.run(args, cwd=BASE_DIR, check=True)
 
 
 
14
 
15
 
16
  def main() -> None:
 
 
17
  parser = argparse.ArgumentParser(
18
  description=(
19
  "Run the full multi-head training pipeline: multitask intent, IAB classifier, calibration for all heads, "
@@ -106,6 +116,15 @@ def main() -> None:
106
  if args.smoke_test:
107
  run_step([python, "combined_inference.py", args.smoke_test_query])
108
 
 
 
 
 
 
 
 
 
 
109
 
110
  if __name__ == "__main__":
111
  main()
 
3
  import argparse
4
  import subprocess
5
  import sys
6
+ import time
7
+ from datetime import datetime, timezone
8
  from pathlib import Path
9
 
10
  BASE_DIR = Path(__file__).resolve().parent.parent
11
 
12
 
13
  def run_step(args: list[str]) -> None:
14
+ cmd = " ".join(args)
15
+ started_at = time.perf_counter()
16
+ started_wall = datetime.now(timezone.utc).isoformat()
17
+ print(f"\n==> Running: {cmd}\n start: {started_wall}")
18
  subprocess.run(args, cwd=BASE_DIR, check=True)
19
+ elapsed_s = time.perf_counter() - started_at
20
+ ended_wall = datetime.now(timezone.utc).isoformat()
21
+ print(f" end: {ended_wall}\n took: {elapsed_s:.2f}s")
22
 
23
 
24
  def main() -> None:
25
+ pipeline_start = time.perf_counter()
26
+ pipeline_start_wall = datetime.now(timezone.utc).isoformat()
27
  parser = argparse.ArgumentParser(
28
  description=(
29
  "Run the full multi-head training pipeline: multitask intent, IAB classifier, calibration for all heads, "
 
116
  if args.smoke_test:
117
  run_step([python, "combined_inference.py", args.smoke_test_query])
118
 
119
+ pipeline_elapsed_s = time.perf_counter() - pipeline_start
120
+ pipeline_end_wall = datetime.now(timezone.utc).isoformat()
121
+ print(
122
+ f"\n==> Pipeline complete\n"
123
+ f" start: {pipeline_start_wall}\n"
124
+ f" end: {pipeline_end_wall}\n"
125
+ f" total: {pipeline_elapsed_s:.2f}s"
126
+ )
127
+
128
 
129
  if __name__ == "__main__":
130
  main()
training/upload_to_hf.py CHANGED
@@ -11,6 +11,8 @@ from __future__ import annotations
11
  import argparse
12
  import os
13
  import sys
 
 
14
  from pathlib import Path
15
 
16
 
@@ -46,6 +48,16 @@ def _parse_args() -> argparse.Namespace:
46
  action="store_true",
47
  help="Upload artifacts/calibration directory.",
48
  )
 
 
 
 
 
 
 
 
 
 
49
  parser.add_argument(
50
  "--multitask-dir",
51
  default="multitask_intent_model_output",
@@ -70,6 +82,8 @@ def _parse_args() -> argparse.Namespace:
70
 
71
 
72
  def main() -> int:
 
 
73
  args = _parse_args()
74
  if not args.token:
75
  print("Missing HF token. Provide --token or set env HF_TOKEN.", file=sys.stderr)
@@ -80,6 +94,7 @@ def main() -> int:
80
  multitask_dir = (repo_root / args.multitask_dir).resolve()
81
  iab_dir = (repo_root / args.iab_dir).resolve()
82
  calibration_dir = (repo_root / args.calibration_dir).resolve()
 
83
 
84
  to_upload: list[tuple[str, Path]] = []
85
  if args.include_multitask:
@@ -88,6 +103,8 @@ def main() -> int:
88
  to_upload.append(("iab_classifier_model_output", iab_dir))
89
  if args.include_calibration:
90
  to_upload.append(("artifacts/calibration", calibration_dir))
 
 
91
 
92
  if not to_upload:
93
  print("Nothing to upload. Pass --include-multitask, --include-iab, and/or --include-calibration.", file=sys.stderr)
@@ -110,6 +127,20 @@ def main() -> int:
110
  if args.dry_run:
111
  print(f"[DRY] Would upload {local_dir} -> {args.repo_id}:{repo_path}")
112
  continue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  print(f"[UPLOAD] {local_dir} -> {args.repo_id}:{repo_path}")
114
  api.upload_folder(
115
  repo_id=args.repo_id,
@@ -117,8 +148,11 @@ def main() -> int:
117
  folder_path=str(local_dir),
118
  path_in_repo=repo_path,
119
  )
 
120
 
121
- print("Upload complete.")
 
 
122
  return 0
123
 
124
 
 
11
  import argparse
12
  import os
13
  import sys
14
+ import time
15
+ from datetime import datetime, timezone
16
  from pathlib import Path
17
 
18
 
 
48
  action="store_true",
49
  help="Upload artifacts/calibration directory.",
50
  )
51
+ parser.add_argument(
52
+ "--include-hf-readme",
53
+ action="store_true",
54
+ help="Upload a Hugging Face model card file as README.md in the Hub repo root.",
55
+ )
56
+ parser.add_argument(
57
+ "--hf-readme-path",
58
+ default="HF_MODEL_CARD.md",
59
+ help="Local path to the HF model card markdown to upload as README.md (relative to repo root).",
60
+ )
61
  parser.add_argument(
62
  "--multitask-dir",
63
  default="multitask_intent_model_output",
 
82
 
83
 
84
  def main() -> int:
85
+ started_at = time.perf_counter()
86
+ started_wall = datetime.now(timezone.utc).isoformat()
87
  args = _parse_args()
88
  if not args.token:
89
  print("Missing HF token. Provide --token or set env HF_TOKEN.", file=sys.stderr)
 
94
  multitask_dir = (repo_root / args.multitask_dir).resolve()
95
  iab_dir = (repo_root / args.iab_dir).resolve()
96
  calibration_dir = (repo_root / args.calibration_dir).resolve()
97
+ hf_readme_path = (repo_root / args.hf_readme_path).resolve()
98
 
99
  to_upload: list[tuple[str, Path]] = []
100
  if args.include_multitask:
 
103
  to_upload.append(("iab_classifier_model_output", iab_dir))
104
  if args.include_calibration:
105
  to_upload.append(("artifacts/calibration", calibration_dir))
106
+ if args.include_hf_readme:
107
+ to_upload.append(("README.md", hf_readme_path))
108
 
109
  if not to_upload:
110
  print("Nothing to upload. Pass --include-multitask, --include-iab, and/or --include-calibration.", file=sys.stderr)
 
127
  if args.dry_run:
128
  print(f"[DRY] Would upload {local_dir} -> {args.repo_id}:{repo_path}")
129
  continue
130
+ # Upload single README.md file (Hub model card) vs directories
131
+ if repo_path == "README.md":
132
+ step_start = time.perf_counter()
133
+ print(f"[UPLOAD] {local_dir} -> {args.repo_id}:README.md")
134
+ api.upload_file(
135
+ repo_id=args.repo_id,
136
+ repo_type="model",
137
+ path_or_fileobj=str(local_dir),
138
+ path_in_repo="README.md",
139
+ )
140
+ print(f"[DONE ] README.md took {(time.perf_counter() - step_start):.2f}s")
141
+ continue
142
+
143
+ step_start = time.perf_counter()
144
  print(f"[UPLOAD] {local_dir} -> {args.repo_id}:{repo_path}")
145
  api.upload_folder(
146
  repo_id=args.repo_id,
 
148
  folder_path=str(local_dir),
149
  path_in_repo=repo_path,
150
  )
151
+ print(f"[DONE ] {repo_path} took {(time.perf_counter() - step_start):.2f}s")
152
 
153
+ ended_wall = datetime.now(timezone.utc).isoformat()
154
+ elapsed_s = time.perf_counter() - started_at
155
+ print(f"Upload complete.\nstart: {started_wall}\nend: {ended_wall}\ntotal: {elapsed_s:.2f}s")
156
  return 0
157
 
158