manikumargouni committed on
Commit
53d5d9f
·
verified ·
1 Parent(s): 672a2ca

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. README.md +462 -182
  2. artifacts/calibration/decision_phase.json +13 -13
  3. artifacts/calibration/iab_content.json +14 -14
  4. artifacts/calibration/intent_subtype.json +9 -9
  5. artifacts/calibration/intent_type.json +14 -14
  6. artifacts/evaluation/latest/combined_demo_benchmark.json +146 -170
  7. artifacts/evaluation/latest/decision_phase_difficulty_benchmark_confusion_matrix.csv +2 -2
  8. artifacts/evaluation/latest/decision_phase_difficulty_benchmark_report.json +17 -17
  9. artifacts/evaluation/latest/decision_phase_final_wave_cases_confusion_matrix.csv +2 -2
  10. artifacts/evaluation/latest/decision_phase_final_wave_cases_report.json +17 -17
  11. artifacts/evaluation/latest/decision_phase_test_confusion_matrix.csv +2 -2
  12. artifacts/evaluation/latest/decision_phase_test_report.json +14 -14
  13. artifacts/evaluation/latest/decision_phase_train_confusion_matrix.csv +2 -2
  14. artifacts/evaluation/latest/decision_phase_train_report.json +17 -17
  15. artifacts/evaluation/latest/decision_phase_val_confusion_matrix.csv +2 -2
  16. artifacts/evaluation/latest/decision_phase_val_report.json +19 -19
  17. artifacts/evaluation/latest/iab_behavior_lock_regression.json +39 -44
  18. artifacts/evaluation/latest/iab_content_cross_vertical_benchmark_report.json +57 -57
  19. artifacts/evaluation/latest/iab_content_difficulty_benchmark_report.json +58 -58
  20. artifacts/evaluation/latest/iab_content_extended_cases_report.json +26 -26
  21. artifacts/evaluation/latest/iab_content_hard_cases_report.json +6 -6
  22. artifacts/evaluation/latest/iab_content_test_report.json +29 -29
  23. artifacts/evaluation/latest/iab_content_train_report.json +29 -29
  24. artifacts/evaluation/latest/iab_content_val_report.json +29 -29
  25. artifacts/evaluation/latest/iab_cross_vertical_behavior_lock_regression.json +303 -345
  26. artifacts/evaluation/latest/iab_cross_vertical_quality_target_eval.json +488 -238
  27. artifacts/evaluation/latest/iab_quality_target_eval.json +40 -35
  28. artifacts/evaluation/latest/intent_subtype_difficulty_benchmark_confusion_matrix.csv +14 -14
  29. artifacts/evaluation/latest/intent_subtype_difficulty_benchmark_report.json +59 -59
  30. artifacts/evaluation/latest/intent_subtype_extended_cases_confusion_matrix.csv +3 -3
  31. artifacts/evaluation/latest/intent_subtype_extended_cases_report.json +21 -21
  32. artifacts/evaluation/latest/intent_subtype_hard_cases_confusion_matrix.csv +1 -1
  33. artifacts/evaluation/latest/intent_subtype_hard_cases_report.json +9 -9
  34. artifacts/evaluation/latest/intent_subtype_test_confusion_matrix.csv +5 -5
  35. artifacts/evaluation/latest/intent_subtype_test_report.json +22 -22
  36. artifacts/evaluation/latest/intent_subtype_train_confusion_matrix.csv +8 -8
  37. artifacts/evaluation/latest/intent_subtype_train_report.json +44 -44
  38. artifacts/evaluation/latest/intent_subtype_val_confusion_matrix.csv +4 -4
  39. artifacts/evaluation/latest/intent_subtype_val_report.json +26 -26
  40. artifacts/evaluation/latest/intent_type_hard_cases_report.json +2 -2
  41. artifacts/evaluation/latest/intent_type_test_confusion_matrix.csv +3 -3
  42. artifacts/evaluation/latest/intent_type_test_report.json +23 -23
  43. artifacts/evaluation/latest/intent_type_third_wave_cases_confusion_matrix.csv +1 -1
  44. artifacts/evaluation/latest/intent_type_third_wave_cases_report.json +13 -13
  45. artifacts/evaluation/latest/intent_type_train_confusion_matrix.csv +1 -1
  46. artifacts/evaluation/latest/intent_type_train_report.json +16 -16
  47. artifacts/evaluation/latest/intent_type_val_confusion_matrix.csv +2 -2
  48. artifacts/evaluation/latest/intent_type_val_report.json +20 -20
  49. artifacts/evaluation/latest/summary.json +0 -0
  50. iab_classifier_model_output/train_metrics.json +12 -12
README.md CHANGED
@@ -1,57 +1,10 @@
1
- ---
2
- language:
3
- - en
4
- library_name: transformers
5
- pipeline_tag: text-classification
6
- base_model: distilbert-base-uncased
7
- metrics:
8
- - accuracy
9
- - f1
10
- tags:
11
- - intent-classification
12
- - multitask
13
- - iab
14
- - conversational-ai
15
- - adtech
16
- - calibrated-confidence
17
- license: apache-2.0
18
- ---
19
-
20
- # admesh/agentic-intent-classifier
21
-
22
- Production-ready intent + IAB classifier bundle for conversational traffic.
23
-
24
- Combines multitask intent modeling, supervised IAB content classification, and per-head confidence calibration to support safe monetization decisions in real time.
25
-
26
- ## Links
27
-
28
- - Hugging Face: https://huggingface.co/admesh/agentic-intent-classifier
29
- - GitHub: https://github.com/GouniManikumar12/agentic-intent-classifier
30
 
31
- ## What It Predicts
32
 
33
- | Field | Description |
34
- |---|---|
35
- | `intent.type` | `commercial`, `informational`, `navigational`, `transactional`, … |
36
- | `intent.subtype` | `product_discovery`, `comparison`, `how_to`, … |
37
- | `intent.decision_phase` | `awareness`, `consideration`, `decision`, … |
38
- | `iab_content` | IAB Content Taxonomy 3.0 tier1 / tier2 / tier3 labels |
39
- | `component_confidence` | Per-head calibrated confidence with threshold flags |
40
- | `system_decision` | Monetization eligibility, opportunity type, policy |
41
-
42
- ---
43
-
44
- ## Deployment Options
45
-
46
- ### 0. Colab / Kaggle Quickstart (copy/paste)
47
-
48
- ```python
49
- !pip -q install -U pip
50
- !pip -q install -U "torch==2.10.0" "torchvision==0.25.0" "torchaudio==2.10.0"
51
- !pip -q install -U "transformers>=4.36.0" "huggingface_hub>=0.20.0" "safetensors>=0.4.0"
52
- ```
53
 
54
- Restart the runtime after installs (**Runtime → Restart runtime**) so the new Torch version is actually used.
55
 
56
  ```python
57
  from transformers import pipeline
@@ -59,212 +12,539 @@ from transformers import pipeline
59
  clf = pipeline(
60
  "admesh-intent",
61
  model="admesh/agentic-intent-classifier",
62
- trust_remote_code=True, # required (custom pipeline + multi-model bundle)
63
  )
64
 
65
  out = clf("Which laptop should I buy for college?")
66
- print(out["meta"])
67
  print(out["model_output"]["classification"]["intent"])
 
 
68
  ```
69
 
70
- ---
 
 
71
 
72
- ## Latency / inference timing (quick check)
73
 
74
- The first call includes model/code loading. Warm up once, then measure:
75
 
76
  ```python
77
  import time
 
 
 
78
  q = "Which laptop should I buy for college?"
79
 
80
  _ = clf("warm up")
81
  t0 = time.perf_counter()
82
  out = clf(q)
83
- print(f"latency_ms={(time.perf_counter() - t0) * 1000:.1f}")
 
 
 
84
  ```
85
 
86
- ### 1. `transformers.pipeline()` anywhere (Python)
87
 
88
  ```python
89
- from transformers import pipeline
90
 
91
- clf = pipeline(
92
- "admesh-intent",
93
- model="admesh/agentic-intent-classifier",
94
- trust_remote_code=True,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  )
 
 
 
 
 
96
 
 
 
97
  result = clf("Which laptop should I buy for college?")
 
 
 
 
 
 
 
 
 
 
98
  ```
99
 
100
- Batch and custom thresholds:
101
 
102
  ```python
103
- # batch
104
  results = clf([
105
  "Best running shoes under $100",
106
- "How does TCP work?",
107
  "Buy noise-cancelling headphones",
108
  ])
109
 
110
- # custom confidence thresholds
111
  result = clf(
112
- "Buy headphones",
113
  threshold_overrides={"intent_type": 0.6, "intent_subtype": 0.35},
114
  )
115
  ```
116
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  ---
118
 
119
- ### 2. HF Inference Endpoints (managed, deploy to AWS / Azure / GCP)
120
 
121
- 1. Go to https://ui.endpoints.huggingface.co
122
- 2. **New Endpoint** → select `admesh/agentic-intent-classifier`
123
- 3. Framework: **PyTorch** — Task: **Text Classification**
124
- 4. Enable **"Load with trust_remote_code"**
125
- 5. Deploy
 
 
126
 
127
- The endpoint serves the same `pipeline()` interface above via REST:
128
 
129
  ```bash
130
- curl https://<your-endpoint>.endpoints.huggingface.cloud \
131
- -H "Authorization: Bearer $HF_TOKEN" \
132
- -H "Content-Type: application/json" \
133
- -d '{"inputs": "Which laptop should I buy for college?"}'
134
  ```
135
 
136
- ---
137
 
138
- ### 3. HF Spaces (Gradio / Streamlit demo)
 
 
 
139
 
140
- ```python
141
- # app.py for a Gradio Space
142
- import gradio as gr
143
- from transformers import pipeline
144
 
145
- clf = pipeline(
146
- "admesh-intent",
147
- model="admesh/agentic-intent-classifier",
148
- trust_remote_code=True,
149
- )
150
 
151
- def classify(text):
152
- return clf(text)
153
 
154
- gr.Interface(fn=classify, inputs="text", outputs="json").launch()
 
 
 
155
  ```
156
 
157
- ---
158
 
159
- ### 4. Local / notebook via `snapshot_download`
 
 
 
160
 
161
- ```python
162
- import sys
163
- from huggingface_hub import snapshot_download
164
 
165
- local_dir = snapshot_download(
166
- repo_id="admesh/agentic-intent-classifier",
167
- repo_type="model",
168
- )
169
- sys.path.insert(0, local_dir)
170
 
171
- from pipeline import AdmeshIntentPipeline
172
- clf = AdmeshIntentPipeline()
173
- result = clf("I need a CRM for a 5-person startup")
 
 
 
 
174
  ```
175
 
176
- Or the one-liner factory:
177
 
178
- ```python
179
- from pipeline import AdmeshIntentPipeline
180
- clf = AdmeshIntentPipeline.from_pretrained("admesh/agentic-intent-classifier")
 
 
 
 
 
 
 
 
 
 
181
  ```
182
 
183
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
 
185
- ## Troubleshooting (avoid environment errors)
186
 
187
- ### `No module named 'combined_inference'` (or similar)
 
 
 
188
 
189
- This means the Hub repo root is missing required Python files. Ensure these exist at the **root of the model repo** (same level as `pipeline.py`):
 
 
 
 
 
190
 
191
- - `pipeline.py`, `config.json`, `config.py`
192
- - `combined_inference.py`, `schemas.py`
193
- - `model_runtime.py`, `multitask_runtime.py`, `multitask_model.py`
194
- - `inference_intent_type.py`, `inference_subtype.py`, `inference_decision_phase.py`, `inference_iab_classifier.py`
195
- - `iab_classifier.py`, `iab_taxonomy.py`
196
 
197
- ### `does not appear to have a file named model.safetensors`
 
 
 
 
 
 
 
 
 
 
198
 
199
- Transformers requires a standard checkpoint at the repo root for `pipeline()` to initialize. This repo includes a **small dummy** `model.safetensors` + tokenizer files at the root for compatibility; the *real* production weights live in:
200
 
201
- - `multitask_intent_model_output/`
202
- - `iab_classifier_model_output/`
203
  - `artifacts/calibration/`
 
204
 
205
- ---
206
 
207
- ## Example Output
208
-
209
- ```json
210
- {
211
- "model_output": {
212
- "classification": {
213
- "iab_content": {
214
- "taxonomy": "IAB Content Taxonomy",
215
- "taxonomy_version": "3.0",
216
- "tier1": {"id": "552", "label": "Style & Fashion"},
217
- "tier2": {"id": "579", "label": "Men's Fashion"},
218
- "mapping_mode": "exact",
219
- "mapping_confidence": 0.73
220
- },
221
- "intent": {
222
- "type": "commercial",
223
- "subtype": "product_discovery",
224
- "decision_phase": "consideration",
225
- "confidence": 0.9549,
226
- "commercial_score": 0.656
227
- }
228
- }
229
- },
230
- "system_decision": {
231
- "policy": {
232
- "monetization_eligibility": "allowed_with_caution",
233
- "eligibility_reason": "commercial_discovery_signal_present"
234
- },
235
- "opportunity": {"type": "soft_recommendation", "strength": "medium"}
236
- },
237
- "meta": {
238
- "system_version": "0.6.0-phase4",
239
- "calibration_enabled": true,
240
- "iab_mapping_is_placeholder": false
241
- }
242
- }
243
- ```
244
-
245
- ## Reproducible Revision
246
 
247
- ```python
248
- from huggingface_hub import snapshot_download
249
- local_dir = snapshot_download(
250
- repo_id="admesh/agentic-intent-classifier",
251
- repo_type="model",
252
- revision="0584798f8efee6beccd778b0afa06782ab5add60",
253
- )
 
 
 
 
 
254
  ```
255
 
256
- ## Included Artifacts
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
257
 
258
- | Path | Contents |
259
- |---|---|
260
- | `multitask_intent_model_output/` | DistilBERT multitask weights + tokenizer |
261
- | `iab_classifier_model_output/` | IAB content classifier weights + tokenizer |
262
- | `artifacts/calibration/` | Per-head temperature + threshold JSONs |
263
- | `pipeline.py` | `AdmeshIntentPipeline` (transformers.Pipeline subclass) |
264
- | `combined_inference.py` | Core inference logic |
265
 
266
- ## Notes
267
 
268
- - `trust_remote_code=True` is required because this model uses a custom multi-head architecture that does not map to a single standard `AutoModel` checkpoint.
269
- - `meta.iab_mapping_is_placeholder: true` means IAB artifacts were missing or skipped; train and calibrate IAB for full production accuracy.
270
- - For long-running servers, instantiate once and reuse — models are cached in memory after the first call.
 
 
1
+ # Agentic Intent Classifier
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
+ `agentic-intent-classifier` is a multi-head query classification stack for conversational traffic.
4
 
5
+ ## Quickstart (recommended): run from Hugging Face Hub
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
+ This is the easiest way for developers to test the full production stack (multitask intent + IAB + calibration) without training locally.
8
 
9
  ```python
10
  from transformers import pipeline
 
12
  clf = pipeline(
13
  "admesh-intent",
14
  model="admesh/agentic-intent-classifier",
15
+ trust_remote_code=True,
16
  )
17
 
18
  out = clf("Which laptop should I buy for college?")
 
19
  print(out["model_output"]["classification"]["intent"])
20
+ print(out["model_output"]["classification"]["iab_content"])
21
+ print(out["meta"])
22
  ```
23
 
24
+ If you’re running in Colab/Kaggle and see Torch version conflicts, follow `COLAB_SETUP.md`.
25
+
26
+ ## Latency / inference timing (developer quick check)
27
 
28
+ The first call includes model/code loading; measure latency after a warm-up call.
29
 
30
+ Single query:
31
 
32
  ```python
33
  import time
34
+ from transformers import pipeline
35
+
36
+ clf = pipeline("admesh-intent", model="admesh/agentic-intent-classifier", trust_remote_code=True)
37
  q = "Which laptop should I buy for college?"
38
 
39
  _ = clf("warm up")
40
  t0 = time.perf_counter()
41
  out = clf(q)
42
+ dt_ms = (time.perf_counter() - t0) * 1000
43
+
44
+ print(f"latency_ms={dt_ms:.1f}")
45
+ print(out["model_output"]["classification"]["intent"])
46
  ```
47
 
48
+ Warm p50 / p95 over 20 runs:
49
 
50
  ```python
51
+ import time, statistics
52
 
53
+ times = []
54
+ for _ in range(20):
55
+ t0 = time.perf_counter()
56
+ _ = clf(q)
57
+ times.append((time.perf_counter() - t0) * 1000)
58
+
59
+ times_sorted = sorted(times)
60
+ print(f"p50={statistics.median(times):.1f}ms p95={times_sorted[int(0.95*len(times))-1]:.1f}ms mean={statistics.mean(times):.1f}ms")
61
+ ```
62
+
63
+ The classifier currently produces:
64
+
65
+ - `intent.type`
66
+ - `intent.subtype`
67
+ - `intent.decision_phase`
68
+ - `iab_content`
69
+ - calibrated confidence per head
70
+ - combined fallback / policy / opportunity decisions
71
+
72
+ The repo is beyond the original v0.1 baseline. It now includes:
73
+
74
+ - shared config and label ownership
75
+ - reusable model runtime
76
+ - calibrated confidence and threshold gating
77
+ - combined inference with fallback/policy logic
78
+ - request/response validation in the demo API
79
+ - repeatable evaluation and regression suites
80
+ - full-TSV IAB taxonomy retrieval support through tier4
81
+ - a local embedding index for taxonomy-node retrieval over IAB content paths
82
+ - a separate synthetic full-intent-taxonomy augmentation dataset for non-IAB heads
83
+ - a dedicated intent-type difficulty dataset and held-out benchmark with `easy`, `medium`, and `hard` cases
84
+ - a dedicated decision-phase difficulty dataset and held-out benchmark with `easy`, `medium`, and `hard` cases
85
+
86
+ Generated model weights are intentionally not committed.
87
+
88
+ ## Current Taxonomy
89
+
90
+ ### `intent.type`
91
+
92
+ - `informational`
93
+ - `exploratory`
94
+ - `commercial`
95
+ - `transactional`
96
+ - `support`
97
+ - `personal_reflection`
98
+ - `creative_generation`
99
+ - `chit_chat`
100
+ - `ambiguous`
101
+ - `prohibited`
102
+
103
+ ### `intent.decision_phase`
104
+
105
+ - `awareness`
106
+ - `research`
107
+ - `consideration`
108
+ - `decision`
109
+ - `action`
110
+ - `post_purchase`
111
+ - `support`
112
+
113
+ ### `intent.subtype`
114
+
115
+ - `education`
116
+ - `product_discovery`
117
+ - `comparison`
118
+ - `evaluation`
119
+ - `deal_seeking`
120
+ - `provider_selection`
121
+ - `signup`
122
+ - `purchase`
123
+ - `booking`
124
+ - `download`
125
+ - `contact_sales`
126
+ - `task_execution`
127
+ - `onboarding_setup`
128
+ - `troubleshooting`
129
+ - `account_help`
130
+ - `billing_help`
131
+ - `follow_up`
132
+ - `emotional_reflection`
133
+
134
+ ### `iab_content`
135
+
136
+ - candidates are derived from every row in [data/iab-content/Content Taxonomy 3.0.tsv](data/iab-content/Content%20Taxonomy%203.0.tsv)
137
+ - retrieval output supports `tier1`, `tier2`, `tier3`, and optional `tier4`
138
+
139
+ ## What The System Does
140
+
141
+ - runs three classifier heads:
142
+ - `intent_type`
143
+ - `intent_subtype`
144
+ - `decision_phase`
145
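+ - resolves `iab_content` through the supervised IAB classifier with taxonomy-aware parent fallback; a local embedding index over taxonomy nodes with generic label/path reranking remains available as an optional shadow retrieval baseline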
146
+ - applies calibration artifacts when present
147
+ - computes `commercial_score`
148
+ - applies fallback when confidence is too weak or policy-safe blocking is required
149
+ - emits a schema-validated combined envelope
150
+
151
+ ## What The System Does Not Do
152
+
153
+ - it is not a multi-turn memory system
154
+ - it is not a production-optimized low-latency serving path
155
+ - it is not yet trained on large real-traffic human-labeled intent data
156
+ - combined decision logic is still heuristic, even though it is materially stronger than the original baseline
157
+
158
+ ## Project Layout
159
+
160
+ - [config.py](config.py): labels, thresholds, artifact paths, model paths
161
+ - [model_runtime.py](model_runtime.py): shared calibrated inference runtime
162
+ - [combined_inference.py](combined_inference.py): composed system response
163
+ - [inference_intent_type.py](inference_intent_type.py): direct `intent_type` inference entrypoint
164
+ - [inference_iab_classifier.py](inference_iab_classifier.py): direct supervised `iab_content` inference entrypoint
165
+ - [schemas.py](schemas.py): request/response validation
166
+ - [demo_api.py](demo_api.py): local validated API
167
+ - [iab_taxonomy.py](iab_taxonomy.py): full taxonomy parser/index
168
+ - [iab_classifier.py](iab_classifier.py): supervised IAB runtime with taxonomy-aware parent fallback
169
+ - [iab_retrieval.py](iab_retrieval.py): optional shadow retrieval baseline
170
+ - [training/build_full_intent_taxonomy_dataset.py](training/build_full_intent_taxonomy_dataset.py): separate synthetic intent augmentation dataset
171
+ - [training/build_intent_type_difficulty_dataset.py](training/build_intent_type_difficulty_dataset.py): extra `intent_type` augmentation plus held-out difficulty benchmark
172
+ - [training/build_decision_phase_difficulty_dataset.py](training/build_decision_phase_difficulty_dataset.py): extra `decision_phase` augmentation plus held-out difficulty benchmark
173
+ - [training/build_subtype_difficulty_dataset.py](training/build_subtype_difficulty_dataset.py): extra `intent_subtype` augmentation plus held-out difficulty benchmark
174
+ - [training/build_subtype_dataset.py](training/build_subtype_dataset.py): subtype dataset generation from existing corpora
175
+ - [training/train_iab.py](training/train_iab.py): train the supervised IAB classifier head
176
+ - [training/build_iab_taxonomy_embeddings.py](training/build_iab_taxonomy_embeddings.py): build local IAB node embedding artifacts
177
+ - [training/run_full_training_pipeline.py](training/run_full_training_pipeline.py): full multi-head training/calibration/eval pipeline
178
+ - [evaluation/run_evaluation.py](evaluation/run_evaluation.py): repeatable benchmark runner
179
+ - [evaluation/run_regression_suite.py](evaluation/run_regression_suite.py): known-failure regression runner
180
+ - [evaluation/run_iab_mapping_suite.py](evaluation/run_iab_mapping_suite.py): IAB behavior-lock regression runner
181
+ - [evaluation/run_iab_quality_suite.py](evaluation/run_iab_quality_suite.py): curated IAB quality-target runner
182
+ - [known_limitations.md](known_limitations.md): current gaps and caveats
183
+
184
+ ## Quickstart: Run From Hugging Face
185
+
186
+ Download the trained bundle and run inference in a few lines of Python — no local training required.
187
+
188
+ ```python
189
+ import sys
190
+ from huggingface_hub import snapshot_download
191
+
192
+ # Download the full bundle (models + calibration + code)
193
+ local_dir = snapshot_download(
194
+ repo_id="admesh/agentic-intent-classifier",
195
+ repo_type="model",
196
  )
197
+ sys.path.insert(0, local_dir)
198
+
199
+ # Import and instantiate
200
+ from pipeline import AdmeshIntentPipeline
201
+ clf = AdmeshIntentPipeline()
202
 
203
+ # Classify
204
+ import json
205
  result = clf("Which laptop should I buy for college?")
206
+ print(json.dumps(result, indent=2))
207
+ ```
208
+
209
+ Or use the one-liner factory method:
210
+
211
+ ```python
212
+ from pipeline import AdmeshIntentPipeline # after sys.path.insert above
213
+
214
+ clf = AdmeshIntentPipeline.from_pretrained("admesh/agentic-intent-classifier")
215
+ result = clf("I need a CRM for a 5-person startup")
216
  ```
217
 
218
+ Batch mode and custom thresholds are also supported:
219
 
220
  ```python
221
+ # Batch
222
  results = clf([
223
  "Best running shoes under $100",
224
+ "How does gradient descent work?",
225
  "Buy noise-cancelling headphones",
226
  ])
227
 
228
+ # Custom confidence thresholds
229
  result = clf(
230
+ "Buy noise-cancelling headphones",
231
  threshold_overrides={"intent_type": 0.6, "intent_subtype": 0.35},
232
  )
233
  ```
234
 
235
+ Verify artifacts and run a smoke test from the CLI:
236
+
237
+ ```bash
238
+ cd "<local_dir>"
239
+ python3 training/pipeline_verify.py
240
+ python3 combined_inference.py "Which CRM should I buy for a 3-person startup?"
241
+ ```
242
+
243
+ Pin a specific revision for reproducibility:
244
+
245
+ ```python
246
+ local_dir = snapshot_download(
247
+ repo_id="admesh/agentic-intent-classifier",
248
+ repo_type="model",
249
+ revision="0584798f8efee6beccd778b0afa06782ab5add60",
250
+ )
251
+ ```
252
+
253
  ---
254
 
255
+ ## Setup (for local training)
256
 
257
+ ```bash
258
+ python3 -m venv .venv
259
+ source .venv/bin/activate
260
+ pip install -r agentic-intent-classifier/requirements.txt
261
+ ```
262
+
263
+ ## Inference (local training path)
264
 
265
+ Run one query locally:
266
 
267
  ```bash
268
+ cd agentic-intent-classifier
269
+ python3 training/train_iab.py
270
+ python3 training/calibrate_confidence.py --head iab_content
271
+ python3 combined_inference.py "Which CRM should I buy for a 3-person startup?"
272
  ```
273
 
274
+ Run only the `intent_type` head:
275
 
276
+ ```bash
277
+ cd agentic-intent-classifier
278
+ python3 inference_intent_type.py "best shoes under 100"
279
+ ```
280
 
281
+ Run the demo API:
 
 
 
282
 
283
+ ```bash
284
+ cd agentic-intent-classifier
285
+ python3 demo_api.py
286
+ ```
 
287
 
288
+ Example request:
 
289
 
290
+ ```bash
291
+ curl -sS -X POST http://127.0.0.1:8008/classify \
292
+ -H 'Content-Type: application/json' \
293
+ -d '{"text":"I cannot log into my account"}'
294
  ```
295
 
296
+ Infra endpoints:
297
 
298
+ ```bash
299
+ curl -sS http://127.0.0.1:8008/health
300
+ curl -sS http://127.0.0.1:8008/version
301
+ ```
302
 
303
+ Train only the IAB classifier head:
 
 
304
 
305
+ ```bash
306
+ cd agentic-intent-classifier
307
+ python3 training/train_iab.py
308
+ python3 training/calibrate_confidence.py --head iab_content
309
+ ```
310
 
311
+ The online `iab_content` path now uses the compact supervised classifier. Retrieval is still available as an optional shadow baseline.
312
+
313
+ Build the optional retrieval shadow index:
314
+
315
+ ```bash
316
+ cd agentic-intent-classifier
317
+ python3 training/build_iab_taxonomy_embeddings.py
318
  ```
319
 
320
+ By default the shadow retrieval path uses `Alibaba-NLP/gte-Qwen2-1.5B-instruct`. The retrieval runtime applies the model's query-side instruction format and last-token pooling, matching the Hugging Face usage guidance. If you want to point retrieval at a different embedding model, set `IAB_RETRIEVAL_MODEL_NAME_OVERRIDE` before building the index.
321
 
322
+ Open-source users can swap in their own embedding model, but the contract is:
323
+
324
+ - query embeddings and taxonomy-node embeddings must be produced by the same model and model revision
325
+ - after changing models, you must rebuild `artifacts/iab/taxonomy_embeddings.pt`
326
+ - the repository only tests and supports the default model path out of the box
327
+ - not every Hugging Face embedding model is drop-in compatible with this runtime; some require custom pooling, query instructions, or `trust_remote_code`
328
+
329
+ Example override:
330
+
331
+ ```bash
332
+ cd agentic-intent-classifier
333
+ export IAB_RETRIEVAL_MODEL_NAME_OVERRIDE=mixedbread-ai/mxbai-embed-large-v1
334
+ python3 training/build_iab_taxonomy_embeddings.py
335
  ```
336
 
337
+ This writes:
338
+
339
+ - `artifacts/iab/taxonomy_nodes.json`
340
+ - `artifacts/iab/taxonomy_embeddings.pt`
341
+
342
+ ## Training
343
+
344
+ ### Full local pipeline
345
+
346
+ ```bash
347
+ cd agentic-intent-classifier
348
+ python3 training/run_full_training_pipeline.py
349
+ ```
350
+
351
+ This pipeline now does:
352
+
353
+ 1. build separate full-intent-taxonomy augmentation data
354
+ 2. build separate `intent_type` difficulty augmentation + benchmark
355
+ 3. train `intent_type`
356
+ 4. build subtype corpus
357
+ 5. build separate `intent_subtype` difficulty augmentation + benchmark
358
+ 6. train `intent_subtype`
359
+ 7. build separate `decision_phase` difficulty augmentation + benchmark
360
+ 8. train `decision_phase`
361
+ 9. train `iab_content`
362
+ 10. calibrate all classifier heads, including `iab_content`
363
+ 11. run regression/evaluation unless `--skip-full-eval` is used
364
+
365
+ ### Build datasets individually
366
+
367
+ Separate full-intent augmentation:
368
+
369
+ ```bash
370
+ cd agentic-intent-classifier
371
+ python3 training/build_full_intent_taxonomy_dataset.py
372
+ ```
373
+
374
+ Intent-type difficulty augmentation and benchmark:
375
+
376
+ ```bash
377
+ cd agentic-intent-classifier
378
+ python3 training/build_intent_type_difficulty_dataset.py
379
+ ```
380
+
381
+ Decision-phase difficulty augmentation and benchmark:
382
+
383
+ ```bash
384
+ cd agentic-intent-classifier
385
+ python3 training/build_decision_phase_difficulty_dataset.py
386
+ ```
387
+
388
+ Subtype difficulty augmentation and benchmark:
389
+
390
+ ```bash
391
+ cd agentic-intent-classifier
392
+ python3 training/build_subtype_difficulty_dataset.py
393
+ ```
394
+
395
+ Subtype dataset:
396
+
397
+ ```bash
398
+ cd agentic-intent-classifier
399
+ python3 training/build_subtype_dataset.py
400
+ ```
401
+
402
+ IAB embedding index:
403
+
404
+ ```bash
405
+ cd agentic-intent-classifier
406
+ python3 training/build_iab_taxonomy_embeddings.py
407
+ ```
408
+
409
+ ### Train heads individually
410
+
411
+ ```bash
412
+ cd agentic-intent-classifier
413
+ python3 training/train.py
414
+ python3 training/train_subtype.py
415
+ python3 training/train_decision_phase.py
416
+ ```
417
+
418
+ ### Calibration
419
+
420
+ ```bash
421
+ cd agentic-intent-classifier
422
+ python3 training/calibrate_confidence.py --head intent_type
423
+ python3 training/calibrate_confidence.py --head intent_subtype
424
+ python3 training/calibrate_confidence.py --head decision_phase
425
+ ```
426
+
427
+ ## Evaluation
428
+
429
+ Full evaluation:
430
+
431
+ ```bash
432
+ cd agentic-intent-classifier
433
+ python3 evaluation/run_evaluation.py
434
+ ```
435
 
436
+ Known-failure regression:
437
 
438
+ ```bash
439
+ cd agentic-intent-classifier
440
+ python3 evaluation/run_regression_suite.py
441
+ ```
442
 
443
+ IAB behavior-lock regression:
444
+
445
+ ```bash
446
+ cd agentic-intent-classifier
447
+ python3 evaluation/run_iab_mapping_suite.py
448
+ ```
449
 
450
+ IAB quality-target evaluation:
 
 
 
 
451
 
452
+ ```bash
453
+ cd agentic-intent-classifier
454
+ python3 evaluation/run_iab_quality_suite.py
455
+ ```
456
+
457
+ Threshold sweeps:
458
+
459
+ ```bash
460
+ cd agentic-intent-classifier
461
+ python3 evaluation/sweep_intent_threshold.py
462
+ ```
463
 
464
+ Artifacts are written to:
465
 
 
 
466
  - `artifacts/calibration/`
467
+ - `artifacts/evaluation/latest/`
468
 
469
+ ## Google Colab
470
 
471
+ Use Colab for the full retraining pass if local memory is limited.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
472
 
473
+ Clone once:
474
+
475
+ ```bash
476
+ %cd /content
477
+ !git clone https://github.com/GouniManikumar12/agentic-intent-classifier.git
478
+ %cd /content/agentic-intent-classifier
479
+ ```
480
+
481
+ If the repo is already cloned and you want the latest code, pull manually:
482
+
483
+ ```bash
484
+ !git pull origin main
485
  ```
486
 
487
+ Full pipeline:
488
+
489
+ ```bash
490
+ !python training/run_full_training_pipeline.py
491
+ ```
492
+
493
+ If full evaluation is too heavy for the current Colab runtime:
494
+
495
+ ```bash
496
+ !python training/run_full_training_pipeline.py \
497
+ --iab-embedding-batch-size 32 \
498
+ --skip-full-eval
499
+ ```
500
+
501
+ Then run eval separately after training:
502
+
503
+ ```bash
504
+ !python evaluation/run_regression_suite.py
505
+ !python evaluation/run_iab_mapping_suite.py
506
+ !python evaluation/run_iab_quality_suite.py
507
+ !python evaluation/run_evaluation.py
508
+ ```
509
+
510
+ ## Current Saved Metrics
511
+
512
+ Generate fresh metrics with:
513
+
514
+ ```bash
515
+ cd agentic-intent-classifier
516
+ python3 evaluation/run_evaluation.py
517
+ ```
518
+
519
+ Do not treat any checked-in summary as canonical unless it was regenerated after the current code and artifacts were built. The online IAB path now uses the supervised classifier (with retrieval kept only as an optional shadow baseline), so older saved reports from the deleted hierarchy stack are not meaningful.
520
+
521
+ ## Latency Note
522
+
523
+ `combined_inference.py` is a debugging/offline path, not a production latency path.
524
+
525
+ Current production truth:
526
+
527
+ - per-request CLI execution is not a sub-50ms architecture
528
+ - production serving should use a long-lived API process with preloaded models
529
+ - if sub-50ms becomes a hard requirement, the serving path will need:
530
+ - persistent loaded models
531
+ - runtime optimization
532
+ - likely fewer model passes or a shared multi-head model
533
+
534
+ ## Current Status
535
+
536
+ Current repo status:
537
 
538
+ - full 10-class `intent.type` taxonomy is wired
539
+ - subtype and phase heads are present
540
+ - difficulty benchmarks are wired for `intent_type`, `intent_subtype`, and `decision_phase`
541
+ - full-TSV IAB taxonomy retrieval is wired through tier4
542
+ - separate full-intent augmentation dataset is in place
543
+ - evaluation/runtime memory handling is improved for large IAB splits
 
544
 
545
+ The main remaining gap is not basic infrastructure anymore. It is improving real-world robustness, especially for:
546
 
547
+ - `decision_phase`
548
+ - `intent_subtype`
549
+ - confidence quality on borderline commercial queries
550
+ - real-traffic supervision beyond synthetic data
artifacts/calibration/decision_phase.json CHANGED
@@ -1,31 +1,31 @@
1
  {
2
  "calibrated": true,
3
  "confidence_threshold": 0.22,
4
- "generated_at": "2026-03-25T20:15:26.091588+00:00",
5
  "head": "decision_phase",
6
  "metrics": {
7
- "calibrated_accuracy": 0.8621,
8
- "calibrated_expected_calibration_error": 0.1012,
9
- "calibrated_negative_log_likelihood": 0.4275,
10
- "mean_calibrated_confidence": 0.8431,
11
- "mean_raw_confidence": 0.8274,
12
- "raw_accuracy": 0.8621,
13
- "raw_expected_calibration_error": 0.0985,
14
- "raw_negative_log_likelihood": 0.4338
15
  },
16
  "minimum_threshold_floor": 0.22,
17
- "optimized_temperature_candidate": 0.940009,
18
  "selected_threshold_before_floor": {
19
- "accepted_accuracy": 0.8621,
20
  "coverage": 1.0,
21
  "threshold": 0.0
22
  },
23
  "selection_split": "val",
24
  "selection_target_precision": 0.75,
25
- "temperature": 0.940009,
26
  "temperature_scaling_applied": true,
27
  "threshold_summary": {
28
- "accepted_accuracy": 0.8621,
29
  "coverage": 1.0,
30
  "threshold": 0.22
31
  }
 
1
  {
2
  "calibrated": true,
3
  "confidence_threshold": 0.22,
4
+ "generated_at": "2026-03-25T21:20:02.753657+00:00",
5
  "head": "decision_phase",
6
  "metrics": {
7
+ "calibrated_accuracy": 0.8276,
8
+ "calibrated_expected_calibration_error": 0.0811,
9
+ "calibrated_negative_log_likelihood": 0.5525,
10
+ "mean_calibrated_confidence": 0.8739,
11
+ "mean_raw_confidence": 0.8828,
12
+ "raw_accuracy": 0.8276,
13
+ "raw_expected_calibration_error": 0.0757,
14
+ "raw_negative_log_likelihood": 0.5551
15
  },
16
  "minimum_threshold_floor": 0.22,
17
+ "optimized_temperature_candidate": 1.032831,
18
  "selected_threshold_before_floor": {
19
+ "accepted_accuracy": 0.8276,
20
  "coverage": 1.0,
21
  "threshold": 0.0
22
  },
23
  "selection_split": "val",
24
  "selection_target_precision": 0.75,
25
+ "temperature": 1.032831,
26
  "temperature_scaling_applied": true,
27
  "threshold_summary": {
28
+ "accepted_accuracy": 0.8276,
29
  "coverage": 1.0,
30
  "threshold": 0.22
31
  }
artifacts/calibration/iab_content.json CHANGED
@@ -1,32 +1,32 @@
1
  {
2
  "calibrated": true,
3
  "confidence_threshold": 0.12,
4
- "generated_at": "2026-03-25T20:39:18.586053+00:00",
5
  "head": "iab_content",
6
  "metrics": {
7
- "calibrated_accuracy": 0.9485,
8
- "calibrated_expected_calibration_error": 0.2692,
9
- "calibrated_negative_log_likelihood": 0.5281,
10
- "mean_calibrated_confidence": 0.6793,
11
- "mean_raw_confidence": 0.1987,
12
- "raw_accuracy": 0.9485,
13
- "raw_expected_calibration_error": 0.7498,
14
- "raw_negative_log_likelihood": 1.7931
15
  },
16
  "minimum_threshold_floor": 0.12,
17
- "optimized_temperature_candidate": 0.573651,
18
  "selected_threshold_before_floor": {
19
- "accepted_accuracy": 0.9485,
20
  "coverage": 1.0,
21
  "threshold": 0.0
22
  },
23
  "selection_split": "val",
24
  "selection_target_precision": 0.7,
25
- "temperature": 0.573651,
26
  "temperature_scaling_applied": true,
27
  "threshold_summary": {
28
- "accepted_accuracy": 0.9553,
29
- "coverage": 0.9875,
30
  "threshold": 0.12
31
  }
32
  }
 
1
  {
2
  "calibrated": true,
3
  "confidence_threshold": 0.12,
4
+ "generated_at": "2026-03-25T21:21:46.770447+00:00",
5
  "head": "iab_content",
6
  "metrics": {
7
+ "calibrated_accuracy": 0.9321,
8
+ "calibrated_expected_calibration_error": 0.2607,
9
+ "calibrated_negative_log_likelihood": 0.5642,
10
+ "mean_calibrated_confidence": 0.6714,
11
+ "mean_raw_confidence": 0.1481,
12
+ "raw_accuracy": 0.9321,
13
+ "raw_expected_calibration_error": 0.7839,
14
+ "raw_negative_log_likelihood": 2.103
15
  },
16
  "minimum_threshold_floor": 0.12,
17
+ "optimized_temperature_candidate": 0.502066,
18
  "selected_threshold_before_floor": {
19
+ "accepted_accuracy": 0.9321,
20
  "coverage": 1.0,
21
  "threshold": 0.0
22
  },
23
  "selection_split": "val",
24
  "selection_target_precision": 0.7,
25
+ "temperature": 0.502066,
26
  "temperature_scaling_applied": true,
27
  "threshold_summary": {
28
+ "accepted_accuracy": 0.9472,
29
+ "coverage": 0.975,
30
  "threshold": 0.12
31
  }
32
  }
artifacts/calibration/intent_subtype.json CHANGED
@@ -1,20 +1,20 @@
1
  {
2
  "calibrated": true,
3
  "confidence_threshold": 0.25,
4
- "generated_at": "2026-03-25T20:15:16.332284+00:00",
5
  "head": "intent_subtype",
6
  "metrics": {
7
  "calibrated_accuracy": 0.8625,
8
- "calibrated_expected_calibration_error": 0.0827,
9
- "calibrated_negative_log_likelihood": 0.3943,
10
- "mean_calibrated_confidence": 0.8131,
11
- "mean_raw_confidence": 0.7338,
12
  "raw_accuracy": 0.8625,
13
- "raw_expected_calibration_error": 0.152,
14
- "raw_negative_log_likelihood": 0.4841
15
  },
16
  "minimum_threshold_floor": 0.25,
17
- "optimized_temperature_candidate": 0.789295,
18
  "selected_threshold_before_floor": {
19
  "accepted_accuracy": 0.8625,
20
  "coverage": 1.0,
@@ -22,7 +22,7 @@
22
  },
23
  "selection_split": "val",
24
  "selection_target_precision": 0.75,
25
- "temperature": 0.789295,
26
  "temperature_scaling_applied": true,
27
  "threshold_summary": {
28
  "accepted_accuracy": 0.8625,
 
1
  {
2
  "calibrated": true,
3
  "confidence_threshold": 0.25,
4
+ "generated_at": "2026-03-25T21:19:52.208134+00:00",
5
  "head": "intent_subtype",
6
  "metrics": {
7
  "calibrated_accuracy": 0.8625,
8
+ "calibrated_expected_calibration_error": 0.0584,
9
+ "calibrated_negative_log_likelihood": 0.4288,
10
+ "mean_calibrated_confidence": 0.8389,
11
+ "mean_raw_confidence": 0.7787,
12
  "raw_accuracy": 0.8625,
13
+ "raw_expected_calibration_error": 0.0904,
14
+ "raw_negative_log_likelihood": 0.4795
15
  },
16
  "minimum_threshold_floor": 0.25,
17
+ "optimized_temperature_candidate": 0.861152,
18
  "selected_threshold_before_floor": {
19
  "accepted_accuracy": 0.8625,
20
  "coverage": 1.0,
 
22
  },
23
  "selection_split": "val",
24
  "selection_target_precision": 0.75,
25
+ "temperature": 0.861152,
26
  "temperature_scaling_applied": true,
27
  "threshold_summary": {
28
  "accepted_accuracy": 0.8625,
artifacts/calibration/intent_type.json CHANGED
@@ -1,32 +1,32 @@
1
  {
2
  "calibrated": true,
3
  "confidence_threshold": 0.4,
4
- "generated_at": "2026-03-25T20:15:05.272668+00:00",
5
  "head": "intent_type",
6
  "metrics": {
7
- "calibrated_accuracy": 0.8936,
8
- "calibrated_expected_calibration_error": 0.0915,
9
- "calibrated_negative_log_likelihood": 0.2696,
10
- "mean_calibrated_confidence": 0.9148,
11
- "mean_raw_confidence": 0.8914,
12
- "raw_accuracy": 0.8936,
13
- "raw_expected_calibration_error": 0.0842,
14
- "raw_negative_log_likelihood": 0.2831
15
  },
16
  "minimum_threshold_floor": 0.4,
17
- "optimized_temperature_candidate": 0.918544,
18
  "selected_threshold_before_floor": {
19
- "accepted_accuracy": 0.8936,
20
  "coverage": 1.0,
21
  "threshold": 0.0
22
  },
23
  "selection_split": "val",
24
  "selection_target_precision": 0.8,
25
- "temperature": 0.918544,
26
  "temperature_scaling_applied": true,
27
  "threshold_summary": {
28
- "accepted_accuracy": 0.8936,
29
- "coverage": 1.0,
30
  "threshold": 0.4
31
  }
32
  }
 
1
  {
2
  "calibrated": true,
3
  "confidence_threshold": 0.4,
4
+ "generated_at": "2026-03-25T21:19:41.302013+00:00",
5
  "head": "intent_type",
6
  "metrics": {
7
+ "calibrated_accuracy": 0.8723,
8
+ "calibrated_expected_calibration_error": 0.0798,
9
+ "calibrated_negative_log_likelihood": 0.2692,
10
+ "mean_calibrated_confidence": 0.8962,
11
+ "mean_raw_confidence": 0.8671,
12
+ "raw_accuracy": 0.8723,
13
+ "raw_expected_calibration_error": 0.1073,
14
+ "raw_negative_log_likelihood": 0.2907
15
  },
16
  "minimum_threshold_floor": 0.4,
17
+ "optimized_temperature_candidate": 0.889496,
18
  "selected_threshold_before_floor": {
19
+ "accepted_accuracy": 0.8723,
20
  "coverage": 1.0,
21
  "threshold": 0.0
22
  },
23
  "selection_split": "val",
24
  "selection_target_precision": 0.8,
25
+ "temperature": 0.889496,
26
  "temperature_scaling_applied": true,
27
  "threshold_summary": {
28
+ "accepted_accuracy": 0.8913,
29
+ "coverage": 0.9787,
30
  "threshold": 0.4
31
  }
32
  }
artifacts/evaluation/latest/combined_demo_benchmark.json CHANGED
@@ -11,21 +11,13 @@
11
  "model_output": {
12
  "classification": {
13
  "iab_content": {
14
- "mapping_confidence": 0.5429,
15
  "mapping_mode": "exact",
16
  "taxonomy": "IAB Content Taxonomy",
17
  "taxonomy_version": "3.0",
18
  "tier1": {
19
  "id": "596",
20
  "label": "Technology & Computing"
21
- },
22
- "tier2": {
23
- "id": "599",
24
- "label": "Computing"
25
- },
26
- "tier3": {
27
- "id": "602",
28
- "label": "Software and Applications"
29
  }
30
  },
31
  "intent": {
@@ -33,31 +25,31 @@
33
  "component_confidence": {
34
  "decision_phase": {
35
  "calibrated": true,
36
- "confidence": 0.962,
37
  "confidence_threshold": 0.22,
38
  "label": "awareness",
39
  "meets_threshold": true,
40
- "raw_confidence": 0.9633
41
  },
42
  "intent_subtype": {
43
  "calibrated": true,
44
- "confidence": 0.9805,
45
  "confidence_threshold": 0.25,
46
  "label": "education",
47
  "meets_threshold": true,
48
- "raw_confidence": 0.9549
49
  },
50
  "intent_type": {
51
  "calibrated": true,
52
- "confidence": 0.9817,
53
  "confidence_threshold": 0.4,
54
  "label": "informational",
55
  "meets_threshold": true,
56
- "raw_confidence": 0.9658
57
  },
58
  "overall_strategy": "min_required_component_confidence"
59
  },
60
- "confidence": 0.962,
61
  "decision_phase": "awareness",
62
  "subtype": "education",
63
  "summary": "Classified as informational intent with subtype education in the awareness phase.",
@@ -102,7 +94,7 @@
102
  "model_output": {
103
  "classification": {
104
  "iab_content": {
105
- "mapping_confidence": 0.4784,
106
  "mapping_mode": "exact",
107
  "taxonomy": "IAB Content Taxonomy",
108
  "taxonomy_version": "3.0",
@@ -116,31 +108,31 @@
116
  "component_confidence": {
117
  "decision_phase": {
118
  "calibrated": true,
119
- "confidence": 0.9277,
120
  "confidence_threshold": 0.22,
121
  "label": "awareness",
122
  "meets_threshold": true,
123
- "raw_confidence": 0.9297
124
  },
125
  "intent_subtype": {
126
  "calibrated": true,
127
- "confidence": 0.9749,
128
  "confidence_threshold": 0.25,
129
  "label": "education",
130
  "meets_threshold": true,
131
- "raw_confidence": 0.9445
132
  },
133
  "intent_type": {
134
  "calibrated": true,
135
- "confidence": 0.9797,
136
  "confidence_threshold": 0.4,
137
  "label": "informational",
138
  "meets_threshold": true,
139
- "raw_confidence": 0.9626
140
  },
141
  "overall_strategy": "min_required_component_confidence"
142
  },
143
- "confidence": 0.9277,
144
  "decision_phase": "awareness",
145
  "subtype": "education",
146
  "summary": "Classified as informational intent with subtype education in the awareness phase.",
@@ -185,17 +177,13 @@
185
  "model_output": {
186
  "classification": {
187
  "iab_content": {
188
- "mapping_confidence": 0.2179,
189
- "mapping_mode": "exact",
190
  "taxonomy": "IAB Content Taxonomy",
191
  "taxonomy_version": "3.0",
192
  "tier1": {
193
  "id": "483",
194
  "label": "Sports"
195
- },
196
- "tier2": {
197
- "id": "496",
198
- "label": "Equine Sports"
199
  }
200
  },
201
  "intent": {
@@ -203,31 +191,31 @@
203
  "component_confidence": {
204
  "decision_phase": {
205
  "calibrated": true,
206
- "confidence": 0.9444,
207
  "confidence_threshold": 0.22,
208
  "label": "consideration",
209
  "meets_threshold": true,
210
- "raw_confidence": 0.9461
211
  },
212
  "intent_subtype": {
213
  "calibrated": true,
214
- "confidence": 0.4804,
215
  "confidence_threshold": 0.25,
216
  "label": "comparison",
217
  "meets_threshold": true,
218
- "raw_confidence": 0.4327
219
  },
220
  "intent_type": {
221
  "calibrated": true,
222
- "confidence": 0.981,
223
  "confidence_threshold": 0.4,
224
  "label": "commercial",
225
  "meets_threshold": true,
226
- "raw_confidence": 0.9653
227
  },
228
  "overall_strategy": "min_required_component_confidence"
229
  },
230
- "confidence": 0.4804,
231
  "decision_phase": "consideration",
232
  "subtype": "comparison",
233
  "summary": "Classified as commercial intent with subtype comparison in the consideration phase.",
@@ -272,17 +260,13 @@
272
  "model_output": {
273
  "classification": {
274
  "iab_content": {
275
- "mapping_confidence": 0.3122,
276
- "mapping_mode": "exact",
277
  "taxonomy": "IAB Content Taxonomy",
278
  "taxonomy_version": "3.0",
279
  "tier1": {
280
  "id": "596",
281
  "label": "Technology & Computing"
282
- },
283
- "tier2": {
284
- "id": "638",
285
- "label": "Robotics"
286
  }
287
  },
288
  "intent": {
@@ -290,31 +274,31 @@
290
  "component_confidence": {
291
  "decision_phase": {
292
  "calibrated": true,
293
- "confidence": 0.8858,
294
  "confidence_threshold": 0.22,
295
  "label": "consideration",
296
  "meets_threshold": true,
297
- "raw_confidence": 0.8885
298
  },
299
  "intent_subtype": {
300
  "calibrated": true,
301
- "confidence": 0.9538,
302
  "confidence_threshold": 0.25,
303
  "label": "comparison",
304
  "meets_threshold": true,
305
- "raw_confidence": 0.9083
306
  },
307
  "intent_type": {
308
  "calibrated": true,
309
- "confidence": 0.9676,
310
  "confidence_threshold": 0.4,
311
  "label": "commercial",
312
  "meets_threshold": true,
313
- "raw_confidence": 0.9435
314
  },
315
  "overall_strategy": "min_required_component_confidence"
316
  },
317
- "confidence": 0.8858,
318
  "decision_phase": "consideration",
319
  "subtype": "comparison",
320
  "summary": "Classified as commercial intent with subtype comparison in the consideration phase.",
@@ -359,13 +343,21 @@
359
  "model_output": {
360
  "classification": {
361
  "iab_content": {
362
- "mapping_confidence": 0.5309,
363
  "mapping_mode": "exact",
364
  "taxonomy": "IAB Content Taxonomy",
365
  "taxonomy_version": "3.0",
366
  "tier1": {
367
- "id": "596",
368
- "label": "Technology & Computing"
 
 
 
 
 
 
 
 
369
  }
370
  },
371
  "intent": {
@@ -373,31 +365,31 @@
373
  "component_confidence": {
374
  "decision_phase": {
375
  "calibrated": true,
376
- "confidence": 0.6077,
377
  "confidence_threshold": 0.22,
378
  "label": "decision",
379
  "meets_threshold": true,
380
- "raw_confidence": 0.6097
381
  },
382
  "intent_subtype": {
383
  "calibrated": true,
384
- "confidence": 0.7801,
385
  "confidence_threshold": 0.25,
386
  "label": "provider_selection",
387
  "meets_threshold": true,
388
- "raw_confidence": 0.6968
389
  },
390
  "intent_type": {
391
  "calibrated": true,
392
- "confidence": 0.9843,
393
  "confidence_threshold": 0.4,
394
  "label": "commercial",
395
  "meets_threshold": true,
396
- "raw_confidence": 0.9703
397
  },
398
  "overall_strategy": "min_required_component_confidence"
399
  },
400
- "confidence": 0.6077,
401
  "decision_phase": "decision",
402
  "subtype": "provider_selection",
403
  "summary": "Classified as commercial intent with subtype provider_selection in the decision phase.",
@@ -442,17 +434,13 @@
442
  "model_output": {
443
  "classification": {
444
  "iab_content": {
445
- "mapping_confidence": 0.2299,
446
- "mapping_mode": "exact",
447
  "taxonomy": "IAB Content Taxonomy",
448
  "taxonomy_version": "3.0",
449
  "tier1": {
450
- "id": "v9i3On",
451
- "label": "Sensitive Topics"
452
- },
453
- "tier2": {
454
- "id": "XtODT3",
455
- "label": "Crime & Harmful Acts to Individuals, Society & Human Right Violations"
456
  }
457
  },
458
  "intent": {
@@ -460,31 +448,31 @@
460
  "component_confidence": {
461
  "decision_phase": {
462
  "calibrated": true,
463
- "confidence": 0.9662,
464
  "confidence_threshold": 0.22,
465
  "label": "action",
466
  "meets_threshold": true,
467
- "raw_confidence": 0.9674
468
  },
469
  "intent_subtype": {
470
  "calibrated": true,
471
- "confidence": 0.9473,
472
  "confidence_threshold": 0.25,
473
  "label": "signup",
474
  "meets_threshold": true,
475
- "raw_confidence": 0.8993
476
  },
477
  "intent_type": {
478
  "calibrated": true,
479
- "confidence": 0.9788,
480
  "confidence_threshold": 0.4,
481
  "label": "transactional",
482
  "meets_threshold": true,
483
- "raw_confidence": 0.9614
484
  },
485
  "overall_strategy": "min_required_component_confidence"
486
  },
487
- "confidence": 0.9473,
488
  "decision_phase": "action",
489
  "subtype": "signup",
490
  "summary": "Classified as transactional intent with subtype signup in the action phase.",
@@ -529,7 +517,7 @@
529
  "model_output": {
530
  "classification": {
531
  "iab_content": {
532
- "mapping_confidence": 0.8304,
533
  "mapping_mode": "exact",
534
  "taxonomy": "IAB Content Taxonomy",
535
  "taxonomy_version": "3.0",
@@ -547,31 +535,31 @@
547
  "component_confidence": {
548
  "decision_phase": {
549
  "calibrated": true,
550
- "confidence": 0.9595,
551
  "confidence_threshold": 0.22,
552
  "label": "action",
553
  "meets_threshold": true,
554
- "raw_confidence": 0.9608
555
  },
556
  "intent_subtype": {
557
  "calibrated": true,
558
- "confidence": 0.8434,
559
  "confidence_threshold": 0.25,
560
  "label": "booking",
561
  "meets_threshold": true,
562
- "raw_confidence": 0.7616
563
  },
564
  "intent_type": {
565
  "calibrated": true,
566
- "confidence": 0.9805,
567
  "confidence_threshold": 0.4,
568
  "label": "transactional",
569
  "meets_threshold": true,
570
- "raw_confidence": 0.9649
571
  },
572
  "overall_strategy": "min_required_component_confidence"
573
  },
574
- "confidence": 0.8434,
575
  "decision_phase": "action",
576
  "subtype": "booking",
577
  "summary": "Classified as transactional intent with subtype booking in the action phase.",
@@ -616,8 +604,8 @@
616
  "model_output": {
617
  "classification": {
618
  "iab_content": {
619
- "mapping_confidence": 0.5261,
620
- "mapping_mode": "exact",
621
  "taxonomy": "IAB Content Taxonomy",
622
  "taxonomy_version": "3.0",
623
  "tier1": {
@@ -630,31 +618,31 @@
630
  "component_confidence": {
631
  "decision_phase": {
632
  "calibrated": true,
633
- "confidence": 0.9573,
634
  "confidence_threshold": 0.22,
635
  "label": "post_purchase",
636
  "meets_threshold": true,
637
- "raw_confidence": 0.9587
638
  },
639
  "intent_subtype": {
640
  "calibrated": true,
641
- "confidence": 0.967,
642
  "confidence_threshold": 0.25,
643
  "label": "onboarding_setup",
644
  "meets_threshold": true,
645
- "raw_confidence": 0.9306
646
  },
647
  "intent_type": {
648
  "calibrated": true,
649
- "confidence": 0.5834,
650
  "confidence_threshold": 0.4,
651
  "label": "transactional",
652
  "meets_threshold": true,
653
- "raw_confidence": 0.5253
654
  },
655
  "overall_strategy": "min_required_component_confidence"
656
  },
657
- "confidence": 0.5834,
658
  "decision_phase": "post_purchase",
659
  "subtype": "onboarding_setup",
660
  "summary": "Classified as transactional intent with subtype onboarding_setup in the post_purchase phase.",
@@ -699,21 +687,13 @@
699
  "model_output": {
700
  "classification": {
701
  "iab_content": {
702
- "mapping_confidence": 0.272,
703
- "mapping_mode": "exact",
704
  "taxonomy": "IAB Content Taxonomy",
705
  "taxonomy_version": "3.0",
706
  "tier1": {
707
- "id": "52",
708
- "label": "Business and Finance"
709
- },
710
- "tier2": {
711
- "id": "53",
712
- "label": "Business"
713
- },
714
- "tier3": {
715
- "id": "72",
716
- "label": "Business I.T."
717
  }
718
  },
719
  "intent": {
@@ -721,31 +701,31 @@
721
  "component_confidence": {
722
  "decision_phase": {
723
  "calibrated": true,
724
- "confidence": 0.9589,
725
  "confidence_threshold": 0.22,
726
  "label": "support",
727
  "meets_threshold": true,
728
- "raw_confidence": 0.9603
729
  },
730
  "intent_subtype": {
731
  "calibrated": true,
732
- "confidence": 0.8859,
733
  "confidence_threshold": 0.25,
734
  "label": "account_help",
735
  "meets_threshold": true,
736
- "raw_confidence": 0.8147
737
  },
738
  "intent_type": {
739
  "calibrated": true,
740
- "confidence": 0.9699,
741
  "confidence_threshold": 0.4,
742
  "label": "support",
743
  "meets_threshold": true,
744
- "raw_confidence": 0.9476
745
  },
746
  "overall_strategy": "min_required_component_confidence"
747
  },
748
- "confidence": 0.8859,
749
  "decision_phase": "support",
750
  "subtype": "account_help",
751
  "summary": "Classified as support intent with subtype account_help in the support phase.",
@@ -796,8 +776,8 @@
796
  "model_output": {
797
  "classification": {
798
  "iab_content": {
799
- "mapping_confidence": 0.7892,
800
- "mapping_mode": "exact",
801
  "taxonomy": "IAB Content Taxonomy",
802
  "taxonomy_version": "3.0",
803
  "tier1": {
@@ -810,31 +790,31 @@
810
  "component_confidence": {
811
  "decision_phase": {
812
  "calibrated": true,
813
- "confidence": 0.9219,
814
  "confidence_threshold": 0.22,
815
  "label": "awareness",
816
  "meets_threshold": true,
817
- "raw_confidence": 0.9239
818
  },
819
  "intent_subtype": {
820
  "calibrated": true,
821
- "confidence": 0.9492,
822
  "confidence_threshold": 0.25,
823
  "label": "emotional_reflection",
824
  "meets_threshold": true,
825
- "raw_confidence": 0.9021
826
  },
827
  "intent_type": {
828
  "calibrated": true,
829
- "confidence": 0.9388,
830
  "confidence_threshold": 0.4,
831
  "label": "personal_reflection",
832
  "meets_threshold": true,
833
- "raw_confidence": 0.9059
834
  },
835
  "overall_strategy": "min_required_component_confidence"
836
  },
837
- "confidence": 0.9219,
838
  "decision_phase": "awareness",
839
  "subtype": "emotional_reflection",
840
  "summary": "Classified as personal_reflection intent with subtype emotional_reflection in the awareness phase.",
@@ -885,7 +865,7 @@
885
  "model_output": {
886
  "classification": {
887
  "iab_content": {
888
- "mapping_confidence": 0.2238,
889
  "mapping_mode": "exact",
890
  "taxonomy": "IAB Content Taxonomy",
891
  "taxonomy_version": "3.0",
@@ -899,31 +879,31 @@
899
  "component_confidence": {
900
  "decision_phase": {
901
  "calibrated": true,
902
- "confidence": 0.8763,
903
  "confidence_threshold": 0.22,
904
  "label": "research",
905
  "meets_threshold": true,
906
- "raw_confidence": 0.8791
907
  },
908
  "intent_subtype": {
909
  "calibrated": true,
910
- "confidence": 0.9683,
911
  "confidence_threshold": 0.25,
912
  "label": "follow_up",
913
  "meets_threshold": true,
914
- "raw_confidence": 0.9314
915
  },
916
  "intent_type": {
917
  "calibrated": true,
918
- "confidence": 0.9623,
919
  "confidence_threshold": 0.4,
920
  "label": "ambiguous",
921
  "meets_threshold": true,
922
- "raw_confidence": 0.9367
923
  },
924
  "overall_strategy": "min_required_component_confidence"
925
  },
926
- "confidence": 0.8763,
927
  "decision_phase": "research",
928
  "subtype": "follow_up",
929
  "summary": "Classified as ambiguous intent with subtype follow_up in the research phase.",
@@ -974,17 +954,13 @@
974
  "model_output": {
975
  "classification": {
976
  "iab_content": {
977
- "mapping_confidence": 0.2371,
978
  "mapping_mode": "exact",
979
  "taxonomy": "IAB Content Taxonomy",
980
  "taxonomy_version": "3.0",
981
  "tier1": {
982
  "id": "391",
983
  "label": "Personal Finance"
984
- },
985
- "tier2": {
986
- "id": "396",
987
- "label": "Financial Planning"
988
  }
989
  },
990
  "intent": {
@@ -992,31 +968,31 @@
992
  "component_confidence": {
993
  "decision_phase": {
994
  "calibrated": true,
995
- "confidence": 0.9225,
996
  "confidence_threshold": 0.22,
997
  "label": "research",
998
  "meets_threshold": true,
999
- "raw_confidence": 0.9246
1000
  },
1001
  "intent_subtype": {
1002
  "calibrated": true,
1003
- "confidence": 0.9586,
1004
  "confidence_threshold": 0.25,
1005
  "label": "follow_up",
1006
  "meets_threshold": true,
1007
- "raw_confidence": 0.9146
1008
  },
1009
  "intent_type": {
1010
  "calibrated": true,
1011
- "confidence": 0.9488,
1012
  "confidence_threshold": 0.4,
1013
  "label": "ambiguous",
1014
  "meets_threshold": true,
1015
- "raw_confidence": 0.9179
1016
  },
1017
  "overall_strategy": "min_required_component_confidence"
1018
  },
1019
- "confidence": 0.9225,
1020
  "decision_phase": "research",
1021
  "subtype": "follow_up",
1022
  "summary": "Classified as ambiguous intent with subtype follow_up in the research phase.",
@@ -1067,13 +1043,13 @@
1067
  "model_output": {
1068
  "classification": {
1069
  "iab_content": {
1070
- "mapping_confidence": 0.2131,
1071
  "mapping_mode": "nearest_equivalent",
1072
  "taxonomy": "IAB Content Taxonomy",
1073
  "taxonomy_version": "3.0",
1074
  "tier1": {
1075
- "id": "42",
1076
- "label": "Books and Literature"
1077
  }
1078
  },
1079
  "intent": {
@@ -1081,31 +1057,31 @@
1081
  "component_confidence": {
1082
  "decision_phase": {
1083
  "calibrated": true,
1084
- "confidence": 0.9861,
1085
  "confidence_threshold": 0.22,
1086
  "label": "action",
1087
  "meets_threshold": true,
1088
- "raw_confidence": 0.9867
1089
  },
1090
  "intent_subtype": {
1091
  "calibrated": true,
1092
- "confidence": 0.7335,
1093
  "confidence_threshold": 0.25,
1094
  "label": "signup",
1095
  "meets_threshold": true,
1096
- "raw_confidence": 0.6454
1097
  },
1098
  "intent_type": {
1099
  "calibrated": true,
1100
- "confidence": 0.9628,
1101
  "confidence_threshold": 0.4,
1102
  "label": "transactional",
1103
  "meets_threshold": true,
1104
- "raw_confidence": 0.938
1105
  },
1106
  "overall_strategy": "min_required_component_confidence"
1107
  },
1108
- "confidence": 0.7335,
1109
  "decision_phase": "action",
1110
  "subtype": "signup",
1111
  "summary": "Classified as transactional intent with subtype signup in the action phase.",
@@ -1150,17 +1126,17 @@
1150
  "model_output": {
1151
  "classification": {
1152
  "iab_content": {
1153
- "mapping_confidence": 0.3327,
1154
  "mapping_mode": "exact",
1155
  "taxonomy": "IAB Content Taxonomy",
1156
  "taxonomy_version": "3.0",
1157
  "tier1": {
1158
- "id": "596",
1159
- "label": "Technology & Computing"
1160
  },
1161
  "tier2": {
1162
- "id": "639",
1163
- "label": "Virtual Reality"
1164
  }
1165
  },
1166
  "intent": {
@@ -1168,31 +1144,31 @@
1168
  "component_confidence": {
1169
  "decision_phase": {
1170
  "calibrated": true,
1171
- "confidence": 0.9295,
1172
  "confidence_threshold": 0.22,
1173
  "label": "consideration",
1174
  "meets_threshold": true,
1175
- "raw_confidence": 0.9315
1176
  },
1177
  "intent_subtype": {
1178
  "calibrated": true,
1179
- "confidence": 0.9374,
1180
  "confidence_threshold": 0.25,
1181
  "label": "comparison",
1182
  "meets_threshold": true,
1183
- "raw_confidence": 0.8838
1184
  },
1185
  "intent_type": {
1186
  "calibrated": true,
1187
- "confidence": 0.9602,
1188
  "confidence_threshold": 0.4,
1189
  "label": "commercial",
1190
  "meets_threshold": true,
1191
- "raw_confidence": 0.9329
1192
  },
1193
  "overall_strategy": "min_required_component_confidence"
1194
  },
1195
- "confidence": 0.9295,
1196
  "decision_phase": "consideration",
1197
  "subtype": "comparison",
1198
  "summary": "Classified as commercial intent with subtype comparison in the consideration phase.",
@@ -1237,7 +1213,7 @@
1237
  "model_output": {
1238
  "classification": {
1239
  "iab_content": {
1240
- "mapping_confidence": 0.3227,
1241
  "mapping_mode": "exact",
1242
  "taxonomy": "IAB Content Taxonomy",
1243
  "taxonomy_version": "3.0",
@@ -1251,31 +1227,31 @@
1251
  "component_confidence": {
1252
  "decision_phase": {
1253
  "calibrated": true,
1254
- "confidence": 0.9535,
1255
  "confidence_threshold": 0.22,
1256
  "label": "awareness",
1257
  "meets_threshold": true,
1258
- "raw_confidence": 0.955
1259
  },
1260
  "intent_subtype": {
1261
  "calibrated": true,
1262
- "confidence": 0.9793,
1263
  "confidence_threshold": 0.25,
1264
  "label": "education",
1265
  "meets_threshold": true,
1266
- "raw_confidence": 0.9527
1267
  },
1268
  "intent_type": {
1269
  "calibrated": true,
1270
- "confidence": 0.9769,
1271
  "confidence_threshold": 0.4,
1272
  "label": "informational",
1273
  "meets_threshold": true,
1274
- "raw_confidence": 0.9584
1275
  },
1276
  "overall_strategy": "min_required_component_confidence"
1277
  },
1278
- "confidence": 0.9535,
1279
  "decision_phase": "awareness",
1280
  "subtype": "education",
1281
  "summary": "Classified as informational intent with subtype education in the awareness phase.",
 
11
  "model_output": {
12
  "classification": {
13
  "iab_content": {
14
+ "mapping_confidence": 0.3078,
15
  "mapping_mode": "exact",
16
  "taxonomy": "IAB Content Taxonomy",
17
  "taxonomy_version": "3.0",
18
  "tier1": {
19
  "id": "596",
20
  "label": "Technology & Computing"
 
 
 
 
 
 
 
 
21
  }
22
  },
23
  "intent": {
 
25
  "component_confidence": {
26
  "decision_phase": {
27
  "calibrated": true,
28
+ "confidence": 0.9548,
29
  "confidence_threshold": 0.22,
30
  "label": "awareness",
31
  "meets_threshold": true,
32
+ "raw_confidence": 0.9611
33
  },
34
  "intent_subtype": {
35
  "calibrated": true,
36
+ "confidence": 0.9731,
37
  "confidence_threshold": 0.25,
38
  "label": "education",
39
  "meets_threshold": true,
40
+ "raw_confidence": 0.9378
41
  },
42
  "intent_type": {
43
  "calibrated": true,
44
+ "confidence": 0.9816,
45
  "confidence_threshold": 0.4,
46
  "label": "informational",
47
  "meets_threshold": true,
48
+ "raw_confidence": 0.9644
49
  },
50
  "overall_strategy": "min_required_component_confidence"
51
  },
52
+ "confidence": 0.9548,
53
  "decision_phase": "awareness",
54
  "subtype": "education",
55
  "summary": "Classified as informational intent with subtype education in the awareness phase.",
 
94
  "model_output": {
95
  "classification": {
96
  "iab_content": {
97
+ "mapping_confidence": 0.2281,
98
  "mapping_mode": "exact",
99
  "taxonomy": "IAB Content Taxonomy",
100
  "taxonomy_version": "3.0",
 
108
  "component_confidence": {
109
  "decision_phase": {
110
  "calibrated": true,
111
+ "confidence": 0.9159,
112
  "confidence_threshold": 0.22,
113
  "label": "awareness",
114
  "meets_threshold": true,
115
+ "raw_confidence": 0.9256
116
  },
117
  "intent_subtype": {
118
  "calibrated": true,
119
+ "confidence": 0.9671,
120
  "confidence_threshold": 0.25,
121
  "label": "education",
122
  "meets_threshold": true,
123
+ "raw_confidence": 0.9273
124
  },
125
  "intent_type": {
126
  "calibrated": true,
127
+ "confidence": 0.9771,
128
  "confidence_threshold": 0.4,
129
  "label": "informational",
130
  "meets_threshold": true,
131
+ "raw_confidence": 0.957
132
  },
133
  "overall_strategy": "min_required_component_confidence"
134
  },
135
+ "confidence": 0.9159,
136
  "decision_phase": "awareness",
137
  "subtype": "education",
138
  "summary": "Classified as informational intent with subtype education in the awareness phase.",
 
177
  "model_output": {
178
  "classification": {
179
  "iab_content": {
180
+ "mapping_confidence": 0.6271,
181
+ "mapping_mode": "nearest_equivalent",
182
  "taxonomy": "IAB Content Taxonomy",
183
  "taxonomy_version": "3.0",
184
  "tier1": {
185
  "id": "483",
186
  "label": "Sports"
 
 
 
 
187
  }
188
  },
189
  "intent": {
 
191
  "component_confidence": {
192
  "decision_phase": {
193
  "calibrated": true,
194
+ "confidence": 0.9469,
195
  "confidence_threshold": 0.22,
196
  "label": "consideration",
197
  "meets_threshold": true,
198
+ "raw_confidence": 0.954
199
  },
200
  "intent_subtype": {
201
  "calibrated": true,
202
+ "confidence": 0.4849,
203
  "confidence_threshold": 0.25,
204
  "label": "comparison",
205
  "meets_threshold": true,
206
+ "raw_confidence": 0.4322
207
  },
208
  "intent_type": {
209
  "calibrated": true,
210
+ "confidence": 0.9863,
211
  "confidence_threshold": 0.4,
212
  "label": "commercial",
213
  "meets_threshold": true,
214
+ "raw_confidence": 0.9724
215
  },
216
  "overall_strategy": "min_required_component_confidence"
217
  },
218
+ "confidence": 0.4849,
219
  "decision_phase": "consideration",
220
  "subtype": "comparison",
221
  "summary": "Classified as commercial intent with subtype comparison in the consideration phase.",
 
260
  "model_output": {
261
  "classification": {
262
  "iab_content": {
263
+ "mapping_confidence": 0.1648,
264
+ "mapping_mode": "nearest_equivalent",
265
  "taxonomy": "IAB Content Taxonomy",
266
  "taxonomy_version": "3.0",
267
  "tier1": {
268
  "id": "596",
269
  "label": "Technology & Computing"
 
 
 
 
270
  }
271
  },
272
  "intent": {
 
274
  "component_confidence": {
275
  "decision_phase": {
276
  "calibrated": true,
277
+ "confidence": 0.9303,
278
  "confidence_threshold": 0.22,
279
  "label": "consideration",
280
  "meets_threshold": true,
281
+ "raw_confidence": 0.9389
282
  },
283
  "intent_subtype": {
284
  "calibrated": true,
285
+ "confidence": 0.9598,
286
  "confidence_threshold": 0.25,
287
  "label": "comparison",
288
  "meets_threshold": true,
289
+ "raw_confidence": 0.9157
290
  },
291
  "intent_type": {
292
  "calibrated": true,
293
+ "confidence": 0.9746,
294
  "confidence_threshold": 0.4,
295
  "label": "commercial",
296
  "meets_threshold": true,
297
+ "raw_confidence": 0.953
298
  },
299
  "overall_strategy": "min_required_component_confidence"
300
  },
301
+ "confidence": 0.9303,
302
  "decision_phase": "consideration",
303
  "subtype": "comparison",
304
  "summary": "Classified as commercial intent with subtype comparison in the consideration phase.",
 
343
  "model_output": {
344
  "classification": {
345
  "iab_content": {
346
+ "mapping_confidence": 0.1701,
347
  "mapping_mode": "exact",
348
  "taxonomy": "IAB Content Taxonomy",
349
  "taxonomy_version": "3.0",
350
  "tier1": {
351
+ "id": "52",
352
+ "label": "Business and Finance"
353
+ },
354
+ "tier2": {
355
+ "id": "53",
356
+ "label": "Business"
357
+ },
358
+ "tier3": {
359
+ "id": "61",
360
+ "label": "Startups"
361
  }
362
  },
363
  "intent": {
 
365
  "component_confidence": {
366
  "decision_phase": {
367
  "calibrated": true,
368
+ "confidence": 0.6389,
369
  "confidence_threshold": 0.22,
370
  "label": "decision",
371
  "meets_threshold": true,
372
+ "raw_confidence": 0.6498
373
  },
374
  "intent_subtype": {
375
  "calibrated": true,
376
+ "confidence": 0.7851,
377
  "confidence_threshold": 0.25,
378
  "label": "provider_selection",
379
  "meets_threshold": true,
380
+ "raw_confidence": 0.6921
381
  },
382
  "intent_type": {
383
  "calibrated": true,
384
+ "confidence": 0.9784,
385
  "confidence_threshold": 0.4,
386
  "label": "commercial",
387
  "meets_threshold": true,
388
+ "raw_confidence": 0.9591
389
  },
390
  "overall_strategy": "min_required_component_confidence"
391
  },
392
+ "confidence": 0.6389,
393
  "decision_phase": "decision",
394
  "subtype": "provider_selection",
395
  "summary": "Classified as commercial intent with subtype provider_selection in the decision phase.",
 
434
  "model_output": {
435
  "classification": {
436
  "iab_content": {
437
+ "mapping_confidence": 0.24,
438
+ "mapping_mode": "nearest_equivalent",
439
  "taxonomy": "IAB Content Taxonomy",
440
  "taxonomy_version": "3.0",
441
  "tier1": {
442
+ "id": "483",
443
+ "label": "Sports"
 
 
 
 
444
  }
445
  },
446
  "intent": {
 
448
  "component_confidence": {
449
  "decision_phase": {
450
  "calibrated": true,
451
+ "confidence": 0.937,
452
  "confidence_threshold": 0.22,
453
  "label": "action",
454
  "meets_threshold": true,
455
+ "raw_confidence": 0.9451
456
  },
457
  "intent_subtype": {
458
  "calibrated": true,
459
+ "confidence": 0.9242,
460
  "confidence_threshold": 0.25,
461
  "label": "signup",
462
  "meets_threshold": true,
463
+ "raw_confidence": 0.8636
464
  },
465
  "intent_type": {
466
  "calibrated": true,
467
+ "confidence": 0.9809,
468
  "confidence_threshold": 0.4,
469
  "label": "transactional",
470
  "meets_threshold": true,
471
+ "raw_confidence": 0.9633
472
  },
473
  "overall_strategy": "min_required_component_confidence"
474
  },
475
+ "confidence": 0.9242,
476
  "decision_phase": "action",
477
  "subtype": "signup",
478
  "summary": "Classified as transactional intent with subtype signup in the action phase.",
 
517
  "model_output": {
518
  "classification": {
519
  "iab_content": {
520
+ "mapping_confidence": 0.4786,
521
  "mapping_mode": "exact",
522
  "taxonomy": "IAB Content Taxonomy",
523
  "taxonomy_version": "3.0",
 
535
  "component_confidence": {
536
  "decision_phase": {
537
  "calibrated": true,
538
+ "confidence": 0.9436,
539
  "confidence_threshold": 0.22,
540
  "label": "action",
541
  "meets_threshold": true,
542
+ "raw_confidence": 0.951
543
  },
544
  "intent_subtype": {
545
  "calibrated": true,
546
+ "confidence": 0.8891,
547
  "confidence_threshold": 0.25,
548
  "label": "booking",
549
  "meets_threshold": true,
550
+ "raw_confidence": 0.8107
551
  },
552
  "intent_type": {
553
  "calibrated": true,
554
+ "confidence": 0.9715,
555
  "confidence_threshold": 0.4,
556
  "label": "transactional",
557
  "meets_threshold": true,
558
+ "raw_confidence": 0.9481
559
  },
560
  "overall_strategy": "min_required_component_confidence"
561
  },
562
+ "confidence": 0.8891,
563
  "decision_phase": "action",
564
  "subtype": "booking",
565
  "summary": "Classified as transactional intent with subtype booking in the action phase.",
 
604
  "model_output": {
605
  "classification": {
606
  "iab_content": {
607
+ "mapping_confidence": 0.3826,
608
+ "mapping_mode": "nearest_equivalent",
609
  "taxonomy": "IAB Content Taxonomy",
610
  "taxonomy_version": "3.0",
611
  "tier1": {
 
618
  "component_confidence": {
619
  "decision_phase": {
620
  "calibrated": true,
621
+ "confidence": 0.9613,
622
  "confidence_threshold": 0.22,
623
  "label": "post_purchase",
624
  "meets_threshold": true,
625
+ "raw_confidence": 0.9669
626
  },
627
  "intent_subtype": {
628
  "calibrated": true,
629
+ "confidence": 0.965,
630
  "confidence_threshold": 0.25,
631
  "label": "onboarding_setup",
632
  "meets_threshold": true,
633
+ "raw_confidence": 0.9235
634
  },
635
  "intent_type": {
636
  "calibrated": true,
637
+ "confidence": 0.5393,
638
  "confidence_threshold": 0.4,
639
  "label": "transactional",
640
  "meets_threshold": true,
641
+ "raw_confidence": 0.4772
642
  },
643
  "overall_strategy": "min_required_component_confidence"
644
  },
645
+ "confidence": 0.5393,
646
  "decision_phase": "post_purchase",
647
  "subtype": "onboarding_setup",
648
  "summary": "Classified as transactional intent with subtype onboarding_setup in the post_purchase phase.",
 
687
  "model_output": {
688
  "classification": {
689
  "iab_content": {
690
+ "mapping_confidence": 0.3628,
691
+ "mapping_mode": "nearest_equivalent",
692
  "taxonomy": "IAB Content Taxonomy",
693
  "taxonomy_version": "3.0",
694
  "tier1": {
695
+ "id": "391",
696
+ "label": "Personal Finance"
 
 
 
 
 
 
 
 
697
  }
698
  },
699
  "intent": {
 
701
  "component_confidence": {
702
  "decision_phase": {
703
  "calibrated": true,
704
+ "confidence": 0.9481,
705
  "confidence_threshold": 0.22,
706
  "label": "support",
707
  "meets_threshold": true,
708
+ "raw_confidence": 0.9551
709
  },
710
  "intent_subtype": {
711
  "calibrated": true,
712
+ "confidence": 0.934,
713
  "confidence_threshold": 0.25,
714
  "label": "account_help",
715
  "meets_threshold": true,
716
+ "raw_confidence": 0.8749
717
  },
718
  "intent_type": {
719
  "calibrated": true,
720
+ "confidence": 0.9542,
721
  "confidence_threshold": 0.4,
722
  "label": "support",
723
  "meets_threshold": true,
724
+ "raw_confidence": 0.9232
725
  },
726
  "overall_strategy": "min_required_component_confidence"
727
  },
728
+ "confidence": 0.934,
729
  "decision_phase": "support",
730
  "subtype": "account_help",
731
  "summary": "Classified as support intent with subtype account_help in the support phase.",
 
776
  "model_output": {
777
  "classification": {
778
  "iab_content": {
779
+ "mapping_confidence": 0.3231,
780
+ "mapping_mode": "nearest_equivalent",
781
  "taxonomy": "IAB Content Taxonomy",
782
  "taxonomy_version": "3.0",
783
  "tier1": {
 
790
  "component_confidence": {
791
  "decision_phase": {
792
  "calibrated": true,
793
+ "confidence": 0.8468,
794
  "confidence_threshold": 0.22,
795
  "label": "awareness",
796
  "meets_threshold": true,
797
+ "raw_confidence": 0.8606
798
  },
799
  "intent_subtype": {
800
  "calibrated": true,
801
+ "confidence": 0.9639,
802
  "confidence_threshold": 0.25,
803
  "label": "emotional_reflection",
804
  "meets_threshold": true,
805
+ "raw_confidence": 0.9211
806
  },
807
  "intent_type": {
808
  "calibrated": true,
809
+ "confidence": 0.9627,
810
  "confidence_threshold": 0.4,
811
  "label": "personal_reflection",
812
  "meets_threshold": true,
813
+ "raw_confidence": 0.9348
814
  },
815
  "overall_strategy": "min_required_component_confidence"
816
  },
817
+ "confidence": 0.8468,
818
  "decision_phase": "awareness",
819
  "subtype": "emotional_reflection",
820
  "summary": "Classified as personal_reflection intent with subtype emotional_reflection in the awareness phase.",
 
865
  "model_output": {
866
  "classification": {
867
  "iab_content": {
868
+ "mapping_confidence": 0.3327,
869
  "mapping_mode": "exact",
870
  "taxonomy": "IAB Content Taxonomy",
871
  "taxonomy_version": "3.0",
 
879
  "component_confidence": {
880
  "decision_phase": {
881
  "calibrated": true,
882
+ "confidence": 0.8651,
883
  "confidence_threshold": 0.22,
884
  "label": "research",
885
  "meets_threshold": true,
886
+ "raw_confidence": 0.8781
887
  },
888
  "intent_subtype": {
889
  "calibrated": true,
890
+ "confidence": 0.9652,
891
  "confidence_threshold": 0.25,
892
  "label": "follow_up",
893
  "meets_threshold": true,
894
+ "raw_confidence": 0.9229
895
  },
896
  "intent_type": {
897
  "calibrated": true,
898
+ "confidence": 0.9746,
899
  "confidence_threshold": 0.4,
900
  "label": "ambiguous",
901
  "meets_threshold": true,
902
+ "raw_confidence": 0.9541
903
  },
904
  "overall_strategy": "min_required_component_confidence"
905
  },
906
+ "confidence": 0.8651,
907
  "decision_phase": "research",
908
  "subtype": "follow_up",
909
  "summary": "Classified as ambiguous intent with subtype follow_up in the research phase.",
 
954
  "model_output": {
955
  "classification": {
956
  "iab_content": {
957
+ "mapping_confidence": 0.1481,
958
  "mapping_mode": "exact",
959
  "taxonomy": "IAB Content Taxonomy",
960
  "taxonomy_version": "3.0",
961
  "tier1": {
962
  "id": "391",
963
  "label": "Personal Finance"
 
 
 
 
964
  }
965
  },
966
  "intent": {
 
968
  "component_confidence": {
969
  "decision_phase": {
970
  "calibrated": true,
971
+ "confidence": 0.9177,
972
  "confidence_threshold": 0.22,
973
  "label": "research",
974
  "meets_threshold": true,
975
+ "raw_confidence": 0.9273
976
  },
977
  "intent_subtype": {
978
  "calibrated": true,
979
+ "confidence": 0.9506,
980
  "confidence_threshold": 0.25,
981
  "label": "follow_up",
982
  "meets_threshold": true,
983
+ "raw_confidence": 0.8983
984
  },
985
  "intent_type": {
986
  "calibrated": true,
987
+ "confidence": 0.9628,
988
  "confidence_threshold": 0.4,
989
  "label": "ambiguous",
990
  "meets_threshold": true,
991
+ "raw_confidence": 0.9356
992
  },
993
  "overall_strategy": "min_required_component_confidence"
994
  },
995
+ "confidence": 0.9177,
996
  "decision_phase": "research",
997
  "subtype": "follow_up",
998
  "summary": "Classified as ambiguous intent with subtype follow_up in the research phase.",
 
1043
  "model_output": {
1044
  "classification": {
1045
  "iab_content": {
1046
+ "mapping_confidence": 0.0729,
1047
  "mapping_mode": "nearest_equivalent",
1048
  "taxonomy": "IAB Content Taxonomy",
1049
  "taxonomy_version": "3.0",
1050
  "tier1": {
1051
+ "id": "123",
1052
+ "label": "Careers"
1053
  }
1054
  },
1055
  "intent": {
 
1057
  "component_confidence": {
1058
  "decision_phase": {
1059
  "calibrated": true,
1060
+ "confidence": 0.9739,
1061
  "confidence_threshold": 0.22,
1062
  "label": "action",
1063
  "meets_threshold": true,
1064
+ "raw_confidence": 0.9781
1065
  },
1066
  "intent_subtype": {
1067
  "calibrated": true,
1068
+ "confidence": 0.7259,
1069
  "confidence_threshold": 0.25,
1070
  "label": "signup",
1071
  "meets_threshold": true,
1072
+ "raw_confidence": 0.6331
1073
  },
1074
  "intent_type": {
1075
  "calibrated": true,
1076
+ "confidence": 0.9763,
1077
  "confidence_threshold": 0.4,
1078
  "label": "transactional",
1079
  "meets_threshold": true,
1080
+ "raw_confidence": 0.9557
1081
  },
1082
  "overall_strategy": "min_required_component_confidence"
1083
  },
1084
+ "confidence": 0.7259,
1085
  "decision_phase": "action",
1086
  "subtype": "signup",
1087
  "summary": "Classified as transactional intent with subtype signup in the action phase.",
 
1126
  "model_output": {
1127
  "classification": {
1128
  "iab_content": {
1129
+ "mapping_confidence": 0.1383,
1130
  "mapping_mode": "exact",
1131
  "taxonomy": "IAB Content Taxonomy",
1132
  "taxonomy_version": "3.0",
1133
  "tier1": {
1134
+ "id": "123",
1135
+ "label": "Careers"
1136
  },
1137
  "tier2": {
1138
+ "id": "127",
1139
+ "label": "Job Search"
1140
  }
1141
  },
1142
  "intent": {
 
1144
  "component_confidence": {
1145
  "decision_phase": {
1146
  "calibrated": true,
1147
+ "confidence": 0.9578,
1148
  "confidence_threshold": 0.22,
1149
  "label": "consideration",
1150
  "meets_threshold": true,
1151
+ "raw_confidence": 0.9639
1152
  },
1153
  "intent_subtype": {
1154
  "calibrated": true,
1155
+ "confidence": 0.9095,
1156
  "confidence_threshold": 0.25,
1157
  "label": "comparison",
1158
  "meets_threshold": true,
1159
+ "raw_confidence": 0.8429
1160
  },
1161
  "intent_type": {
1162
  "calibrated": true,
1163
+ "confidence": 0.9747,
1164
  "confidence_threshold": 0.4,
1165
  "label": "commercial",
1166
  "meets_threshold": true,
1167
+ "raw_confidence": 0.953
1168
  },
1169
  "overall_strategy": "min_required_component_confidence"
1170
  },
1171
+ "confidence": 0.9095,
1172
  "decision_phase": "consideration",
1173
  "subtype": "comparison",
1174
  "summary": "Classified as commercial intent with subtype comparison in the consideration phase.",
 
1213
  "model_output": {
1214
  "classification": {
1215
  "iab_content": {
1216
+ "mapping_confidence": 0.1608,
1217
  "mapping_mode": "exact",
1218
  "taxonomy": "IAB Content Taxonomy",
1219
  "taxonomy_version": "3.0",
 
1227
  "component_confidence": {
1228
  "decision_phase": {
1229
  "calibrated": true,
1230
+ "confidence": 0.9436,
1231
  "confidence_threshold": 0.22,
1232
  "label": "awareness",
1233
  "meets_threshold": true,
1234
+ "raw_confidence": 0.951
1235
  },
1236
  "intent_subtype": {
1237
  "calibrated": true,
1238
+ "confidence": 0.9692,
1239
  "confidence_threshold": 0.25,
1240
  "label": "education",
1241
  "meets_threshold": true,
1242
+ "raw_confidence": 0.931
1243
  },
1244
  "intent_type": {
1245
  "calibrated": true,
1246
+ "confidence": 0.9775,
1247
  "confidence_threshold": 0.4,
1248
  "label": "informational",
1249
  "meets_threshold": true,
1250
+ "raw_confidence": 0.9578
1251
  },
1252
  "overall_strategy": "min_required_component_confidence"
1253
  },
1254
+ "confidence": 0.9436,
1255
  "decision_phase": "awareness",
1256
  "subtype": "education",
1257
  "summary": "Classified as informational intent with subtype education in the awareness phase.",
artifacts/evaluation/latest/decision_phase_difficulty_benchmark_confusion_matrix.csv CHANGED
@@ -1,8 +1,8 @@
1
  ,awareness,research,consideration,decision,action,post_purchase,support
2
  awareness,14,1,0,0,0,0,0
3
  research,0,15,0,0,0,0,0
4
- consideration,0,2,13,0,0,0,0
5
  decision,0,1,0,14,0,0,0
6
- action,0,0,0,1,14,0,0
7
  post_purchase,0,0,0,0,0,15,0
8
  support,0,0,0,0,0,0,15
 
1
  ,awareness,research,consideration,decision,action,post_purchase,support
2
  awareness,14,1,0,0,0,0,0
3
  research,0,15,0,0,0,0,0
4
+ consideration,0,1,14,0,0,0,0
5
  decision,0,1,0,14,0,0,0
6
+ action,0,1,0,0,14,0,0
7
  post_purchase,0,0,0,0,0,15,0
8
  support,0,0,0,0,0,0,15
artifacts/evaluation/latest/decision_phase_difficulty_benchmark_report.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "accepted_accuracy": 0.9524,
3
  "accepted_coverage": 1.0,
4
- "accuracy": 0.9524,
5
  "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/decision_phase_difficulty_benchmark_confusion_matrix.csv",
6
  "count": 105,
7
  "dataset_path": "/content/agentic-intent-classifier/data/decision_phase_benchmark.jsonl",
@@ -15,12 +15,12 @@
15
  "macro_f1": 0.9711
16
  },
17
  "hard": {
18
- "accepted_accuracy": 0.8857,
19
  "accepted_coverage": 1.0,
20
- "accuracy": 0.8857,
21
  "count": 35,
22
  "fallback_rate": 0.0,
23
- "macro_f1": 0.8908
24
  },
25
  "medium": {
26
  "accepted_accuracy": 1.0,
@@ -33,9 +33,9 @@
33
  },
34
  "fallback_rate": 0.0,
35
  "head": "decision_phase",
36
- "macro_f1": 0.9536,
37
  "per_class_metrics": {
38
- "accuracy": 0.9523809523809523,
39
  "action": {
40
  "f1-score": 0.9655172413793104,
41
  "precision": 1.0,
@@ -49,21 +49,21 @@
49
  "support": 15.0
50
  },
51
  "consideration": {
52
- "f1-score": 0.9285714285714286,
53
  "precision": 1.0,
54
- "recall": 0.8666666666666667,
55
  "support": 15.0
56
  },
57
  "decision": {
58
- "f1-score": 0.9333333333333333,
59
- "precision": 0.9333333333333333,
60
  "recall": 0.9333333333333333,
61
  "support": 15.0
62
  },
63
  "macro avg": {
64
- "f1-score": 0.9536131694056934,
65
- "precision": 0.9604010025062657,
66
- "recall": 0.9523809523809524,
67
  "support": 105.0
68
  },
69
  "post_purchase": {
@@ -85,9 +85,9 @@
85
  "support": 15.0
86
  },
87
  "weighted avg": {
88
- "f1-score": 0.9536131694056934,
89
- "precision": 0.9604010025062656,
90
- "recall": 0.9523809523809523,
91
  "support": 105.0
92
  }
93
  },
 
1
  {
2
+ "accepted_accuracy": 0.9619,
3
  "accepted_coverage": 1.0,
4
+ "accuracy": 0.9619,
5
  "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/decision_phase_difficulty_benchmark_confusion_matrix.csv",
6
  "count": 105,
7
  "dataset_path": "/content/agentic-intent-classifier/data/decision_phase_benchmark.jsonl",
 
15
  "macro_f1": 0.9711
16
  },
17
  "hard": {
18
+ "accepted_accuracy": 0.9143,
19
  "accepted_coverage": 1.0,
20
+ "accuracy": 0.9143,
21
  "count": 35,
22
  "fallback_rate": 0.0,
23
+ "macro_f1": 0.9194
24
  },
25
  "medium": {
26
  "accepted_accuracy": 1.0,
 
33
  },
34
  "fallback_rate": 0.0,
35
  "head": "decision_phase",
36
+ "macro_f1": 0.9635,
37
  "per_class_metrics": {
38
+ "accuracy": 0.9619047619047619,
39
  "action": {
40
  "f1-score": 0.9655172413793104,
41
  "precision": 1.0,
 
49
  "support": 15.0
50
  },
51
  "consideration": {
52
+ "f1-score": 0.9655172413793104,
53
  "precision": 1.0,
54
+ "recall": 0.9333333333333333,
55
  "support": 15.0
56
  },
57
  "decision": {
58
+ "f1-score": 0.9655172413793104,
59
+ "precision": 1.0,
60
  "recall": 0.9333333333333333,
61
  "support": 15.0
62
  },
63
  "macro avg": {
64
+ "f1-score": 0.9634888438133874,
65
+ "precision": 0.9699248120300752,
66
+ "recall": 0.9619047619047619,
67
  "support": 105.0
68
  },
69
  "post_purchase": {
 
85
  "support": 15.0
86
  },
87
  "weighted avg": {
88
+ "f1-score": 0.9634888438133875,
89
+ "precision": 0.9699248120300752,
90
+ "recall": 0.9619047619047619,
91
  "support": 105.0
92
  }
93
  },
artifacts/evaluation/latest/decision_phase_final_wave_cases_confusion_matrix.csv CHANGED
@@ -1,7 +1,7 @@
1
  ,awareness,research,consideration,decision,action,post_purchase,support
2
  awareness,5,0,0,0,0,0,0
3
- research,1,3,0,0,0,0,0
4
- consideration,0,2,3,0,0,0,0
5
  decision,0,0,0,5,0,0,0
6
  action,0,0,0,0,0,0,0
7
  post_purchase,0,0,0,0,0,4,0
 
1
  ,awareness,research,consideration,decision,action,post_purchase,support
2
  awareness,5,0,0,0,0,0,0
3
+ research,3,1,0,0,0,0,0
4
+ consideration,0,1,4,0,0,0,0
5
  decision,0,0,0,5,0,0,0
6
  action,0,0,0,0,0,0,0
7
  post_purchase,0,0,0,0,0,4,0
artifacts/evaluation/latest/decision_phase_final_wave_cases_report.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "accepted_accuracy": 0.8889,
3
  "accepted_coverage": 1.0,
4
- "accuracy": 0.8889,
5
  "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/decision_phase_final_wave_cases_confusion_matrix.csv",
6
  "count": 27,
7
  "dataset_path": "/content/agentic-intent-classifier/data/decision_phase/final_wave_cases.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "decision_phase",
10
- "macro_f1": 0.8876,
11
  "per_class_metrics": {
12
- "accuracy": 0.8888888888888888,
13
  "action": {
14
  "f1-score": 0.0,
15
  "precision": 0.0,
@@ -17,15 +17,15 @@
17
  "support": 0.0
18
  },
19
  "awareness": {
20
- "f1-score": 0.9090909090909091,
21
- "precision": 0.8333333333333334,
22
  "recall": 1.0,
23
  "support": 5.0
24
  },
25
  "consideration": {
26
- "f1-score": 0.75,
27
  "precision": 1.0,
28
- "recall": 0.6,
29
  "support": 5.0
30
  },
31
  "decision": {
@@ -35,9 +35,9 @@
35
  "support": 5.0
36
  },
37
  "macro avg": {
38
- "f1-score": 0.7608225108225108,
39
- "precision": 0.7761904761904762,
40
- "recall": 0.7642857142857142,
41
  "support": 27.0
42
  },
43
  "post_purchase": {
@@ -47,9 +47,9 @@
47
  "support": 4.0
48
  },
49
  "research": {
50
- "f1-score": 0.6666666666666666,
51
- "precision": 0.6,
52
- "recall": 0.75,
53
  "support": 4.0
54
  },
55
  "support": {
@@ -59,9 +59,9 @@
59
  "support": 4.0
60
  },
61
  "weighted avg": {
62
- "f1-score": 0.8874859708193041,
63
- "precision": 0.9098765432098765,
64
- "recall": 0.8888888888888888,
65
  "support": 27.0
66
  }
67
  },
 
1
  {
2
+ "accepted_accuracy": 0.8519,
3
  "accepted_coverage": 1.0,
4
+ "accuracy": 0.8519,
5
  "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/decision_phase_final_wave_cases_confusion_matrix.csv",
6
  "count": 27,
7
  "dataset_path": "/content/agentic-intent-classifier/data/decision_phase/final_wave_cases.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "decision_phase",
10
+ "macro_f1": 0.8319,
11
  "per_class_metrics": {
12
+ "accuracy": 0.8518518518518519,
13
  "action": {
14
  "f1-score": 0.0,
15
  "precision": 0.0,
 
17
  "support": 0.0
18
  },
19
  "awareness": {
20
+ "f1-score": 0.7692307692307693,
21
+ "precision": 0.625,
22
  "recall": 1.0,
23
  "support": 5.0
24
  },
25
  "consideration": {
26
+ "f1-score": 0.8888888888888888,
27
  "precision": 1.0,
28
+ "recall": 0.8,
29
  "support": 5.0
30
  },
31
  "decision": {
 
35
  "support": 5.0
36
  },
37
  "macro avg": {
38
+ "f1-score": 0.7130647130647131,
39
+ "precision": 0.7321428571428571,
40
+ "recall": 0.7214285714285714,
41
  "support": 27.0
42
  },
43
  "post_purchase": {
 
47
  "support": 4.0
48
  },
49
  "research": {
50
+ "f1-score": 0.3333333333333333,
51
+ "precision": 0.5,
52
+ "recall": 0.25,
53
  "support": 4.0
54
  },
55
  "support": {
 
59
  "support": 4.0
60
  },
61
  "weighted avg": {
62
+ "f1-score": 0.8379233934789491,
63
+ "precision": 0.8564814814814815,
64
+ "recall": 0.8518518518518519,
65
  "support": 27.0
66
  }
67
  },
artifacts/evaluation/latest/decision_phase_test_confusion_matrix.csv CHANGED
@@ -1,8 +1,8 @@
1
  ,awareness,research,consideration,decision,action,post_purchase,support
2
  awareness,3,0,0,0,0,0,0
3
- research,3,2,0,0,0,0,0
4
  consideration,0,2,3,0,0,0,0
5
- decision,0,0,0,4,0,1,0
6
  action,0,0,0,0,3,0,0
7
  post_purchase,0,0,0,0,0,4,0
8
  support,0,0,0,0,0,1,3
 
1
  ,awareness,research,consideration,decision,action,post_purchase,support
2
  awareness,3,0,0,0,0,0,0
3
+ research,4,1,0,0,0,0,0
4
  consideration,0,2,3,0,0,0,0
5
+ decision,0,0,0,5,0,0,0
6
  action,0,0,0,0,3,0,0
7
  post_purchase,0,0,0,0,0,4,0
8
  support,0,0,0,0,0,1,3
artifacts/evaluation/latest/decision_phase_test_report.json CHANGED
@@ -7,7 +7,7 @@
7
  "dataset_path": "/content/agentic-intent-classifier/data/decision_phase/test.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "decision_phase",
10
- "macro_f1": 0.7724,
11
  "per_class_metrics": {
12
  "accuracy": 0.7586206896551724,
13
  "action": {
@@ -17,8 +17,8 @@
17
  "support": 3.0
18
  },
19
  "awareness": {
20
- "f1-score": 0.6666666666666666,
21
- "precision": 0.5,
22
  "recall": 1.0,
23
  "support": 3.0
24
  },
@@ -29,27 +29,27 @@
29
  "support": 5.0
30
  },
31
  "decision": {
32
- "f1-score": 0.8888888888888888,
33
  "precision": 1.0,
34
- "recall": 0.8,
35
  "support": 5.0
36
  },
37
  "macro avg": {
38
- "f1-score": 0.7724489795918367,
39
- "precision": 0.8095238095238095,
40
  "recall": 0.7928571428571428,
41
  "support": 29.0
42
  },
43
  "post_purchase": {
44
- "f1-score": 0.8,
45
- "precision": 0.6666666666666666,
46
  "recall": 1.0,
47
  "support": 4.0
48
  },
49
  "research": {
50
- "f1-score": 0.4444444444444444,
51
- "precision": 0.5,
52
- "recall": 0.4,
53
  "support": 5.0
54
  },
55
  "support": {
@@ -59,8 +59,8 @@
59
  "support": 4.0
60
  },
61
  "weighted avg": {
62
- "f1-score": 0.7601806239737274,
63
- "precision": 0.8160919540229885,
64
  "recall": 0.7586206896551724,
65
  "support": 29.0
66
  }
 
7
  "dataset_path": "/content/agentic-intent-classifier/data/decision_phase/test.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "decision_phase",
10
+ "macro_f1": 0.7637,
11
  "per_class_metrics": {
12
  "accuracy": 0.7586206896551724,
13
  "action": {
 
17
  "support": 3.0
18
  },
19
  "awareness": {
20
+ "f1-score": 0.6,
21
+ "precision": 0.42857142857142855,
22
  "recall": 1.0,
23
  "support": 3.0
24
  },
 
29
  "support": 5.0
30
  },
31
  "decision": {
32
+ "f1-score": 1.0,
33
  "precision": 1.0,
34
+ "recall": 1.0,
35
  "support": 5.0
36
  },
37
  "macro avg": {
38
+ "f1-score": 0.763718820861678,
39
+ "precision": 0.7945578231292517,
40
  "recall": 0.7928571428571428,
41
  "support": 29.0
42
  },
43
  "post_purchase": {
44
+ "f1-score": 0.8888888888888888,
45
+ "precision": 0.8,
46
  "recall": 1.0,
47
  "support": 4.0
48
  },
49
  "research": {
50
+ "f1-score": 0.25,
51
+ "precision": 0.3333333333333333,
52
+ "recall": 0.2,
53
  "support": 5.0
54
  },
55
  "support": {
 
59
  "support": 4.0
60
  },
61
  "weighted avg": {
62
+ "f1-score": 0.7511767925561028,
63
+ "precision": 0.7983579638752052,
64
  "recall": 0.7586206896551724,
65
  "support": 29.0
66
  }
artifacts/evaluation/latest/decision_phase_train_confusion_matrix.csv CHANGED
@@ -1,8 +1,8 @@
1
  ,awareness,research,consideration,decision,action,post_purchase,support
2
  awareness,16,0,0,0,0,0,0
3
  research,1,14,0,0,0,0,0
4
- consideration,0,4,13,0,0,0,0
5
- decision,0,0,1,15,0,0,0
6
  action,0,0,0,0,10,0,0
7
  post_purchase,0,0,0,0,0,14,0
8
  support,0,0,0,0,0,0,14
 
1
  ,awareness,research,consideration,decision,action,post_purchase,support
2
  awareness,16,0,0,0,0,0,0
3
  research,1,14,0,0,0,0,0
4
+ consideration,0,2,15,0,0,0,0
5
+ decision,0,0,0,16,0,0,0
6
  action,0,0,0,0,10,0,0
7
  post_purchase,0,0,0,0,0,14,0
8
  support,0,0,0,0,0,0,14
artifacts/evaluation/latest/decision_phase_train_report.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "accepted_accuracy": 0.9412,
3
  "accepted_coverage": 1.0,
4
- "accuracy": 0.9412,
5
  "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/decision_phase_train_confusion_matrix.csv",
6
  "count": 102,
7
  "dataset_path": "/content/agentic-intent-classifier/data/decision_phase/train.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "decision_phase",
10
- "macro_f1": 0.9464,
11
  "per_class_metrics": {
12
- "accuracy": 0.9411764705882353,
13
  "action": {
14
  "f1-score": 1.0,
15
  "precision": 1.0,
@@ -23,21 +23,21 @@
23
  "support": 16.0
24
  },
25
  "consideration": {
26
- "f1-score": 0.8387096774193549,
27
- "precision": 0.9285714285714286,
28
- "recall": 0.7647058823529411,
29
  "support": 17.0
30
  },
31
  "decision": {
32
- "f1-score": 0.967741935483871,
33
  "precision": 1.0,
34
- "recall": 0.9375,
35
  "support": 16.0
36
  },
37
  "macro avg": {
38
- "f1-score": 0.9463762044407206,
39
- "precision": 0.9496465252767774,
40
- "recall": 0.9479341736694679,
41
  "support": 102.0
42
  },
43
  "post_purchase": {
@@ -47,8 +47,8 @@
47
  "support": 14.0
48
  },
49
  "research": {
50
- "f1-score": 0.8484848484848485,
51
- "precision": 0.7777777777777778,
52
  "recall": 0.9333333333333333,
53
  "support": 15.0
54
  },
@@ -59,9 +59,9 @@
59
  "support": 14.0
60
  },
61
  "weighted avg": {
62
- "f1-score": 0.9410231345715216,
63
- "precision": 0.946188279233262,
64
- "recall": 0.9411764705882353,
65
  "support": 102.0
66
  }
67
  },
 
1
  {
2
+ "accepted_accuracy": 0.9706,
3
  "accepted_coverage": 1.0,
4
+ "accuracy": 0.9706,
5
  "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/decision_phase_train_confusion_matrix.csv",
6
  "count": 102,
7
  "dataset_path": "/content/agentic-intent-classifier/data/decision_phase/train.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "decision_phase",
10
+ "macro_f1": 0.9729,
11
  "per_class_metrics": {
12
+ "accuracy": 0.9705882352941176,
13
  "action": {
14
  "f1-score": 1.0,
15
  "precision": 1.0,
 
23
  "support": 16.0
24
  },
25
  "consideration": {
26
+ "f1-score": 0.9375,
27
+ "precision": 1.0,
28
+ "recall": 0.8823529411764706,
29
  "support": 17.0
30
  },
31
  "decision": {
32
+ "f1-score": 1.0,
33
  "precision": 1.0,
34
+ "recall": 1.0,
35
  "support": 16.0
36
  },
37
  "macro avg": {
38
+ "f1-score": 0.9729175394497975,
39
+ "precision": 0.9737394957983193,
40
+ "recall": 0.9736694677871148,
41
  "support": 102.0
42
  },
43
  "post_purchase": {
 
47
  "support": 14.0
48
  },
49
  "research": {
50
+ "f1-score": 0.9032258064516129,
51
+ "precision": 0.875,
52
  "recall": 0.9333333333333333,
53
  "support": 15.0
54
  },
 
59
  "support": 14.0
60
  },
61
  "weighted avg": {
62
+ "f1-score": 0.9705984177639775,
63
+ "precision": 0.9723904267589389,
64
+ "recall": 0.9705882352941176,
65
  "support": 102.0
66
  }
67
  },
artifacts/evaluation/latest/decision_phase_val_confusion_matrix.csv CHANGED
@@ -1,8 +1,8 @@
1
  ,awareness,research,consideration,decision,action,post_purchase,support
2
  awareness,5,0,0,0,0,0,0
3
- research,1,3,0,0,0,0,0
4
  consideration,0,0,5,0,0,0,0
5
  decision,0,0,1,3,0,0,0
6
  action,0,0,0,0,3,0,0
7
  post_purchase,0,1,0,0,0,3,0
8
- support,0,0,0,0,1,0,3
 
1
  ,awareness,research,consideration,decision,action,post_purchase,support
2
  awareness,5,0,0,0,0,0,0
3
+ research,2,2,0,0,0,0,0
4
  consideration,0,0,5,0,0,0,0
5
  decision,0,0,1,3,0,0,0
6
  action,0,0,0,0,3,0,0
7
  post_purchase,0,1,0,0,0,3,0
8
+ support,0,0,0,0,0,1,3
artifacts/evaluation/latest/decision_phase_val_report.json CHANGED
@@ -1,24 +1,24 @@
1
  {
2
- "accepted_accuracy": 0.8621,
3
  "accepted_coverage": 1.0,
4
- "accuracy": 0.8621,
5
  "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/decision_phase_val_confusion_matrix.csv",
6
  "count": 29,
7
  "dataset_path": "/content/agentic-intent-classifier/data/decision_phase/val.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "decision_phase",
10
- "macro_f1": 0.8567,
11
  "per_class_metrics": {
12
- "accuracy": 0.8620689655172413,
13
  "action": {
14
- "f1-score": 0.8571428571428571,
15
- "precision": 0.75,
16
  "recall": 1.0,
17
  "support": 3.0
18
  },
19
  "awareness": {
20
- "f1-score": 0.9090909090909091,
21
- "precision": 0.8333333333333334,
22
  "recall": 1.0,
23
  "support": 5.0
24
  },
@@ -35,21 +35,21 @@
35
  "support": 4.0
36
  },
37
  "macro avg": {
38
- "f1-score": 0.8566790352504637,
39
- "precision": 0.880952380952381,
40
- "recall": 0.8571428571428571,
41
  "support": 29.0
42
  },
43
  "post_purchase": {
44
- "f1-score": 0.8571428571428571,
45
- "precision": 1.0,
46
  "recall": 0.75,
47
  "support": 4.0
48
  },
49
  "research": {
50
- "f1-score": 0.75,
51
- "precision": 0.75,
52
- "recall": 0.75,
53
  "support": 4.0
54
  },
55
  "support": {
@@ -59,9 +59,9 @@
59
  "support": 4.0
60
  },
61
  "weighted avg": {
62
- "f1-score": 0.8602776533811015,
63
- "precision": 0.8821839080459771,
64
- "recall": 0.8620689655172413,
65
  "support": 29.0
66
  }
67
  },
 
1
  {
2
+ "accepted_accuracy": 0.8276,
3
  "accepted_coverage": 1.0,
4
+ "accuracy": 0.8276,
5
  "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/decision_phase_val_confusion_matrix.csv",
6
  "count": 29,
7
  "dataset_path": "/content/agentic-intent-classifier/data/decision_phase/val.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "decision_phase",
10
+ "macro_f1": 0.8254,
11
  "per_class_metrics": {
12
+ "accuracy": 0.8275862068965517,
13
  "action": {
14
+ "f1-score": 1.0,
15
+ "precision": 1.0,
16
  "recall": 1.0,
17
  "support": 3.0
18
  },
19
  "awareness": {
20
+ "f1-score": 0.8333333333333334,
21
+ "precision": 0.7142857142857143,
22
  "recall": 1.0,
23
  "support": 5.0
24
  },
 
35
  "support": 4.0
36
  },
37
  "macro avg": {
38
+ "f1-score": 0.8254483611626469,
39
+ "precision": 0.8520408163265306,
40
+ "recall": 0.8214285714285714,
41
  "support": 29.0
42
  },
43
  "post_purchase": {
44
+ "f1-score": 0.75,
45
+ "precision": 0.75,
46
  "recall": 0.75,
47
  "support": 4.0
48
  },
49
  "research": {
50
+ "f1-score": 0.5714285714285714,
51
+ "precision": 0.6666666666666666,
52
+ "recall": 0.5,
53
  "support": 4.0
54
  },
55
  "support": {
 
59
  "support": 4.0
60
  },
61
  "weighted avg": {
62
+ "f1-score": 0.822585460516495,
63
+ "precision": 0.8415435139573071,
64
+ "recall": 0.8275862068965517,
65
  "support": 29.0
66
  }
67
  },
artifacts/evaluation/latest/iab_behavior_lock_regression.json CHANGED
@@ -13,7 +13,7 @@
13
  "results": [
14
  {
15
  "actual": {
16
- "model_output.classification.iab_content.mapping_mode": "exact",
17
  "model_output.classification.iab_content.tier1.label": "Automotive",
18
  "model_output.classification.iab_content.tier2.label": null
19
  },
@@ -24,11 +24,6 @@
24
  },
25
  "id": "car-buying-maps-to-automotive-buying",
26
  "mismatches": [
27
- {
28
- "actual": "exact",
29
- "expected": "nearest_equivalent",
30
- "path": "model_output.classification.iab_content.mapping_mode"
31
- },
32
  {
33
  "actual": null,
34
  "expected": "Auto Type",
@@ -106,8 +101,8 @@
106
  "actual": {
107
  "model_output.classification.iab_content.mapping_mode": "exact",
108
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
109
- "model_output.classification.iab_content.tier2.label": "Computing",
110
- "model_output.classification.iab_content.tier3.label": "Software and Applications"
111
  },
112
  "expected": {
113
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -121,6 +116,16 @@
121
  "actual": "exact",
122
  "expected": "nearest_equivalent",
123
  "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
 
 
 
 
 
124
  }
125
  ],
126
  "notes": "CRM education should resolve to the closest business/sales path, not generic software.",
@@ -130,9 +135,9 @@
130
  },
131
  {
132
  "actual": {
133
- "model_output.classification.iab_content.mapping_mode": "exact",
134
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
135
- "model_output.classification.iab_content.tier2.label": "Robotics",
136
  "model_output.classification.iab_content.tier3.label": null
137
  },
138
  "expected": {
@@ -144,12 +149,7 @@
144
  "id": "crm-comparison-maps-to-sales",
145
  "mismatches": [
146
  {
147
- "actual": "exact",
148
- "expected": "nearest_equivalent",
149
- "path": "model_output.classification.iab_content.mapping_mode"
150
- },
151
- {
152
- "actual": "Robotics",
153
  "expected": "Computing",
154
  "path": "model_output.classification.iab_content.tier2.label"
155
  },
@@ -166,9 +166,9 @@
166
  },
167
  {
168
  "actual": {
169
- "model_output.classification.iab_content.mapping_mode": "exact",
170
  "model_output.classification.iab_content.tier1.label": "Careers",
171
- "model_output.classification.iab_content.tier2.label": "Job Search",
172
  "model_output.classification.iab_content.tier3.label": null
173
  },
174
  "expected": {
@@ -185,12 +185,7 @@
185
  "path": "model_output.classification.iab_content.tier1.label"
186
  },
187
  {
188
- "actual": "exact",
189
- "expected": "nearest_equivalent",
190
- "path": "model_output.classification.iab_content.mapping_mode"
191
- },
192
- {
193
- "actual": "Job Search",
194
  "expected": "Computing",
195
  "path": "model_output.classification.iab_content.tier2.label"
196
  },
@@ -208,7 +203,7 @@
208
  {
209
  "actual": {
210
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
211
- "model_output.classification.iab_content.tier1.label": "Science"
212
  },
213
  "expected": {
214
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -217,7 +212,7 @@
217
  "id": "ml-explanation-maps-to-ai",
218
  "mismatches": [
219
  {
220
- "actual": "Science",
221
  "expected": "Technology & Computing",
222
  "path": "model_output.classification.iab_content.tier1.label"
223
  }
@@ -229,10 +224,10 @@
229
  },
230
  {
231
  "actual": {
232
- "model_output.classification.iab_content.mapping_mode": "exact",
233
- "model_output.classification.iab_content.tier1.label": "Technology & Computing",
234
- "model_output.classification.iab_content.tier2.label": "Computing",
235
- "model_output.classification.iab_content.tier3.label": "Information and Network Security"
236
  },
237
  "expected": {
238
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -243,12 +238,17 @@
243
  "id": "support-credential-help-maps-to-business-it",
244
  "mismatches": [
245
  {
246
- "actual": "exact",
247
- "expected": "nearest_equivalent",
248
- "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
249
  },
250
  {
251
- "actual": "Information and Network Security",
252
  "expected": "Internet",
253
  "path": "model_output.classification.iab_content.tier3.label"
254
  }
@@ -284,9 +284,9 @@
284
  },
285
  {
286
  "actual": {
287
- "model_output.classification.iab_content.mapping_mode": "exact",
288
- "model_output.classification.iab_content.tier1.label": "Sensitive Topics",
289
- "model_output.classification.iab_content.tier2.label": "Crime & Harmful Acts to Individuals, Society & Human Right Violations",
290
  "model_output.classification.iab_content.tier3.label": null
291
  },
292
  "expected": {
@@ -298,17 +298,12 @@
298
  "id": "trial-signup-maps-to-software",
299
  "mismatches": [
300
  {
301
- "actual": "Sensitive Topics",
302
  "expected": "Hobbies & Interests",
303
  "path": "model_output.classification.iab_content.tier1.label"
304
  },
305
  {
306
- "actual": "exact",
307
- "expected": "nearest_equivalent",
308
- "path": "model_output.classification.iab_content.mapping_mode"
309
- },
310
- {
311
- "actual": "Crime & Harmful Acts to Individuals, Society & Human Right Violations",
312
  "expected": "Content Production",
313
  "path": "model_output.classification.iab_content.tier2.label"
314
  },
 
13
  "results": [
14
  {
15
  "actual": {
16
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
17
  "model_output.classification.iab_content.tier1.label": "Automotive",
18
  "model_output.classification.iab_content.tier2.label": null
19
  },
 
24
  },
25
  "id": "car-buying-maps-to-automotive-buying",
26
  "mismatches": [
 
 
 
 
 
27
  {
28
  "actual": null,
29
  "expected": "Auto Type",
 
101
  "actual": {
102
  "model_output.classification.iab_content.mapping_mode": "exact",
103
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
104
+ "model_output.classification.iab_content.tier2.label": null,
105
+ "model_output.classification.iab_content.tier3.label": null
106
  },
107
  "expected": {
108
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
116
  "actual": "exact",
117
  "expected": "nearest_equivalent",
118
  "path": "model_output.classification.iab_content.mapping_mode"
119
+ },
120
+ {
121
+ "actual": null,
122
+ "expected": "Computing",
123
+ "path": "model_output.classification.iab_content.tier2.label"
124
+ },
125
+ {
126
+ "actual": null,
127
+ "expected": "Software and Applications",
128
+ "path": "model_output.classification.iab_content.tier3.label"
129
  }
130
  ],
131
  "notes": "CRM education should resolve to the closest business/sales path, not generic software.",
 
135
  },
136
  {
137
  "actual": {
138
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
139
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
140
+ "model_output.classification.iab_content.tier2.label": null,
141
  "model_output.classification.iab_content.tier3.label": null
142
  },
143
  "expected": {
 
149
  "id": "crm-comparison-maps-to-sales",
150
  "mismatches": [
151
  {
152
+ "actual": null,
 
 
 
 
 
153
  "expected": "Computing",
154
  "path": "model_output.classification.iab_content.tier2.label"
155
  },
 
166
  },
167
  {
168
  "actual": {
169
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
170
  "model_output.classification.iab_content.tier1.label": "Careers",
171
+ "model_output.classification.iab_content.tier2.label": null,
172
  "model_output.classification.iab_content.tier3.label": null
173
  },
174
  "expected": {
 
185
  "path": "model_output.classification.iab_content.tier1.label"
186
  },
187
  {
188
+ "actual": null,
 
 
 
 
 
189
  "expected": "Computing",
190
  "path": "model_output.classification.iab_content.tier2.label"
191
  },
 
203
  {
204
  "actual": {
205
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
206
+ "model_output.classification.iab_content.tier1.label": "Real Estate"
207
  },
208
  "expected": {
209
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
212
  "id": "ml-explanation-maps-to-ai",
213
  "mismatches": [
214
  {
215
+ "actual": "Real Estate",
216
  "expected": "Technology & Computing",
217
  "path": "model_output.classification.iab_content.tier1.label"
218
  }
 
224
  },
225
  {
226
  "actual": {
227
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
228
+ "model_output.classification.iab_content.tier1.label": "Personal Finance",
229
+ "model_output.classification.iab_content.tier2.label": null,
230
+ "model_output.classification.iab_content.tier3.label": null
231
  },
232
  "expected": {
233
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
238
  "id": "support-credential-help-maps-to-business-it",
239
  "mismatches": [
240
  {
241
+ "actual": "Personal Finance",
242
+ "expected": "Technology & Computing",
243
+ "path": "model_output.classification.iab_content.tier1.label"
244
+ },
245
+ {
246
+ "actual": null,
247
+ "expected": "Computing",
248
+ "path": "model_output.classification.iab_content.tier2.label"
249
  },
250
  {
251
+ "actual": null,
252
  "expected": "Internet",
253
  "path": "model_output.classification.iab_content.tier3.label"
254
  }
 
284
  },
285
  {
286
  "actual": {
287
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
288
+ "model_output.classification.iab_content.tier1.label": "Sports",
289
+ "model_output.classification.iab_content.tier2.label": null,
290
  "model_output.classification.iab_content.tier3.label": null
291
  },
292
  "expected": {
 
298
  "id": "trial-signup-maps-to-software",
299
  "mismatches": [
300
  {
301
+ "actual": "Sports",
302
  "expected": "Hobbies & Interests",
303
  "path": "model_output.classification.iab_content.tier1.label"
304
  },
305
  {
306
+ "actual": null,
 
 
 
 
 
307
  "expected": "Content Production",
308
  "path": "model_output.classification.iab_content.tier2.label"
309
  },
artifacts/evaluation/latest/iab_content_cross_vertical_benchmark_report.json CHANGED
@@ -1,90 +1,90 @@
1
  {
2
- "accepted_accuracy": 0.427,
3
- "accepted_coverage": 0.9889,
4
- "accuracy": 0.4222,
5
  "count": 90,
6
  "dataset_path": "/content/agentic-intent-classifier/data/iab_cross_vertical_benchmark.jsonl",
7
  "difficulty_breakdown": {
8
  "easy": {
9
- "accepted_accuracy": 0.4138,
10
- "accepted_coverage": 0.9667,
11
- "accuracy": 0.4,
12
  "count": 30,
13
- "fallback_rate": 0.0333,
14
- "macro_f1": 0.2727
15
  },
16
  "hard": {
17
- "accepted_accuracy": 0.4667,
18
- "accepted_coverage": 1.0,
19
- "accuracy": 0.4667,
20
  "count": 30,
21
- "fallback_rate": 0.0,
22
- "macro_f1": 0.3106
23
  },
24
  "medium": {
25
- "accepted_accuracy": 0.4,
26
- "accepted_coverage": 1.0,
27
- "accuracy": 0.4,
28
  "count": 30,
29
- "fallback_rate": 0.0,
30
- "macro_f1": 0.2667
31
  }
32
  },
33
- "fallback_rate": 0.0111,
34
  "head": "iab_content",
35
- "macro_f1": 0.227,
36
  "primary_source": "supervised_classifier",
37
  "suite": "cross_vertical_benchmark",
38
  "tier_metrics": {
39
- "average_prediction_depth": 2.4,
40
  "error_buckets": {
41
- "exact_match": 38,
42
- "parent_safe_stop": 1,
43
- "right_tier1_wrong_tier2": 14,
44
  "wrong_deep_leaf": 8,
45
- "wrong_tier1": 29
46
  },
47
- "exact_path_accuracy": 0.4222,
48
- "parent_safe_accuracy": 0.4444,
49
- "tier1_accuracy": 0.6778,
50
- "tier2_accuracy": 0.4881,
51
- "tier3_accuracy": 0.5238,
52
- "tier4_accuracy": 0.5
53
  },
54
  "view_metrics": {
55
  "classifier": {
56
- "average_prediction_depth": 2.4,
57
  "error_buckets": {
58
- "exact_match": 37,
59
- "parent_safe_stop": 1,
60
- "right_tier1_wrong_tier2": 14,
61
- "wrong_deep_leaf": 9,
62
- "wrong_tier1": 29
63
  },
64
- "exact_path_accuracy": 0.4111,
65
- "parent_safe_accuracy": 0.4333,
66
- "tier1_accuracy": 0.6778,
67
- "tier2_accuracy": 0.4881,
68
- "tier3_accuracy": 0.4762,
69
- "tier4_accuracy": 0.5
70
  },
71
  "combined_path": {
72
- "average_prediction_depth": 2.4,
73
  "error_buckets": {
74
- "exact_match": 37,
75
- "parent_safe_stop": 1,
76
- "right_tier1_wrong_tier2": 14,
77
- "wrong_deep_leaf": 9,
78
- "wrong_tier1": 29
79
  },
80
- "exact_path_accuracy": 0.4111,
81
- "fallback_overuse_count": 25,
82
- "fallback_rate": 0.2778,
83
- "parent_safe_accuracy": 0.4333,
84
- "tier1_accuracy": 0.6778,
85
- "tier2_accuracy": 0.4881,
86
- "tier3_accuracy": 0.4762,
87
- "tier4_accuracy": 0.5
88
  },
89
  "disagreements": {
90
  "classifier_vs_combined": 0
 
1
  {
2
+ "accepted_accuracy": 0.3108,
3
+ "accepted_coverage": 0.8222,
4
+ "accuracy": 0.2556,
5
  "count": 90,
6
  "dataset_path": "/content/agentic-intent-classifier/data/iab_cross_vertical_benchmark.jsonl",
7
  "difficulty_breakdown": {
8
  "easy": {
9
+ "accepted_accuracy": 0.3636,
10
+ "accepted_coverage": 0.7333,
11
+ "accuracy": 0.2667,
12
  "count": 30,
13
+ "fallback_rate": 0.2667,
14
+ "macro_f1": 0.1778
15
  },
16
  "hard": {
17
+ "accepted_accuracy": 0.3077,
18
+ "accepted_coverage": 0.8667,
19
+ "accuracy": 0.2667,
20
  "count": 30,
21
+ "fallback_rate": 0.1333,
22
+ "macro_f1": 0.1562
23
  },
24
  "medium": {
25
+ "accepted_accuracy": 0.2692,
26
+ "accepted_coverage": 0.8667,
27
+ "accuracy": 0.2333,
28
  "count": 30,
29
+ "fallback_rate": 0.1333,
30
+ "macro_f1": 0.1591
31
  }
32
  },
33
+ "fallback_rate": 0.1778,
34
  "head": "iab_content",
35
+ "macro_f1": 0.1228,
36
  "primary_source": "supervised_classifier",
37
  "suite": "cross_vertical_benchmark",
38
  "tier_metrics": {
39
+ "average_prediction_depth": 1.9222,
40
  "error_buckets": {
41
+ "exact_match": 23,
42
+ "parent_safe_stop": 3,
43
+ "right_tier1_wrong_tier2": 23,
44
  "wrong_deep_leaf": 8,
45
+ "wrong_tier1": 33
46
  },
47
+ "exact_path_accuracy": 0.2556,
48
+ "parent_safe_accuracy": 0.4222,
49
+ "tier1_accuracy": 0.6333,
50
+ "tier2_accuracy": 0.3571,
51
+ "tier3_accuracy": 0.2381,
52
+ "tier4_accuracy": 0.0
53
  },
54
  "view_metrics": {
55
  "classifier": {
56
+ "average_prediction_depth": 1.9222,
57
  "error_buckets": {
58
+ "exact_match": 23,
59
+ "parent_safe_stop": 3,
60
+ "right_tier1_wrong_tier2": 23,
61
+ "wrong_deep_leaf": 8,
62
+ "wrong_tier1": 33
63
  },
64
+ "exact_path_accuracy": 0.2556,
65
+ "parent_safe_accuracy": 0.4222,
66
+ "tier1_accuracy": 0.6333,
67
+ "tier2_accuracy": 0.3571,
68
+ "tier3_accuracy": 0.2381,
69
+ "tier4_accuracy": 0.0
70
  },
71
  "combined_path": {
72
+ "average_prediction_depth": 1.9222,
73
  "error_buckets": {
74
+ "exact_match": 23,
75
+ "parent_safe_stop": 3,
76
+ "right_tier1_wrong_tier2": 23,
77
+ "wrong_deep_leaf": 8,
78
+ "wrong_tier1": 33
79
  },
80
+ "exact_path_accuracy": 0.2556,
81
+ "fallback_overuse_count": 19,
82
+ "fallback_rate": 0.2111,
83
+ "parent_safe_accuracy": 0.4222,
84
+ "tier1_accuracy": 0.6333,
85
+ "tier2_accuracy": 0.3571,
86
+ "tier3_accuracy": 0.2381,
87
+ "tier4_accuracy": 0.0
88
  },
89
  "disagreements": {
90
  "classifier_vs_combined": 0
artifacts/evaluation/latest/iab_content_difficulty_benchmark_report.json CHANGED
@@ -1,90 +1,90 @@
1
  {
2
- "accepted_accuracy": 0.4231,
3
- "accepted_coverage": 1.0,
4
- "accuracy": 0.4231,
5
  "count": 156,
6
  "dataset_path": "/content/agentic-intent-classifier/data/iab_benchmark.jsonl",
7
  "difficulty_breakdown": {
8
  "easy": {
9
- "accepted_accuracy": 0.4615,
10
- "accepted_coverage": 1.0,
11
- "accuracy": 0.4615,
12
  "count": 52,
13
- "fallback_rate": 0.0,
14
- "macro_f1": 0.2359
15
  },
16
  "hard": {
17
- "accepted_accuracy": 0.3654,
18
- "accepted_coverage": 1.0,
19
- "accuracy": 0.3654,
20
  "count": 52,
21
- "fallback_rate": 0.0,
22
- "macro_f1": 0.1892
23
  },
24
  "medium": {
25
- "accepted_accuracy": 0.4423,
26
- "accepted_coverage": 1.0,
27
- "accuracy": 0.4423,
28
  "count": 52,
29
- "fallback_rate": 0.0,
30
- "macro_f1": 0.2338
31
  }
32
  },
33
- "fallback_rate": 0.0,
34
  "head": "iab_content",
35
- "macro_f1": 0.1524,
36
  "primary_source": "supervised_classifier",
37
  "suite": "difficulty_benchmark",
38
  "tier_metrics": {
39
- "average_prediction_depth": 2.4103,
40
  "error_buckets": {
41
- "exact_match": 66,
42
- "parent_safe_stop": 1,
43
- "right_tier1_wrong_tier2": 42,
44
- "wrong_deep_leaf": 8,
45
- "wrong_tier1": 39
46
  },
47
- "exact_path_accuracy": 0.4231,
48
- "parent_safe_accuracy": 0.5385,
49
- "tier1_accuracy": 0.75,
50
- "tier2_accuracy": 0.4808,
51
- "tier3_accuracy": 0.5093,
52
- "tier4_accuracy": 0.4583
53
  },
54
  "view_metrics": {
55
  "classifier": {
56
- "average_prediction_depth": 2.4103,
57
  "error_buckets": {
58
- "exact_match": 59,
59
- "parent_safe_stop": 1,
60
- "right_tier1_wrong_tier2": 42,
61
- "wrong_deep_leaf": 15,
62
- "wrong_tier1": 39
63
  },
64
- "exact_path_accuracy": 0.3782,
65
- "parent_safe_accuracy": 0.4936,
66
- "tier1_accuracy": 0.75,
67
- "tier2_accuracy": 0.4808,
68
- "tier3_accuracy": 0.4259,
69
- "tier4_accuracy": 0.1667
70
  },
71
  "combined_path": {
72
- "average_prediction_depth": 2.4103,
73
  "error_buckets": {
74
- "exact_match": 59,
75
- "parent_safe_stop": 1,
76
- "right_tier1_wrong_tier2": 42,
77
- "wrong_deep_leaf": 15,
78
- "wrong_tier1": 39
79
  },
80
- "exact_path_accuracy": 0.3782,
81
- "fallback_overuse_count": 15,
82
- "fallback_rate": 0.0962,
83
- "parent_safe_accuracy": 0.4936,
84
- "tier1_accuracy": 0.75,
85
- "tier2_accuracy": 0.4808,
86
- "tier3_accuracy": 0.4259,
87
- "tier4_accuracy": 0.1667
88
  },
89
  "disagreements": {
90
  "classifier_vs_combined": 0
 
1
  {
2
+ "accepted_accuracy": 0.32,
3
+ "accepted_coverage": 0.8013,
4
+ "accuracy": 0.2564,
5
  "count": 156,
6
  "dataset_path": "/content/agentic-intent-classifier/data/iab_benchmark.jsonl",
7
  "difficulty_breakdown": {
8
  "easy": {
9
+ "accepted_accuracy": 0.35,
10
+ "accepted_coverage": 0.7692,
11
+ "accuracy": 0.2692,
12
  "count": 52,
13
+ "fallback_rate": 0.2308,
14
+ "macro_f1": 0.153
15
  },
16
  "hard": {
17
+ "accepted_accuracy": 0.275,
18
+ "accepted_coverage": 0.7692,
19
+ "accuracy": 0.2115,
20
  "count": 52,
21
+ "fallback_rate": 0.2308,
22
+ "macro_f1": 0.1108
23
  },
24
  "medium": {
25
+ "accepted_accuracy": 0.3333,
26
+ "accepted_coverage": 0.8654,
27
+ "accuracy": 0.2885,
28
  "count": 52,
29
+ "fallback_rate": 0.1346,
30
+ "macro_f1": 0.1491
31
  }
32
  },
33
+ "fallback_rate": 0.1987,
34
  "head": "iab_content",
35
+ "macro_f1": 0.105,
36
  "primary_source": "supervised_classifier",
37
  "suite": "difficulty_benchmark",
38
  "tier_metrics": {
39
+ "average_prediction_depth": 1.7564,
40
  "error_buckets": {
41
+ "exact_match": 40,
42
+ "parent_safe_stop": 11,
43
+ "right_tier1_wrong_tier2": 58,
44
+ "wrong_deep_leaf": 1,
45
+ "wrong_tier1": 46
46
  },
47
+ "exact_path_accuracy": 0.2564,
48
+ "parent_safe_accuracy": 0.6218,
49
+ "tier1_accuracy": 0.7051,
50
+ "tier2_accuracy": 0.3333,
51
+ "tier3_accuracy": 0.2315,
52
+ "tier4_accuracy": 0.0
53
  },
54
  "view_metrics": {
55
  "classifier": {
56
+ "average_prediction_depth": 1.7564,
57
  "error_buckets": {
58
+ "exact_match": 40,
59
+ "parent_safe_stop": 11,
60
+ "right_tier1_wrong_tier2": 58,
61
+ "wrong_deep_leaf": 1,
62
+ "wrong_tier1": 46
63
  },
64
+ "exact_path_accuracy": 0.2564,
65
+ "parent_safe_accuracy": 0.6218,
66
+ "tier1_accuracy": 0.7051,
67
+ "tier2_accuracy": 0.3333,
68
+ "tier3_accuracy": 0.2315,
69
+ "tier4_accuracy": 0.0
70
  },
71
  "combined_path": {
72
+ "average_prediction_depth": 1.7564,
73
  "error_buckets": {
74
+ "exact_match": 40,
75
+ "parent_safe_stop": 11,
76
+ "right_tier1_wrong_tier2": 58,
77
+ "wrong_deep_leaf": 1,
78
+ "wrong_tier1": 46
79
  },
80
+ "exact_path_accuracy": 0.2564,
81
+ "fallback_overuse_count": 13,
82
+ "fallback_rate": 0.0833,
83
+ "parent_safe_accuracy": 0.6218,
84
+ "tier1_accuracy": 0.7051,
85
+ "tier2_accuracy": 0.3333,
86
+ "tier3_accuracy": 0.2315,
87
+ "tier4_accuracy": 0.0
88
  },
89
  "disagreements": {
90
  "classifier_vs_combined": 0
artifacts/evaluation/latest/iab_content_extended_cases_report.json CHANGED
@@ -1,58 +1,58 @@
1
  {
2
- "accepted_accuracy": 0.5,
3
- "accepted_coverage": 1.0,
4
- "accuracy": 0.5,
5
  "count": 8,
6
  "dataset_path": "/content/agentic-intent-classifier/data/iab/extended_cases.jsonl",
7
- "fallback_rate": 0.0,
8
  "head": "iab_content",
9
- "macro_f1": 0.3333,
10
  "primary_source": "supervised_classifier",
11
  "suite": "extended_cases",
12
  "tier_metrics": {
13
- "average_prediction_depth": 2.125,
14
  "error_buckets": {
15
- "exact_match": 4,
16
- "right_tier1_wrong_tier2": 2,
17
  "wrong_deep_leaf": 1,
18
- "wrong_tier1": 1
19
  },
20
- "exact_path_accuracy": 0.5,
21
- "parent_safe_accuracy": 0.5,
22
- "tier1_accuracy": 0.875,
23
  "tier2_accuracy": 0.5714,
24
  "tier3_accuracy": 0.0,
25
  "tier4_accuracy": 0.0
26
  },
27
  "view_metrics": {
28
  "classifier": {
29
- "average_prediction_depth": 2.125,
30
  "error_buckets": {
31
- "exact_match": 4,
32
- "right_tier1_wrong_tier2": 2,
33
  "wrong_deep_leaf": 1,
34
- "wrong_tier1": 1
35
  },
36
- "exact_path_accuracy": 0.5,
37
- "parent_safe_accuracy": 0.5,
38
- "tier1_accuracy": 0.875,
39
  "tier2_accuracy": 0.5714,
40
  "tier3_accuracy": 0.0,
41
  "tier4_accuracy": 0.0
42
  },
43
  "combined_path": {
44
- "average_prediction_depth": 2.125,
45
  "error_buckets": {
46
- "exact_match": 4,
47
- "right_tier1_wrong_tier2": 2,
48
  "wrong_deep_leaf": 1,
49
- "wrong_tier1": 1
50
  },
51
- "exact_path_accuracy": 0.5,
52
  "fallback_overuse_count": 2,
53
  "fallback_rate": 0.25,
54
- "parent_safe_accuracy": 0.5,
55
- "tier1_accuracy": 0.875,
56
  "tier2_accuracy": 0.5714,
57
  "tier3_accuracy": 0.0,
58
  "tier4_accuracy": 0.0
 
1
  {
2
+ "accepted_accuracy": 0.6,
3
+ "accepted_coverage": 0.625,
4
+ "accuracy": 0.375,
5
  "count": 8,
6
  "dataset_path": "/content/agentic-intent-classifier/data/iab/extended_cases.jsonl",
7
+ "fallback_rate": 0.375,
8
  "head": "iab_content",
9
+ "macro_f1": 0.2308,
10
  "primary_source": "supervised_classifier",
11
  "suite": "extended_cases",
12
  "tier_metrics": {
13
+ "average_prediction_depth": 1.75,
14
  "error_buckets": {
15
+ "exact_match": 3,
16
+ "right_tier1_wrong_tier2": 1,
17
  "wrong_deep_leaf": 1,
18
+ "wrong_tier1": 3
19
  },
20
+ "exact_path_accuracy": 0.375,
21
+ "parent_safe_accuracy": 0.375,
22
+ "tier1_accuracy": 0.625,
23
  "tier2_accuracy": 0.5714,
24
  "tier3_accuracy": 0.0,
25
  "tier4_accuracy": 0.0
26
  },
27
  "view_metrics": {
28
  "classifier": {
29
+ "average_prediction_depth": 1.75,
30
  "error_buckets": {
31
+ "exact_match": 3,
32
+ "right_tier1_wrong_tier2": 1,
33
  "wrong_deep_leaf": 1,
34
+ "wrong_tier1": 3
35
  },
36
+ "exact_path_accuracy": 0.375,
37
+ "parent_safe_accuracy": 0.375,
38
+ "tier1_accuracy": 0.625,
39
  "tier2_accuracy": 0.5714,
40
  "tier3_accuracy": 0.0,
41
  "tier4_accuracy": 0.0
42
  },
43
  "combined_path": {
44
+ "average_prediction_depth": 1.75,
45
  "error_buckets": {
46
+ "exact_match": 3,
47
+ "right_tier1_wrong_tier2": 1,
48
  "wrong_deep_leaf": 1,
49
+ "wrong_tier1": 3
50
  },
51
+ "exact_path_accuracy": 0.375,
52
  "fallback_overuse_count": 2,
53
  "fallback_rate": 0.25,
54
+ "parent_safe_accuracy": 0.375,
55
+ "tier1_accuracy": 0.625,
56
  "tier2_accuracy": 0.5714,
57
  "tier3_accuracy": 0.0,
58
  "tier4_accuracy": 0.0
artifacts/evaluation/latest/iab_content_hard_cases_report.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
- "accepted_accuracy": 0.4286,
3
- "accepted_coverage": 0.875,
4
  "accuracy": 0.375,
5
  "count": 8,
6
  "dataset_path": "/content/agentic-intent-classifier/data/iab/hard_cases.jsonl",
7
- "fallback_rate": 0.125,
8
  "head": "iab_content",
9
  "macro_f1": 0.2308,
10
  "primary_source": "supervised_classifier",
11
  "suite": "hard_cases",
12
  "tier_metrics": {
13
- "average_prediction_depth": 2.25,
14
  "error_buckets": {
15
  "exact_match": 3,
16
  "right_tier1_wrong_tier2": 1,
@@ -25,7 +25,7 @@
25
  },
26
  "view_metrics": {
27
  "classifier": {
28
- "average_prediction_depth": 2.25,
29
  "error_buckets": {
30
  "exact_match": 3,
31
  "right_tier1_wrong_tier2": 1,
@@ -39,7 +39,7 @@
39
  "tier4_accuracy": 0.0
40
  },
41
  "combined_path": {
42
- "average_prediction_depth": 2.25,
43
  "error_buckets": {
44
  "exact_match": 3,
45
  "right_tier1_wrong_tier2": 1,
 
1
  {
2
+ "accepted_accuracy": 0.6,
3
+ "accepted_coverage": 0.625,
4
  "accuracy": 0.375,
5
  "count": 8,
6
  "dataset_path": "/content/agentic-intent-classifier/data/iab/hard_cases.jsonl",
7
+ "fallback_rate": 0.375,
8
  "head": "iab_content",
9
  "macro_f1": 0.2308,
10
  "primary_source": "supervised_classifier",
11
  "suite": "hard_cases",
12
  "tier_metrics": {
13
+ "average_prediction_depth": 1.75,
14
  "error_buckets": {
15
  "exact_match": 3,
16
  "right_tier1_wrong_tier2": 1,
 
25
  },
26
  "view_metrics": {
27
  "classifier": {
28
+ "average_prediction_depth": 1.75,
29
  "error_buckets": {
30
  "exact_match": 3,
31
  "right_tier1_wrong_tier2": 1,
 
39
  "tier4_accuracy": 0.0
40
  },
41
  "combined_path": {
42
+ "average_prediction_depth": 1.75,
43
  "error_buckets": {
44
  "exact_match": 3,
45
  "right_tier1_wrong_tier2": 1,
artifacts/evaluation/latest/iab_content_test_report.json CHANGED
@@ -1,46 +1,46 @@
1
  {
2
- "accepted_accuracy": 0.943,
3
- "accepted_coverage": 1.0,
4
- "accuracy": 0.943,
5
  "count": 3282,
6
  "dataset_path": "/content/agentic-intent-classifier/data/iab/test.jsonl",
7
- "fallback_rate": 0.0,
8
  "head": "iab_content",
9
- "macro_f1": 0.911,
10
  "primary_source": "supervised_classifier",
11
  "suite": "test",
12
  "tier_metrics": {
13
- "average_prediction_depth": 2.213,
14
  "error_buckets": {
15
- "exact_match": 3095,
16
- "parent_safe_stop": 45,
17
- "right_tier1_wrong_tier2": 41,
18
- "wrong_deep_leaf": 72,
19
- "wrong_tier1": 29
20
  },
21
- "exact_path_accuracy": 0.943,
22
- "parent_safe_accuracy": 0.958,
23
- "tier1_accuracy": 0.9912,
24
- "tier2_accuracy": 0.9776,
25
- "tier3_accuracy": 0.9078,
26
- "tier4_accuracy": 0.7
27
  },
28
  "view_metrics": {
29
  "classifier": {
30
- "average_prediction_depth": 2.213,
31
  "error_buckets": {
32
- "exact_match": 3052,
33
- "parent_safe_stop": 44,
34
- "right_tier1_wrong_tier2": 53,
35
- "wrong_deep_leaf": 104,
36
- "wrong_tier1": 29
37
  },
38
- "exact_path_accuracy": 0.9299,
39
- "parent_safe_accuracy": 0.9445,
40
- "tier1_accuracy": 0.9912,
41
- "tier2_accuracy": 0.9734,
42
- "tier3_accuracy": 0.8725,
43
- "tier4_accuracy": 0.5
44
  },
45
  "combined_path": {
46
  "count": 3282,
 
1
  {
2
+ "accepted_accuracy": 0.9278,
3
+ "accepted_coverage": 0.996,
4
+ "accuracy": 0.9247,
5
  "count": 3282,
6
  "dataset_path": "/content/agentic-intent-classifier/data/iab/test.jsonl",
7
+ "fallback_rate": 0.004,
8
  "head": "iab_content",
9
+ "macro_f1": 0.8814,
10
  "primary_source": "supervised_classifier",
11
  "suite": "test",
12
  "tier_metrics": {
13
+ "average_prediction_depth": 2.1706,
14
  "error_buckets": {
15
+ "exact_match": 3035,
16
+ "parent_safe_stop": 87,
17
+ "right_tier1_wrong_tier2": 56,
18
+ "wrong_deep_leaf": 69,
19
+ "wrong_tier1": 35
20
  },
21
+ "exact_path_accuracy": 0.9247,
22
+ "parent_safe_accuracy": 0.961,
23
+ "tier1_accuracy": 0.9893,
24
+ "tier2_accuracy": 0.9707,
25
+ "tier3_accuracy": 0.8487,
26
+ "tier4_accuracy": 0.5714
27
  },
28
  "view_metrics": {
29
  "classifier": {
30
+ "average_prediction_depth": 2.1706,
31
  "error_buckets": {
32
+ "exact_match": 3004,
33
+ "parent_safe_stop": 84,
34
+ "right_tier1_wrong_tier2": 68,
35
+ "wrong_deep_leaf": 91,
36
+ "wrong_tier1": 35
37
  },
38
+ "exact_path_accuracy": 0.9153,
39
+ "parent_safe_accuracy": 0.9506,
40
+ "tier1_accuracy": 0.9893,
41
+ "tier2_accuracy": 0.9665,
42
+ "tier3_accuracy": 0.8259,
43
+ "tier4_accuracy": 0.4429
44
  },
45
  "combined_path": {
46
  "count": 3282,
artifacts/evaluation/latest/iab_content_train_report.json CHANGED
@@ -1,46 +1,46 @@
1
  {
2
- "accepted_accuracy": 0.9459,
3
- "accepted_coverage": 1.0,
4
- "accuracy": 0.9459,
5
  "count": 13211,
6
  "dataset_path": "/content/agentic-intent-classifier/data/iab/train.jsonl",
7
- "fallback_rate": 0.0,
8
  "head": "iab_content",
9
- "macro_f1": 0.9194,
10
  "primary_source": "supervised_classifier",
11
  "suite": "train",
12
  "tier_metrics": {
13
- "average_prediction_depth": 2.2105,
14
  "error_buckets": {
15
- "exact_match": 12496,
16
- "parent_safe_stop": 162,
17
- "right_tier1_wrong_tier2": 144,
18
- "wrong_deep_leaf": 284,
19
- "wrong_tier1": 125
20
  },
21
- "exact_path_accuracy": 0.9459,
22
- "parent_safe_accuracy": 0.9585,
23
- "tier1_accuracy": 0.9905,
24
- "tier2_accuracy": 0.9805,
25
- "tier3_accuracy": 0.9135,
26
- "tier4_accuracy": 0.7268
27
  },
28
  "view_metrics": {
29
  "classifier": {
30
- "average_prediction_depth": 2.2105,
31
  "error_buckets": {
32
- "exact_match": 12323,
33
- "parent_safe_stop": 157,
34
- "right_tier1_wrong_tier2": 192,
35
- "wrong_deep_leaf": 414,
36
- "wrong_tier1": 125
37
  },
38
- "exact_path_accuracy": 0.9328,
39
- "parent_safe_accuracy": 0.945,
40
- "tier1_accuracy": 0.9905,
41
- "tier2_accuracy": 0.9764,
42
- "tier3_accuracy": 0.8777,
43
- "tier4_accuracy": 0.525
44
  },
45
  "combined_path": {
46
  "count": 13211,
 
1
  {
2
+ "accepted_accuracy": 0.9314,
3
+ "accepted_coverage": 0.9972,
4
+ "accuracy": 0.9295,
5
  "count": 13211,
6
  "dataset_path": "/content/agentic-intent-classifier/data/iab/train.jsonl",
7
+ "fallback_rate": 0.0028,
8
  "head": "iab_content",
9
+ "macro_f1": 0.8927,
10
  "primary_source": "supervised_classifier",
11
  "suite": "train",
12
  "tier_metrics": {
13
+ "average_prediction_depth": 2.1683,
14
  "error_buckets": {
15
+ "exact_match": 12280,
16
+ "parent_safe_stop": 312,
17
+ "right_tier1_wrong_tier2": 215,
18
+ "wrong_deep_leaf": 288,
19
+ "wrong_tier1": 116
20
  },
21
+ "exact_path_accuracy": 0.9295,
22
+ "parent_safe_accuracy": 0.9618,
23
+ "tier1_accuracy": 0.9912,
24
+ "tier2_accuracy": 0.9737,
25
+ "tier3_accuracy": 0.8557,
26
+ "tier4_accuracy": 0.6107
27
  },
28
  "view_metrics": {
29
  "classifier": {
30
+ "average_prediction_depth": 2.1683,
31
  "error_buckets": {
32
+ "exact_match": 12145,
33
+ "parent_safe_stop": 300,
34
+ "right_tier1_wrong_tier2": 263,
35
+ "wrong_deep_leaf": 387,
36
+ "wrong_tier1": 116
37
  },
38
+ "exact_path_accuracy": 0.9193,
39
+ "parent_safe_accuracy": 0.9507,
40
+ "tier1_accuracy": 0.9912,
41
+ "tier2_accuracy": 0.9695,
42
+ "tier3_accuracy": 0.8301,
43
+ "tier4_accuracy": 0.475
44
  },
45
  "combined_path": {
46
  "count": 13211,
artifacts/evaluation/latest/iab_content_val_report.json CHANGED
@@ -1,46 +1,46 @@
1
  {
2
- "accepted_accuracy": 0.9442,
3
- "accepted_coverage": 1.0,
4
- "accuracy": 0.9442,
5
  "count": 3282,
6
  "dataset_path": "/content/agentic-intent-classifier/data/iab/val.jsonl",
7
- "fallback_rate": 0.0,
8
  "head": "iab_content",
9
- "macro_f1": 0.9166,
10
  "primary_source": "supervised_classifier",
11
  "suite": "val",
12
  "tier_metrics": {
13
- "average_prediction_depth": 2.2151,
14
  "error_buckets": {
15
- "exact_match": 3099,
16
- "parent_safe_stop": 35,
17
- "right_tier1_wrong_tier2": 45,
18
- "wrong_deep_leaf": 72,
19
- "wrong_tier1": 31
20
  },
21
- "exact_path_accuracy": 0.9442,
22
- "parent_safe_accuracy": 0.9576,
23
- "tier1_accuracy": 0.9906,
24
- "tier2_accuracy": 0.9769,
25
- "tier3_accuracy": 0.9088,
26
- "tier4_accuracy": 0.7286
27
  },
28
  "view_metrics": {
29
  "classifier": {
30
- "average_prediction_depth": 2.2151,
31
  "error_buckets": {
32
- "exact_match": 3056,
33
- "parent_safe_stop": 34,
34
- "right_tier1_wrong_tier2": 57,
35
- "wrong_deep_leaf": 104,
36
- "wrong_tier1": 31
37
  },
38
- "exact_path_accuracy": 0.9311,
39
- "parent_safe_accuracy": 0.9442,
40
- "tier1_accuracy": 0.9906,
41
- "tier2_accuracy": 0.9727,
42
- "tier3_accuracy": 0.8736,
43
- "tier4_accuracy": 0.5286
44
  },
45
  "combined_path": {
46
  "count": 3282,
 
1
  {
2
+ "accepted_accuracy": 0.9273,
3
+ "accepted_coverage": 0.9973,
4
+ "accuracy": 0.9254,
5
  "count": 3282,
6
  "dataset_path": "/content/agentic-intent-classifier/data/iab/val.jsonl",
7
+ "fallback_rate": 0.0027,
8
  "head": "iab_content",
9
+ "macro_f1": 0.8864,
10
  "primary_source": "supervised_classifier",
11
  "suite": "val",
12
  "tier_metrics": {
13
+ "average_prediction_depth": 2.1709,
14
  "error_buckets": {
15
+ "exact_match": 3037,
16
+ "parent_safe_stop": 80,
17
+ "right_tier1_wrong_tier2": 55,
18
+ "wrong_deep_leaf": 74,
19
+ "wrong_tier1": 36
20
  },
21
+ "exact_path_accuracy": 0.9254,
22
+ "parent_safe_accuracy": 0.9613,
23
+ "tier1_accuracy": 0.989,
24
+ "tier2_accuracy": 0.9713,
25
+ "tier3_accuracy": 0.8549,
26
+ "tier4_accuracy": 0.6071
27
  },
28
  "view_metrics": {
29
  "classifier": {
30
+ "average_prediction_depth": 2.1709,
31
  "error_buckets": {
32
+ "exact_match": 3002,
33
+ "parent_safe_stop": 78,
34
+ "right_tier1_wrong_tier2": 67,
35
+ "wrong_deep_leaf": 99,
36
+ "wrong_tier1": 36
37
  },
38
+ "exact_path_accuracy": 0.9147,
39
+ "parent_safe_accuracy": 0.95,
40
+ "tier1_accuracy": 0.989,
41
+ "tier2_accuracy": 0.9672,
42
+ "tier3_accuracy": 0.829,
43
+ "tier4_accuracy": 0.4643
44
  },
45
  "combined_path": {
46
  "count": 3282,
artifacts/evaluation/latest/iab_cross_vertical_behavior_lock_regression.json CHANGED
@@ -1,21 +1,21 @@
1
  {
2
  "by_status": {
3
  "must_fix": {
4
- "failed": 90,
5
- "passed": 0,
6
  "total": 90
7
  }
8
  },
9
  "cases_path": "/content/agentic-intent-classifier/examples/iab_cross_vertical_behavior_lock_cases.json",
10
  "count": 90,
11
- "failed": 90,
12
- "passed": 0,
13
  "results": [
14
  {
15
  "actual": {
16
- "model_output.classification.iab_content.mapping_mode": "exact",
17
- "model_output.classification.iab_content.tier1.label": "Personal Finance",
18
- "model_output.classification.iab_content.tier2.label": "Insurance"
19
  },
20
  "expected": {
21
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -23,31 +23,15 @@
23
  "model_output.classification.iab_content.tier2.label": "Travel Type"
24
  },
25
  "id": "auto-buying-easy",
26
- "mismatches": [
27
- {
28
- "actual": "Personal Finance",
29
- "expected": "Travel",
30
- "path": "model_output.classification.iab_content.tier1.label"
31
- },
32
- {
33
- "actual": "exact",
34
- "expected": "nearest_equivalent",
35
- "path": "model_output.classification.iab_content.mapping_mode"
36
- },
37
- {
38
- "actual": "Insurance",
39
- "expected": "Travel Type",
40
- "path": "model_output.classification.iab_content.tier2.label"
41
- }
42
- ],
43
  "notes": "Cross-vertical easy IAB mapping case for Automotive > Auto Buying and Selling.",
44
- "pass": false,
45
  "status": "must_fix",
46
  "text": "Which car should I buy for commuting?"
47
  },
48
  {
49
  "actual": {
50
- "model_output.classification.iab_content.mapping_mode": "exact",
51
  "model_output.classification.iab_content.tier1.label": "Automotive",
52
  "model_output.classification.iab_content.tier2.label": "Auto Body Styles"
53
  },
@@ -57,23 +41,17 @@
57
  "model_output.classification.iab_content.tier2.label": "Auto Body Styles"
58
  },
59
  "id": "auto-buying-medium",
60
- "mismatches": [
61
- {
62
- "actual": "exact",
63
- "expected": "nearest_equivalent",
64
- "path": "model_output.classification.iab_content.mapping_mode"
65
- }
66
- ],
67
  "notes": "Cross-vertical medium IAB mapping case for Automotive > Auto Buying and Selling.",
68
- "pass": false,
69
  "status": "must_fix",
70
  "text": "Best used SUV for a family of four"
71
  },
72
  {
73
  "actual": {
74
- "model_output.classification.iab_content.mapping_mode": "exact",
75
  "model_output.classification.iab_content.tier1.label": "Automotive",
76
- "model_output.classification.iab_content.tier2.label": "Car Culture"
77
  },
78
  "expected": {
79
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -83,12 +61,7 @@
83
  "id": "auto-buying-hard",
84
  "mismatches": [
85
  {
86
- "actual": "exact",
87
- "expected": "nearest_equivalent",
88
- "path": "model_output.classification.iab_content.mapping_mode"
89
- },
90
- {
91
- "actual": "Car Culture",
92
  "expected": "Auto Type",
93
  "path": "model_output.classification.iab_content.tier2.label"
94
  }
@@ -102,8 +75,8 @@
102
  "actual": {
103
  "model_output.classification.iab_content.mapping_mode": "exact",
104
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
105
- "model_output.classification.iab_content.tier2.label": "Computing",
106
- "model_output.classification.iab_content.tier3.label": "Software and Applications"
107
  },
108
  "expected": {
109
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -117,6 +90,16 @@
117
  "actual": "exact",
118
  "expected": "nearest_equivalent",
119
  "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
 
 
 
 
 
120
  }
121
  ],
122
  "notes": "Cross-vertical easy IAB mapping case for Business and Finance > Business > Sales.",
@@ -126,9 +109,9 @@
126
  },
127
  {
128
  "actual": {
129
- "model_output.classification.iab_content.mapping_mode": "exact",
130
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
131
- "model_output.classification.iab_content.tier2.label": "Robotics",
132
  "model_output.classification.iab_content.tier3.label": null
133
  },
134
  "expected": {
@@ -140,12 +123,7 @@
140
  "id": "sales-crm-medium",
141
  "mismatches": [
142
  {
143
- "actual": "exact",
144
- "expected": "nearest_equivalent",
145
- "path": "model_output.classification.iab_content.mapping_mode"
146
- },
147
- {
148
- "actual": "Robotics",
149
  "expected": "Computing",
150
  "path": "model_output.classification.iab_content.tier2.label"
151
  },
@@ -165,7 +143,7 @@
165
  "model_output.classification.iab_content.mapping_mode": "exact",
166
  "model_output.classification.iab_content.tier1.label": "Business and Finance",
167
  "model_output.classification.iab_content.tier2.label": "Business",
168
- "model_output.classification.iab_content.tier3.label": "Sales"
169
  },
170
  "expected": {
171
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -179,6 +157,11 @@
179
  "actual": "exact",
180
  "expected": "nearest_equivalent",
181
  "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
182
  }
183
  ],
184
  "notes": "Cross-vertical hard IAB mapping case for Business and Finance > Business > Sales.",
@@ -188,9 +171,9 @@
188
  },
189
  {
190
  "actual": {
191
- "model_output.classification.iab_content.mapping_mode": "exact",
192
- "model_output.classification.iab_content.tier1.label": "Careers",
193
- "model_output.classification.iab_content.tier2.label": "Job Search",
194
  "model_output.classification.iab_content.tier3.label": null
195
  },
196
  "expected": {
@@ -202,17 +185,7 @@
202
  "id": "marketing-tools-easy",
203
  "mismatches": [
204
  {
205
- "actual": "Careers",
206
- "expected": "Technology & Computing",
207
- "path": "model_output.classification.iab_content.tier1.label"
208
- },
209
- {
210
- "actual": "exact",
211
- "expected": "nearest_equivalent",
212
- "path": "model_output.classification.iab_content.mapping_mode"
213
- },
214
- {
215
- "actual": "Job Search",
216
  "expected": "Computing",
217
  "path": "model_output.classification.iab_content.tier2.label"
218
  },
@@ -229,9 +202,9 @@
229
  },
230
  {
231
  "actual": {
232
- "model_output.classification.iab_content.mapping_mode": "exact",
233
- "model_output.classification.iab_content.tier1.label": "Sensitive Topics",
234
- "model_output.classification.iab_content.tier2.label": "Terrorism"
235
  },
236
  "expected": {
237
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -241,17 +214,12 @@
241
  "id": "marketing-tools-medium",
242
  "mismatches": [
243
  {
244
- "actual": "Sensitive Topics",
245
  "expected": "Business and Finance",
246
  "path": "model_output.classification.iab_content.tier1.label"
247
  },
248
  {
249
- "actual": "exact",
250
- "expected": "nearest_equivalent",
251
- "path": "model_output.classification.iab_content.mapping_mode"
252
- },
253
- {
254
- "actual": "Terrorism",
255
  "expected": "Business",
256
  "path": "model_output.classification.iab_content.tier2.label"
257
  }
@@ -263,9 +231,9 @@
263
  },
264
  {
265
  "actual": {
266
- "model_output.classification.iab_content.mapping_mode": "exact",
267
- "model_output.classification.iab_content.tier1.label": "Personal Finance",
268
- "model_output.classification.iab_content.tier2.label": "Home Utilities",
269
  "model_output.classification.iab_content.tier3.label": null
270
  },
271
  "expected": {
@@ -277,17 +245,12 @@
277
  "id": "marketing-tools-hard",
278
  "mismatches": [
279
  {
280
- "actual": "Personal Finance",
281
  "expected": "Technology & Computing",
282
  "path": "model_output.classification.iab_content.tier1.label"
283
  },
284
  {
285
- "actual": "exact",
286
- "expected": "nearest_equivalent",
287
- "path": "model_output.classification.iab_content.mapping_mode"
288
- },
289
- {
290
- "actual": "Home Utilities",
291
  "expected": "Computing",
292
  "path": "model_output.classification.iab_content.tier2.label"
293
  },
@@ -304,10 +267,10 @@
304
  },
305
  {
306
  "actual": {
307
- "model_output.classification.iab_content.mapping_mode": "exact",
308
- "model_output.classification.iab_content.tier1.label": "Technology & Computing",
309
- "model_output.classification.iab_content.tier2.label": "Computing",
310
- "model_output.classification.iab_content.tier3.label": "Information and Network Security"
311
  },
312
  "expected": {
313
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -318,12 +281,17 @@
318
  "id": "business-it-easy",
319
  "mismatches": [
320
  {
321
- "actual": "exact",
322
- "expected": "nearest_equivalent",
323
- "path": "model_output.classification.iab_content.mapping_mode"
324
  },
325
  {
326
- "actual": "Information and Network Security",
 
 
 
 
 
327
  "expected": "Internet",
328
  "path": "model_output.classification.iab_content.tier3.label"
329
  }
@@ -335,9 +303,9 @@
335
  },
336
  {
337
  "actual": {
338
- "model_output.classification.iab_content.mapping_mode": "exact",
339
- "model_output.classification.iab_content.tier1.label": "Careers",
340
- "model_output.classification.iab_content.tier2.label": "Job Search"
341
  },
342
  "expected": {
343
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -347,9 +315,14 @@
347
  "id": "business-it-medium",
348
  "mismatches": [
349
  {
350
- "actual": "exact",
351
- "expected": "nearest_equivalent",
352
- "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
353
  }
354
  ],
355
  "notes": "Cross-vertical medium IAB mapping case for Business and Finance > Business > Business I.T..",
@@ -415,8 +388,8 @@
415
  {
416
  "actual": {
417
  "model_output.classification.iab_content.mapping_mode": "exact",
418
- "model_output.classification.iab_content.tier1.label": "Attractions",
419
- "model_output.classification.iab_content.tier2.label": "Bars & Restaurants"
420
  },
421
  "expected": {
422
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -425,20 +398,10 @@
425
  },
426
  "id": "dining-out-medium",
427
  "mismatches": [
428
- {
429
- "actual": "Attractions",
430
- "expected": "Food & Drink",
431
- "path": "model_output.classification.iab_content.tier1.label"
432
- },
433
  {
434
  "actual": "exact",
435
  "expected": "nearest_equivalent",
436
  "path": "model_output.classification.iab_content.mapping_mode"
437
- },
438
- {
439
- "actual": "Bars & Restaurants",
440
- "expected": "Dining Out",
441
- "path": "model_output.classification.iab_content.tier2.label"
442
  }
443
  ],
444
  "notes": "Cross-vertical medium IAB mapping case for Food & Drink > Dining Out.",
@@ -449,8 +412,8 @@
449
  {
450
  "actual": {
451
  "model_output.classification.iab_content.mapping_mode": "exact",
452
- "model_output.classification.iab_content.tier1.label": "Attractions",
453
- "model_output.classification.iab_content.tier2.label": "Bars & Restaurants"
454
  },
455
  "expected": {
456
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -459,20 +422,10 @@
459
  },
460
  "id": "dining-out-hard",
461
  "mismatches": [
462
- {
463
- "actual": "Attractions",
464
- "expected": "Food & Drink",
465
- "path": "model_output.classification.iab_content.tier1.label"
466
- },
467
  {
468
  "actual": "exact",
469
  "expected": "nearest_equivalent",
470
  "path": "model_output.classification.iab_content.mapping_mode"
471
- },
472
- {
473
- "actual": "Bars & Restaurants",
474
- "expected": "Dining Out",
475
- "path": "model_output.classification.iab_content.tier2.label"
476
  }
477
  ],
478
  "notes": "Cross-vertical hard IAB mapping case for Food & Drink > Dining Out.",
@@ -549,7 +502,7 @@
549
  {
550
  "actual": {
551
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
552
- "model_output.classification.iab_content.tier1.label": "Science"
553
  },
554
  "expected": {
555
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -558,7 +511,7 @@
558
  "id": "artificial-intelligence-easy",
559
  "mismatches": [
560
  {
561
- "actual": "Science",
562
  "expected": "Technology & Computing",
563
  "path": "model_output.classification.iab_content.tier1.label"
564
  }
@@ -570,9 +523,9 @@
570
  },
571
  {
572
  "actual": {
573
- "model_output.classification.iab_content.mapping_mode": "exact",
574
  "model_output.classification.iab_content.tier1.label": "Education",
575
- "model_output.classification.iab_content.tier2.label": "Language Learning"
576
  },
577
  "expected": {
578
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -582,9 +535,9 @@
582
  "id": "artificial-intelligence-medium",
583
  "mismatches": [
584
  {
585
- "actual": "exact",
586
- "expected": "nearest_equivalent",
587
- "path": "model_output.classification.iab_content.mapping_mode"
588
  }
589
  ],
590
  "notes": "Cross-vertical medium IAB mapping case for Technology & Computing > Artificial Intelligence.",
@@ -619,8 +572,8 @@
619
  {
620
  "actual": {
621
  "model_output.classification.iab_content.mapping_mode": "exact",
622
- "model_output.classification.iab_content.tier1.label": "Technology & Computing",
623
- "model_output.classification.iab_content.tier2.label": "Computing"
624
  },
625
  "expected": {
626
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -630,7 +583,7 @@
630
  "id": "software-apps-easy",
631
  "mismatches": [
632
  {
633
- "actual": "Technology & Computing",
634
  "expected": "Business and Finance",
635
  "path": "model_output.classification.iab_content.tier1.label"
636
  },
@@ -640,7 +593,7 @@
640
  "path": "model_output.classification.iab_content.mapping_mode"
641
  },
642
  {
643
- "actual": "Computing",
644
  "expected": "Business",
645
  "path": "model_output.classification.iab_content.tier2.label"
646
  }
@@ -654,8 +607,8 @@
654
  "actual": {
655
  "model_output.classification.iab_content.mapping_mode": "exact",
656
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
657
- "model_output.classification.iab_content.tier2.label": "Computing",
658
- "model_output.classification.iab_content.tier3.label": "Software and Applications",
659
  "model_output.classification.iab_content.tier4.label": null
660
  },
661
  "expected": {
@@ -673,7 +626,12 @@
673
  "path": "model_output.classification.iab_content.mapping_mode"
674
  },
675
  {
676
- "actual": "Software and Applications",
 
 
 
 
 
677
  "expected": "Internet",
678
  "path": "model_output.classification.iab_content.tier3.label"
679
  },
@@ -691,8 +649,8 @@
691
  {
692
  "actual": {
693
  "model_output.classification.iab_content.mapping_mode": "exact",
694
- "model_output.classification.iab_content.tier1.label": "Technology & Computing",
695
- "model_output.classification.iab_content.tier2.label": "Virtual Reality",
696
  "model_output.classification.iab_content.tier3.label": null
697
  },
698
  "expected": {
@@ -703,13 +661,18 @@
703
  },
704
  "id": "software-apps-hard",
705
  "mismatches": [
 
 
 
 
 
706
  {
707
  "actual": "exact",
708
  "expected": "nearest_equivalent",
709
  "path": "model_output.classification.iab_content.mapping_mode"
710
  },
711
  {
712
- "actual": "Virtual Reality",
713
  "expected": "Computing",
714
  "path": "model_output.classification.iab_content.tier2.label"
715
  },
@@ -774,10 +737,10 @@
774
  },
775
  {
776
  "actual": {
777
- "model_output.classification.iab_content.mapping_mode": "exact",
778
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
779
  "model_output.classification.iab_content.tier2.label": "Computing",
780
- "model_output.classification.iab_content.tier3.label": "Information and Network Security",
781
  "model_output.classification.iab_content.tier4.label": null
782
  },
783
  "expected": {
@@ -790,12 +753,7 @@
790
  "id": "communication-software-medium",
791
  "mismatches": [
792
  {
793
- "actual": "exact",
794
- "expected": "nearest_equivalent",
795
- "path": "model_output.classification.iab_content.mapping_mode"
796
- },
797
- {
798
- "actual": "Information and Network Security",
799
  "expected": "Software and Applications",
800
  "path": "model_output.classification.iab_content.tier3.label"
801
  },
@@ -812,9 +770,9 @@
812
  },
813
  {
814
  "actual": {
815
- "model_output.classification.iab_content.mapping_mode": "exact",
816
- "model_output.classification.iab_content.tier1.label": "Technology & Computing",
817
- "model_output.classification.iab_content.tier2.label": "Virtual Reality",
818
  "model_output.classification.iab_content.tier3.label": null,
819
  "model_output.classification.iab_content.tier4.label": null
820
  },
@@ -828,12 +786,12 @@
828
  "id": "communication-software-hard",
829
  "mismatches": [
830
  {
831
- "actual": "exact",
832
- "expected": "nearest_equivalent",
833
- "path": "model_output.classification.iab_content.mapping_mode"
834
  },
835
  {
836
- "actual": "Virtual Reality",
837
  "expected": "Computing",
838
  "path": "model_output.classification.iab_content.tier2.label"
839
  },
@@ -858,8 +816,8 @@
858
  "model_output.classification.iab_content.mapping_mode": "exact",
859
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
860
  "model_output.classification.iab_content.tier2.label": "Computing",
861
- "model_output.classification.iab_content.tier3.label": "Internet",
862
- "model_output.classification.iab_content.tier4.label": "Web Hosting"
863
  },
864
  "expected": {
865
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -874,6 +832,16 @@
874
  "actual": "exact",
875
  "expected": "nearest_equivalent",
876
  "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
 
 
 
 
 
877
  }
878
  ],
879
  "notes": "Cross-vertical easy IAB mapping case for Technology & Computing > Computing > Internet > Web Hosting.",
@@ -883,11 +851,11 @@
883
  },
884
  {
885
  "actual": {
886
- "model_output.classification.iab_content.mapping_mode": "exact",
887
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
888
- "model_output.classification.iab_content.tier2.label": "Computing",
889
- "model_output.classification.iab_content.tier3.label": "Internet",
890
- "model_output.classification.iab_content.tier4.label": "Web Hosting"
891
  },
892
  "expected": {
893
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -899,9 +867,19 @@
899
  "id": "web-hosting-medium",
900
  "mismatches": [
901
  {
902
- "actual": "exact",
903
- "expected": "nearest_equivalent",
904
- "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
 
 
 
 
 
905
  }
906
  ],
907
  "notes": "Cross-vertical medium IAB mapping case for Technology & Computing > Computing > Internet > Web Hosting.",
@@ -911,11 +889,11 @@
911
  },
912
  {
913
  "actual": {
914
- "model_output.classification.iab_content.mapping_mode": "exact",
915
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
916
  "model_output.classification.iab_content.tier2.label": "Computing",
917
- "model_output.classification.iab_content.tier3.label": "Internet",
918
- "model_output.classification.iab_content.tier4.label": "Web Hosting"
919
  },
920
  "expected": {
921
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -927,9 +905,14 @@
927
  "id": "web-hosting-hard",
928
  "mismatches": [
929
  {
930
- "actual": "exact",
931
- "expected": "nearest_equivalent",
932
- "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
933
  }
934
  ],
935
  "notes": "Cross-vertical hard IAB mapping case for Technology & Computing > Computing > Internet > Web Hosting.",
@@ -993,8 +976,8 @@
993
  "actual": {
994
  "model_output.classification.iab_content.mapping_mode": "exact",
995
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
996
- "model_output.classification.iab_content.tier2.label": "Computing",
997
- "model_output.classification.iab_content.tier3.label": "Laptops"
998
  },
999
  "expected": {
1000
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -1010,12 +993,7 @@
1010
  "path": "model_output.classification.iab_content.mapping_mode"
1011
  },
1012
  {
1013
- "actual": "Computing",
1014
- "expected": "Consumer Electronics",
1015
- "path": "model_output.classification.iab_content.tier2.label"
1016
- },
1017
- {
1018
- "actual": "Laptops",
1019
  "expected": "Smartphones",
1020
  "path": "model_output.classification.iab_content.tier3.label"
1021
  }
@@ -1027,11 +1005,11 @@
1027
  },
1028
  {
1029
  "actual": {
1030
- "model_output.classification.iab_content.mapping_mode": "exact",
1031
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1032
- "model_output.classification.iab_content.tier2.label": "Computing",
1033
- "model_output.classification.iab_content.tier3.label": "Software and Applications",
1034
- "model_output.classification.iab_content.tier4.label": "Computer Animation"
1035
  },
1036
  "expected": {
1037
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -1043,12 +1021,17 @@
1043
  "id": "desktops-easy",
1044
  "mismatches": [
1045
  {
1046
- "actual": "exact",
1047
- "expected": "nearest_equivalent",
1048
- "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
1049
  },
1050
  {
1051
- "actual": "Computer Animation",
1052
  "expected": "Photo Editing Software",
1053
  "path": "model_output.classification.iab_content.tier4.label"
1054
  }
@@ -1060,10 +1043,10 @@
1060
  },
1061
  {
1062
  "actual": {
1063
- "model_output.classification.iab_content.mapping_mode": "exact",
1064
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1065
  "model_output.classification.iab_content.tier2.label": "Computing",
1066
- "model_output.classification.iab_content.tier3.label": "Desktops"
1067
  },
1068
  "expected": {
1069
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -1074,9 +1057,9 @@
1074
  "id": "desktops-medium",
1075
  "mismatches": [
1076
  {
1077
- "actual": "exact",
1078
- "expected": "nearest_equivalent",
1079
- "path": "model_output.classification.iab_content.mapping_mode"
1080
  }
1081
  ],
1082
  "notes": "Cross-vertical medium IAB mapping case for Technology & Computing > Computing > Desktops.",
@@ -1086,10 +1069,10 @@
1086
  },
1087
  {
1088
  "actual": {
1089
- "model_output.classification.iab_content.mapping_mode": "exact",
1090
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1091
- "model_output.classification.iab_content.tier2.label": "Computing",
1092
- "model_output.classification.iab_content.tier3.label": "Desktops"
1093
  },
1094
  "expected": {
1095
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -1100,9 +1083,14 @@
1100
  "id": "desktops-hard",
1101
  "mismatches": [
1102
  {
1103
- "actual": "exact",
1104
- "expected": "nearest_equivalent",
1105
- "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
1106
  }
1107
  ],
1108
  "notes": "Cross-vertical hard IAB mapping case for Technology & Computing > Computing > Desktops.",
@@ -1188,10 +1176,10 @@
1188
  },
1189
  {
1190
  "actual": {
1191
- "model_output.classification.iab_content.mapping_mode": "exact",
1192
- "model_output.classification.iab_content.tier1.label": "Style & Fashion",
1193
- "model_output.classification.iab_content.tier2.label": "Men's Fashion",
1194
- "model_output.classification.iab_content.tier3.label": "Men's Shoes and Footwear"
1195
  },
1196
  "expected": {
1197
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -1202,17 +1190,17 @@
1202
  "id": "style-fashion-parent-easy",
1203
  "mismatches": [
1204
  {
1205
- "actual": "exact",
1206
- "expected": "nearest_equivalent",
1207
- "path": "model_output.classification.iab_content.mapping_mode"
1208
  },
1209
  {
1210
- "actual": "Men's Fashion",
1211
  "expected": "Women's Fashion",
1212
  "path": "model_output.classification.iab_content.tier2.label"
1213
  },
1214
  {
1215
- "actual": "Men's Shoes and Footwear",
1216
  "expected": "Women's Shoes and Footwear",
1217
  "path": "model_output.classification.iab_content.tier3.label"
1218
  }
@@ -1286,9 +1274,9 @@
1286
  },
1287
  {
1288
  "actual": {
1289
- "model_output.classification.iab_content.mapping_mode": "exact",
1290
- "model_output.classification.iab_content.tier1.label": "Sports",
1291
- "model_output.classification.iab_content.tier2.label": "Bodybuilding",
1292
  "model_output.classification.iab_content.tier3.label": null
1293
  },
1294
  "expected": {
@@ -1300,17 +1288,7 @@
1300
  "id": "womens-shoes-easy",
1301
  "mismatches": [
1302
  {
1303
- "actual": "Sports",
1304
- "expected": "Style & Fashion",
1305
- "path": "model_output.classification.iab_content.tier1.label"
1306
- },
1307
- {
1308
- "actual": "exact",
1309
- "expected": "nearest_equivalent",
1310
- "path": "model_output.classification.iab_content.mapping_mode"
1311
- },
1312
- {
1313
- "actual": "Bodybuilding",
1314
  "expected": "Women's Fashion",
1315
  "path": "model_output.classification.iab_content.tier2.label"
1316
  },
@@ -1327,9 +1305,9 @@
1327
  },
1328
  {
1329
  "actual": {
1330
- "model_output.classification.iab_content.mapping_mode": "exact",
1331
- "model_output.classification.iab_content.tier1.label": "Sports",
1332
- "model_output.classification.iab_content.tier2.label": "Walking",
1333
  "model_output.classification.iab_content.tier3.label": null
1334
  },
1335
  "expected": {
@@ -1341,17 +1319,7 @@
1341
  "id": "womens-shoes-medium",
1342
  "mismatches": [
1343
  {
1344
- "actual": "Sports",
1345
- "expected": "Style & Fashion",
1346
- "path": "model_output.classification.iab_content.tier1.label"
1347
- },
1348
- {
1349
- "actual": "exact",
1350
- "expected": "nearest_equivalent",
1351
- "path": "model_output.classification.iab_content.mapping_mode"
1352
- },
1353
- {
1354
- "actual": "Walking",
1355
  "expected": "Women's Fashion",
1356
  "path": "model_output.classification.iab_content.tier2.label"
1357
  },
@@ -1396,8 +1364,8 @@
1396
  "actual": {
1397
  "model_output.classification.iab_content.mapping_mode": "exact",
1398
  "model_output.classification.iab_content.tier1.label": "Style & Fashion",
1399
- "model_output.classification.iab_content.tier2.label": "Children's Clothing",
1400
- "model_output.classification.iab_content.tier3.label": null
1401
  },
1402
  "expected": {
1403
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -1413,12 +1381,12 @@
1413
  "path": "model_output.classification.iab_content.mapping_mode"
1414
  },
1415
  {
1416
- "actual": "Children's Clothing",
1417
  "expected": "Men's Fashion",
1418
  "path": "model_output.classification.iab_content.tier2.label"
1419
  },
1420
  {
1421
- "actual": null,
1422
  "expected": "Men's Clothing",
1423
  "path": "model_output.classification.iab_content.tier3.label"
1424
  }
@@ -1432,8 +1400,8 @@
1432
  "actual": {
1433
  "model_output.classification.iab_content.mapping_mode": "exact",
1434
  "model_output.classification.iab_content.tier1.label": "Style & Fashion",
1435
- "model_output.classification.iab_content.tier2.label": "Men's Fashion",
1436
- "model_output.classification.iab_content.tier3.label": "Men's Shoes and Footwear"
1437
  },
1438
  "expected": {
1439
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -1449,7 +1417,12 @@
1449
  "path": "model_output.classification.iab_content.mapping_mode"
1450
  },
1451
  {
1452
- "actual": "Men's Shoes and Footwear",
 
 
 
 
 
1453
  "expected": "Men's Clothing",
1454
  "path": "model_output.classification.iab_content.tier3.label"
1455
  }
@@ -1463,8 +1436,8 @@
1463
  "actual": {
1464
  "model_output.classification.iab_content.mapping_mode": "exact",
1465
  "model_output.classification.iab_content.tier1.label": "Style & Fashion",
1466
- "model_output.classification.iab_content.tier2.label": "Children's Clothing",
1467
- "model_output.classification.iab_content.tier3.label": null
1468
  },
1469
  "expected": {
1470
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -1480,12 +1453,12 @@
1480
  "path": "model_output.classification.iab_content.mapping_mode"
1481
  },
1482
  {
1483
- "actual": "Children's Clothing",
1484
  "expected": "Men's Fashion",
1485
  "path": "model_output.classification.iab_content.tier2.label"
1486
  },
1487
  {
1488
- "actual": null,
1489
  "expected": "Men's Shoes and Footwear",
1490
  "path": "model_output.classification.iab_content.tier3.label"
1491
  }
@@ -1531,10 +1504,10 @@
1531
  },
1532
  {
1533
  "actual": {
1534
- "model_output.classification.iab_content.mapping_mode": "exact",
1535
  "model_output.classification.iab_content.tier1.label": "Travel",
1536
- "model_output.classification.iab_content.tier2.label": "Travel Type",
1537
- "model_output.classification.iab_content.tier3.label": "Hotels and Motels"
1538
  },
1539
  "expected": {
1540
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -1545,9 +1518,14 @@
1545
  "id": "hotels-medium",
1546
  "mismatches": [
1547
  {
1548
- "actual": "exact",
1549
- "expected": "nearest_equivalent",
1550
- "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
1551
  }
1552
  ],
1553
  "notes": "Cross-vertical medium IAB mapping case for Travel > Travel Type > Hotels and Motels.",
@@ -1559,7 +1537,7 @@
1559
  "actual": {
1560
  "model_output.classification.iab_content.mapping_mode": "exact",
1561
  "model_output.classification.iab_content.tier1.label": "Travel",
1562
- "model_output.classification.iab_content.tier2.label": "Travel Type"
1563
  },
1564
  "expected": {
1565
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -1572,6 +1550,11 @@
1572
  "actual": "exact",
1573
  "expected": "nearest_equivalent",
1574
  "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
1575
  }
1576
  ],
1577
  "notes": "Cross-vertical hard IAB mapping case for Travel > Travel Type > Hotels and Motels.",
@@ -1672,8 +1655,8 @@
1672
  "actual": {
1673
  "model_output.classification.iab_content.mapping_mode": "exact",
1674
  "model_output.classification.iab_content.tier1.label": "Healthy Living",
1675
- "model_output.classification.iab_content.tier2.label": "Fitness and Exercise",
1676
- "model_output.classification.iab_content.tier3.label": "Running and Jogging"
1677
  },
1678
  "expected": {
1679
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -1694,12 +1677,12 @@
1694
  "path": "model_output.classification.iab_content.mapping_mode"
1695
  },
1696
  {
1697
- "actual": "Fitness and Exercise",
1698
  "expected": "Business",
1699
  "path": "model_output.classification.iab_content.tier2.label"
1700
  },
1701
  {
1702
- "actual": "Running and Jogging",
1703
  "expected": "Green Solutions",
1704
  "path": "model_output.classification.iab_content.tier3.label"
1705
  }
@@ -1713,7 +1696,7 @@
1713
  "actual": {
1714
  "model_output.classification.iab_content.mapping_mode": "exact",
1715
  "model_output.classification.iab_content.tier1.label": "Sports",
1716
- "model_output.classification.iab_content.tier2.label": "Walking",
1717
  "model_output.classification.iab_content.tier3.label": null
1718
  },
1719
  "expected": {
@@ -1735,7 +1718,7 @@
1735
  "path": "model_output.classification.iab_content.mapping_mode"
1736
  },
1737
  {
1738
- "actual": "Walking",
1739
  "expected": "Fitness and Exercise",
1740
  "path": "model_output.classification.iab_content.tier2.label"
1741
  },
@@ -1897,9 +1880,9 @@
1897
  },
1898
  {
1899
  "actual": {
1900
- "model_output.classification.iab_content.mapping_mode": "exact",
1901
  "model_output.classification.iab_content.tier1.label": "Travel",
1902
- "model_output.classification.iab_content.tier2.label": "Travel Type"
1903
  },
1904
  "expected": {
1905
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -1914,12 +1897,7 @@
1914
  "path": "model_output.classification.iab_content.tier1.label"
1915
  },
1916
  {
1917
- "actual": "exact",
1918
- "expected": "nearest_equivalent",
1919
- "path": "model_output.classification.iab_content.mapping_mode"
1920
- },
1921
- {
1922
- "actual": "Travel Type",
1923
  "expected": "Fiction",
1924
  "path": "model_output.classification.iab_content.tier2.label"
1925
  }
@@ -1932,7 +1910,7 @@
1932
  {
1933
  "actual": {
1934
  "model_output.classification.iab_content.mapping_mode": "exact",
1935
- "model_output.classification.iab_content.tier1.label": "Books and Literature"
1936
  },
1937
  "expected": {
1938
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -1940,6 +1918,11 @@
1940
  },
1941
  "id": "fiction-hard",
1942
  "mismatches": [
 
 
 
 
 
1943
  {
1944
  "actual": "exact",
1945
  "expected": "nearest_equivalent",
@@ -1955,7 +1938,7 @@
1955
  "actual": {
1956
  "model_output.classification.iab_content.mapping_mode": "exact",
1957
  "model_output.classification.iab_content.tier1.label": "Home & Garden",
1958
- "model_output.classification.iab_content.tier2.label": "Interior Decorating"
1959
  },
1960
  "expected": {
1961
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -1968,11 +1951,6 @@
1968
  "actual": "exact",
1969
  "expected": "nearest_equivalent",
1970
  "path": "model_output.classification.iab_content.mapping_mode"
1971
- },
1972
- {
1973
- "actual": "Interior Decorating",
1974
- "expected": "Remodeling & Construction",
1975
- "path": "model_output.classification.iab_content.tier2.label"
1976
  }
1977
  ],
1978
  "notes": "Cross-vertical easy IAB mapping case for Home & Garden > Home Improvement.",
@@ -1983,9 +1961,9 @@
1983
  {
1984
  "actual": {
1985
  "model_output.classification.iab_content.mapping_mode": "exact",
1986
- "model_output.classification.iab_content.tier1.label": "Home & Garden",
1987
- "model_output.classification.iab_content.tier2.label": "Interior Decorating",
1988
- "model_output.classification.iab_content.tier3.label": null
1989
  },
1990
  "expected": {
1991
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -1995,25 +1973,10 @@
1995
  },
1996
  "id": "home-improvement-medium",
1997
  "mismatches": [
1998
- {
1999
- "actual": "Home & Garden",
2000
- "expected": "Style & Fashion",
2001
- "path": "model_output.classification.iab_content.tier1.label"
2002
- },
2003
  {
2004
  "actual": "exact",
2005
  "expected": "nearest_equivalent",
2006
  "path": "model_output.classification.iab_content.mapping_mode"
2007
- },
2008
- {
2009
- "actual": "Interior Decorating",
2010
- "expected": "Personal Care",
2011
- "path": "model_output.classification.iab_content.tier2.label"
2012
- },
2013
- {
2014
- "actual": null,
2015
- "expected": "Bath and Shower",
2016
- "path": "model_output.classification.iab_content.tier3.label"
2017
  }
2018
  ],
2019
  "notes": "Cross-vertical medium IAB mapping case for Home & Garden > Home Improvement.",
@@ -2057,9 +2020,9 @@
2057
  },
2058
  {
2059
  "actual": {
2060
- "model_output.classification.iab_content.mapping_mode": "exact",
2061
- "model_output.classification.iab_content.tier1.label": "Technology & Computing",
2062
- "model_output.classification.iab_content.tier2.label": "Augmented Reality"
2063
  },
2064
  "expected": {
2065
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -2069,17 +2032,12 @@
2069
  "id": "online-education-easy",
2070
  "mismatches": [
2071
  {
2072
- "actual": "Technology & Computing",
2073
  "expected": "Education",
2074
  "path": "model_output.classification.iab_content.tier1.label"
2075
  },
2076
  {
2077
- "actual": "exact",
2078
- "expected": "nearest_equivalent",
2079
- "path": "model_output.classification.iab_content.mapping_mode"
2080
- },
2081
- {
2082
- "actual": "Augmented Reality",
2083
  "expected": "Language Learning",
2084
  "path": "model_output.classification.iab_content.tier2.label"
2085
  }
@@ -2218,10 +2176,10 @@
2218
  },
2219
  {
2220
  "actual": {
2221
- "model_output.classification.iab_content.mapping_mode": "exact",
2222
- "model_output.classification.iab_content.tier1.label": "Medical Health",
2223
- "model_output.classification.iab_content.tier2.label": "Diseases and Conditions",
2224
- "model_output.classification.iab_content.tier3.label": "Allergies"
2225
  },
2226
  "expected": {
2227
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -2232,9 +2190,19 @@
2232
  "id": "medical-health-easy",
2233
  "mismatches": [
2234
  {
2235
- "actual": "exact",
2236
- "expected": "nearest_equivalent",
2237
- "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
 
 
 
 
 
2238
  }
2239
  ],
2240
  "notes": "Cross-vertical easy IAB mapping case for Medical Health.",
@@ -2244,10 +2212,10 @@
2244
  },
2245
  {
2246
  "actual": {
2247
- "model_output.classification.iab_content.mapping_mode": "exact",
2248
  "model_output.classification.iab_content.tier1.label": "Medical Health",
2249
  "model_output.classification.iab_content.tier2.label": "Diseases and Conditions",
2250
- "model_output.classification.iab_content.tier3.label": "Bone and Joint Conditions",
2251
  "model_output.classification.iab_content.tier4.label": null
2252
  },
2253
  "expected": {
@@ -2260,12 +2228,7 @@
2260
  "id": "medical-health-medium",
2261
  "mismatches": [
2262
  {
2263
- "actual": "exact",
2264
- "expected": "nearest_equivalent",
2265
- "path": "model_output.classification.iab_content.mapping_mode"
2266
- },
2267
- {
2268
- "actual": "Bone and Joint Conditions",
2269
  "expected": "Injuries",
2270
  "path": "model_output.classification.iab_content.tier3.label"
2271
  },
@@ -2284,7 +2247,7 @@
2284
  "actual": {
2285
  "model_output.classification.iab_content.mapping_mode": "exact",
2286
  "model_output.classification.iab_content.tier1.label": "Medical Health",
2287
- "model_output.classification.iab_content.tier2.label": null,
2288
  "model_output.classification.iab_content.tier3.label": null
2289
  },
2290
  "expected": {
@@ -2306,7 +2269,7 @@
2306
  "path": "model_output.classification.iab_content.mapping_mode"
2307
  },
2308
  {
2309
- "actual": null,
2310
  "expected": "Wellness",
2311
  "path": "model_output.classification.iab_content.tier2.label"
2312
  },
@@ -2407,9 +2370,9 @@
2407
  },
2408
  {
2409
  "actual": {
2410
- "model_output.classification.iab_content.mapping_mode": "exact",
2411
  "model_output.classification.iab_content.tier1.label": "Holidays",
2412
- "model_output.classification.iab_content.tier2.label": "National & Civic Holidays"
2413
  },
2414
  "expected": {
2415
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -2424,12 +2387,7 @@
2424
  "path": "model_output.classification.iab_content.tier1.label"
2425
  },
2426
  {
2427
- "actual": "exact",
2428
- "expected": "nearest_equivalent",
2429
- "path": "model_output.classification.iab_content.mapping_mode"
2430
- },
2431
- {
2432
- "actual": "National & Civic Holidays",
2433
  "expected": "Food Movements",
2434
  "path": "model_output.classification.iab_content.tier2.label"
2435
  }
@@ -2530,9 +2488,9 @@
2530
  },
2531
  {
2532
  "actual": {
2533
- "model_output.classification.iab_content.mapping_mode": "exact",
2534
- "model_output.classification.iab_content.tier1.label": "Genres",
2535
- "model_output.classification.iab_content.tier2.label": "Family/Children"
2536
  },
2537
  "expected": {
2538
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -2542,17 +2500,12 @@
2542
  "id": "parenting-medium",
2543
  "mismatches": [
2544
  {
2545
- "actual": "Genres",
2546
  "expected": "Family and Relationships",
2547
  "path": "model_output.classification.iab_content.tier1.label"
2548
  },
2549
  {
2550
- "actual": "exact",
2551
- "expected": "nearest_equivalent",
2552
- "path": "model_output.classification.iab_content.mapping_mode"
2553
- },
2554
- {
2555
- "actual": "Family/Children",
2556
  "expected": "Parenting",
2557
  "path": "model_output.classification.iab_content.tier2.label"
2558
  }
@@ -2567,7 +2520,7 @@
2567
  "model_output.classification.iab_content.mapping_mode": "exact",
2568
  "model_output.classification.iab_content.tier1.label": "Family and Relationships",
2569
  "model_output.classification.iab_content.tier2.label": "Parenting",
2570
- "model_output.classification.iab_content.tier3.label": null
2571
  },
2572
  "expected": {
2573
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -2583,7 +2536,7 @@
2583
  "path": "model_output.classification.iab_content.mapping_mode"
2584
  },
2585
  {
2586
- "actual": null,
2587
  "expected": "Special Needs Kids",
2588
  "path": "model_output.classification.iab_content.tier3.label"
2589
  }
@@ -2665,7 +2618,7 @@
2665
  "actual": {
2666
  "model_output.classification.iab_content.mapping_mode": "exact",
2667
  "model_output.classification.iab_content.tier1.label": "Entertainment",
2668
- "model_output.classification.iab_content.tier2.label": "Movies"
2669
  },
2670
  "expected": {
2671
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -2678,6 +2631,11 @@
2678
  "actual": "exact",
2679
  "expected": "nearest_equivalent",
2680
  "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
2681
  }
2682
  ],
2683
  "notes": "Cross-vertical easy IAB mapping case for Entertainment > Movies.",
@@ -2688,8 +2646,8 @@
2688
  {
2689
  "actual": {
2690
  "model_output.classification.iab_content.mapping_mode": "exact",
2691
- "model_output.classification.iab_content.tier1.label": "Entertainment",
2692
- "model_output.classification.iab_content.tier2.label": "Movies",
2693
  "model_output.classification.iab_content.tier3.label": null
2694
  },
2695
  "expected": {
@@ -2701,7 +2659,7 @@
2701
  "id": "movies-medium",
2702
  "mismatches": [
2703
  {
2704
- "actual": "Entertainment",
2705
  "expected": "Video Gaming",
2706
  "path": "model_output.classification.iab_content.tier1.label"
2707
  },
@@ -2711,7 +2669,7 @@
2711
  "path": "model_output.classification.iab_content.mapping_mode"
2712
  },
2713
  {
2714
- "actual": "Movies",
2715
  "expected": "Video Game Genres",
2716
  "path": "model_output.classification.iab_content.tier2.label"
2717
  },
 
1
  {
2
  "by_status": {
3
  "must_fix": {
4
+ "failed": 88,
5
+ "passed": 2,
6
  "total": 90
7
  }
8
  },
9
  "cases_path": "/content/agentic-intent-classifier/examples/iab_cross_vertical_behavior_lock_cases.json",
10
  "count": 90,
11
+ "failed": 88,
12
+ "passed": 2,
13
  "results": [
14
  {
15
  "actual": {
16
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
17
+ "model_output.classification.iab_content.tier1.label": "Travel",
18
+ "model_output.classification.iab_content.tier2.label": "Travel Type"
19
  },
20
  "expected": {
21
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
23
  "model_output.classification.iab_content.tier2.label": "Travel Type"
24
  },
25
  "id": "auto-buying-easy",
26
+ "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  "notes": "Cross-vertical easy IAB mapping case for Automotive > Auto Buying and Selling.",
28
+ "pass": true,
29
  "status": "must_fix",
30
  "text": "Which car should I buy for commuting?"
31
  },
32
  {
33
  "actual": {
34
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
35
  "model_output.classification.iab_content.tier1.label": "Automotive",
36
  "model_output.classification.iab_content.tier2.label": "Auto Body Styles"
37
  },
 
41
  "model_output.classification.iab_content.tier2.label": "Auto Body Styles"
42
  },
43
  "id": "auto-buying-medium",
44
+ "mismatches": [],
 
 
 
 
 
 
45
  "notes": "Cross-vertical medium IAB mapping case for Automotive > Auto Buying and Selling.",
46
+ "pass": true,
47
  "status": "must_fix",
48
  "text": "Best used SUV for a family of four"
49
  },
50
  {
51
  "actual": {
52
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
53
  "model_output.classification.iab_content.tier1.label": "Automotive",
54
+ "model_output.classification.iab_content.tier2.label": null
55
  },
56
  "expected": {
57
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
61
  "id": "auto-buying-hard",
62
  "mismatches": [
63
  {
64
+ "actual": null,
 
 
 
 
 
65
  "expected": "Auto Type",
66
  "path": "model_output.classification.iab_content.tier2.label"
67
  }
 
75
  "actual": {
76
  "model_output.classification.iab_content.mapping_mode": "exact",
77
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
78
+ "model_output.classification.iab_content.tier2.label": null,
79
+ "model_output.classification.iab_content.tier3.label": null
80
  },
81
  "expected": {
82
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
90
  "actual": "exact",
91
  "expected": "nearest_equivalent",
92
  "path": "model_output.classification.iab_content.mapping_mode"
93
+ },
94
+ {
95
+ "actual": null,
96
+ "expected": "Computing",
97
+ "path": "model_output.classification.iab_content.tier2.label"
98
+ },
99
+ {
100
+ "actual": null,
101
+ "expected": "Software and Applications",
102
+ "path": "model_output.classification.iab_content.tier3.label"
103
  }
104
  ],
105
  "notes": "Cross-vertical easy IAB mapping case for Business and Finance > Business > Sales.",
 
109
  },
110
  {
111
  "actual": {
112
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
113
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
114
+ "model_output.classification.iab_content.tier2.label": null,
115
  "model_output.classification.iab_content.tier3.label": null
116
  },
117
  "expected": {
 
123
  "id": "sales-crm-medium",
124
  "mismatches": [
125
  {
126
+ "actual": null,
 
 
 
 
 
127
  "expected": "Computing",
128
  "path": "model_output.classification.iab_content.tier2.label"
129
  },
 
143
  "model_output.classification.iab_content.mapping_mode": "exact",
144
  "model_output.classification.iab_content.tier1.label": "Business and Finance",
145
  "model_output.classification.iab_content.tier2.label": "Business",
146
+ "model_output.classification.iab_content.tier3.label": "Startups"
147
  },
148
  "expected": {
149
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
157
  "actual": "exact",
158
  "expected": "nearest_equivalent",
159
  "path": "model_output.classification.iab_content.mapping_mode"
160
+ },
161
+ {
162
+ "actual": "Startups",
163
+ "expected": "Sales",
164
+ "path": "model_output.classification.iab_content.tier3.label"
165
  }
166
  ],
167
  "notes": "Cross-vertical hard IAB mapping case for Business and Finance > Business > Sales.",
 
171
  },
172
  {
173
  "actual": {
174
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
175
+ "model_output.classification.iab_content.tier1.label": "Technology & Computing",
176
+ "model_output.classification.iab_content.tier2.label": null,
177
  "model_output.classification.iab_content.tier3.label": null
178
  },
179
  "expected": {
 
185
  "id": "marketing-tools-easy",
186
  "mismatches": [
187
  {
188
+ "actual": null,
 
 
 
 
 
 
 
 
 
 
189
  "expected": "Computing",
190
  "path": "model_output.classification.iab_content.tier2.label"
191
  },
 
202
  },
203
  {
204
  "actual": {
205
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
206
+ "model_output.classification.iab_content.tier1.label": "Technology & Computing",
207
+ "model_output.classification.iab_content.tier2.label": null
208
  },
209
  "expected": {
210
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
214
  "id": "marketing-tools-medium",
215
  "mismatches": [
216
  {
217
+ "actual": "Technology & Computing",
218
  "expected": "Business and Finance",
219
  "path": "model_output.classification.iab_content.tier1.label"
220
  },
221
  {
222
+ "actual": null,
 
 
 
 
 
223
  "expected": "Business",
224
  "path": "model_output.classification.iab_content.tier2.label"
225
  }
 
231
  },
232
  {
233
  "actual": {
234
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
235
+ "model_output.classification.iab_content.tier1.label": "Careers",
236
+ "model_output.classification.iab_content.tier2.label": null,
237
  "model_output.classification.iab_content.tier3.label": null
238
  },
239
  "expected": {
 
245
  "id": "marketing-tools-hard",
246
  "mismatches": [
247
  {
248
+ "actual": "Careers",
249
  "expected": "Technology & Computing",
250
  "path": "model_output.classification.iab_content.tier1.label"
251
  },
252
  {
253
+ "actual": null,
 
 
 
 
 
254
  "expected": "Computing",
255
  "path": "model_output.classification.iab_content.tier2.label"
256
  },
 
267
  },
268
  {
269
  "actual": {
270
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
271
+ "model_output.classification.iab_content.tier1.label": "Careers",
272
+ "model_output.classification.iab_content.tier2.label": null,
273
+ "model_output.classification.iab_content.tier3.label": null
274
  },
275
  "expected": {
276
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
281
  "id": "business-it-easy",
282
  "mismatches": [
283
  {
284
+ "actual": "Careers",
285
+ "expected": "Technology & Computing",
286
+ "path": "model_output.classification.iab_content.tier1.label"
287
  },
288
  {
289
+ "actual": null,
290
+ "expected": "Computing",
291
+ "path": "model_output.classification.iab_content.tier2.label"
292
+ },
293
+ {
294
+ "actual": null,
295
  "expected": "Internet",
296
  "path": "model_output.classification.iab_content.tier3.label"
297
  }
 
303
  },
304
  {
305
  "actual": {
306
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
307
+ "model_output.classification.iab_content.tier1.label": "Personal Finance",
308
+ "model_output.classification.iab_content.tier2.label": null
309
  },
310
  "expected": {
311
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
315
  "id": "business-it-medium",
316
  "mismatches": [
317
  {
318
+ "actual": "Personal Finance",
319
+ "expected": "Careers",
320
+ "path": "model_output.classification.iab_content.tier1.label"
321
+ },
322
+ {
323
+ "actual": null,
324
+ "expected": "Job Search",
325
+ "path": "model_output.classification.iab_content.tier2.label"
326
  }
327
  ],
328
  "notes": "Cross-vertical medium IAB mapping case for Business and Finance > Business > Business I.T..",
 
388
  {
389
  "actual": {
390
  "model_output.classification.iab_content.mapping_mode": "exact",
391
+ "model_output.classification.iab_content.tier1.label": "Food & Drink",
392
+ "model_output.classification.iab_content.tier2.label": "Dining Out"
393
  },
394
  "expected": {
395
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
398
  },
399
  "id": "dining-out-medium",
400
  "mismatches": [
 
 
 
 
 
401
  {
402
  "actual": "exact",
403
  "expected": "nearest_equivalent",
404
  "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
405
  }
406
  ],
407
  "notes": "Cross-vertical medium IAB mapping case for Food & Drink > Dining Out.",
 
412
  {
413
  "actual": {
414
  "model_output.classification.iab_content.mapping_mode": "exact",
415
+ "model_output.classification.iab_content.tier1.label": "Food & Drink",
416
+ "model_output.classification.iab_content.tier2.label": "Dining Out"
417
  },
418
  "expected": {
419
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
422
  },
423
  "id": "dining-out-hard",
424
  "mismatches": [
 
 
 
 
 
425
  {
426
  "actual": "exact",
427
  "expected": "nearest_equivalent",
428
  "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
429
  }
430
  ],
431
  "notes": "Cross-vertical hard IAB mapping case for Food & Drink > Dining Out.",
 
502
  {
503
  "actual": {
504
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
505
+ "model_output.classification.iab_content.tier1.label": "Real Estate"
506
  },
507
  "expected": {
508
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
511
  "id": "artificial-intelligence-easy",
512
  "mismatches": [
513
  {
514
+ "actual": "Real Estate",
515
  "expected": "Technology & Computing",
516
  "path": "model_output.classification.iab_content.tier1.label"
517
  }
 
523
  },
524
  {
525
  "actual": {
526
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
527
  "model_output.classification.iab_content.tier1.label": "Education",
528
+ "model_output.classification.iab_content.tier2.label": null
529
  },
530
  "expected": {
531
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
535
  "id": "artificial-intelligence-medium",
536
  "mismatches": [
537
  {
538
+ "actual": null,
539
+ "expected": "Language Learning",
540
+ "path": "model_output.classification.iab_content.tier2.label"
541
  }
542
  ],
543
  "notes": "Cross-vertical medium IAB mapping case for Technology & Computing > Artificial Intelligence.",
 
572
  {
573
  "actual": {
574
  "model_output.classification.iab_content.mapping_mode": "exact",
575
+ "model_output.classification.iab_content.tier1.label": "Careers",
576
+ "model_output.classification.iab_content.tier2.label": "Job Search"
577
  },
578
  "expected": {
579
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
583
  "id": "software-apps-easy",
584
  "mismatches": [
585
  {
586
+ "actual": "Careers",
587
  "expected": "Business and Finance",
588
  "path": "model_output.classification.iab_content.tier1.label"
589
  },
 
593
  "path": "model_output.classification.iab_content.mapping_mode"
594
  },
595
  {
596
+ "actual": "Job Search",
597
  "expected": "Business",
598
  "path": "model_output.classification.iab_content.tier2.label"
599
  }
 
607
  "actual": {
608
  "model_output.classification.iab_content.mapping_mode": "exact",
609
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
610
+ "model_output.classification.iab_content.tier2.label": null,
611
+ "model_output.classification.iab_content.tier3.label": null,
612
  "model_output.classification.iab_content.tier4.label": null
613
  },
614
  "expected": {
 
626
  "path": "model_output.classification.iab_content.mapping_mode"
627
  },
628
  {
629
+ "actual": null,
630
+ "expected": "Computing",
631
+ "path": "model_output.classification.iab_content.tier2.label"
632
+ },
633
+ {
634
+ "actual": null,
635
  "expected": "Internet",
636
  "path": "model_output.classification.iab_content.tier3.label"
637
  },
 
649
  {
650
  "actual": {
651
  "model_output.classification.iab_content.mapping_mode": "exact",
652
+ "model_output.classification.iab_content.tier1.label": "Careers",
653
+ "model_output.classification.iab_content.tier2.label": "Job Search",
654
  "model_output.classification.iab_content.tier3.label": null
655
  },
656
  "expected": {
 
661
  },
662
  "id": "software-apps-hard",
663
  "mismatches": [
664
+ {
665
+ "actual": "Careers",
666
+ "expected": "Technology & Computing",
667
+ "path": "model_output.classification.iab_content.tier1.label"
668
+ },
669
  {
670
  "actual": "exact",
671
  "expected": "nearest_equivalent",
672
  "path": "model_output.classification.iab_content.mapping_mode"
673
  },
674
  {
675
+ "actual": "Job Search",
676
  "expected": "Computing",
677
  "path": "model_output.classification.iab_content.tier2.label"
678
  },
 
737
  },
738
  {
739
  "actual": {
740
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
741
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
742
  "model_output.classification.iab_content.tier2.label": "Computing",
743
+ "model_output.classification.iab_content.tier3.label": null,
744
  "model_output.classification.iab_content.tier4.label": null
745
  },
746
  "expected": {
 
753
  "id": "communication-software-medium",
754
  "mismatches": [
755
  {
756
+ "actual": null,
 
 
 
 
 
757
  "expected": "Software and Applications",
758
  "path": "model_output.classification.iab_content.tier3.label"
759
  },
 
770
  },
771
  {
772
  "actual": {
773
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
774
+ "model_output.classification.iab_content.tier1.label": "Careers",
775
+ "model_output.classification.iab_content.tier2.label": null,
776
  "model_output.classification.iab_content.tier3.label": null,
777
  "model_output.classification.iab_content.tier4.label": null
778
  },
 
786
  "id": "communication-software-hard",
787
  "mismatches": [
788
  {
789
+ "actual": "Careers",
790
+ "expected": "Technology & Computing",
791
+ "path": "model_output.classification.iab_content.tier1.label"
792
  },
793
  {
794
+ "actual": null,
795
  "expected": "Computing",
796
  "path": "model_output.classification.iab_content.tier2.label"
797
  },
 
816
  "model_output.classification.iab_content.mapping_mode": "exact",
817
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
818
  "model_output.classification.iab_content.tier2.label": "Computing",
819
+ "model_output.classification.iab_content.tier3.label": "Data Storage and Warehousing",
820
+ "model_output.classification.iab_content.tier4.label": null
821
  },
822
  "expected": {
823
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
832
  "actual": "exact",
833
  "expected": "nearest_equivalent",
834
  "path": "model_output.classification.iab_content.mapping_mode"
835
+ },
836
+ {
837
+ "actual": "Data Storage and Warehousing",
838
+ "expected": "Internet",
839
+ "path": "model_output.classification.iab_content.tier3.label"
840
+ },
841
+ {
842
+ "actual": null,
843
+ "expected": "Web Hosting",
844
+ "path": "model_output.classification.iab_content.tier4.label"
845
  }
846
  ],
847
  "notes": "Cross-vertical easy IAB mapping case for Technology & Computing > Computing > Internet > Web Hosting.",
 
851
  },
852
  {
853
  "actual": {
854
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
855
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
856
+ "model_output.classification.iab_content.tier2.label": null,
857
+ "model_output.classification.iab_content.tier3.label": null,
858
+ "model_output.classification.iab_content.tier4.label": null
859
  },
860
  "expected": {
861
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
867
  "id": "web-hosting-medium",
868
  "mismatches": [
869
  {
870
+ "actual": null,
871
+ "expected": "Computing",
872
+ "path": "model_output.classification.iab_content.tier2.label"
873
+ },
874
+ {
875
+ "actual": null,
876
+ "expected": "Internet",
877
+ "path": "model_output.classification.iab_content.tier3.label"
878
+ },
879
+ {
880
+ "actual": null,
881
+ "expected": "Web Hosting",
882
+ "path": "model_output.classification.iab_content.tier4.label"
883
  }
884
  ],
885
  "notes": "Cross-vertical medium IAB mapping case for Technology & Computing > Computing > Internet > Web Hosting.",
 
889
  },
890
  {
891
  "actual": {
892
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
893
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
894
  "model_output.classification.iab_content.tier2.label": "Computing",
895
+ "model_output.classification.iab_content.tier3.label": null,
896
+ "model_output.classification.iab_content.tier4.label": null
897
  },
898
  "expected": {
899
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
905
  "id": "web-hosting-hard",
906
  "mismatches": [
907
  {
908
+ "actual": null,
909
+ "expected": "Internet",
910
+ "path": "model_output.classification.iab_content.tier3.label"
911
+ },
912
+ {
913
+ "actual": null,
914
+ "expected": "Web Hosting",
915
+ "path": "model_output.classification.iab_content.tier4.label"
916
  }
917
  ],
918
  "notes": "Cross-vertical hard IAB mapping case for Technology & Computing > Computing > Internet > Web Hosting.",
 
976
  "actual": {
977
  "model_output.classification.iab_content.mapping_mode": "exact",
978
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
979
+ "model_output.classification.iab_content.tier2.label": "Consumer Electronics",
980
+ "model_output.classification.iab_content.tier3.label": null
981
  },
982
  "expected": {
983
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
993
  "path": "model_output.classification.iab_content.mapping_mode"
994
  },
995
  {
996
+ "actual": null,
 
 
 
 
 
997
  "expected": "Smartphones",
998
  "path": "model_output.classification.iab_content.tier3.label"
999
  }
 
1005
  },
1006
  {
1007
  "actual": {
1008
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1009
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1010
+ "model_output.classification.iab_content.tier2.label": null,
1011
+ "model_output.classification.iab_content.tier3.label": null,
1012
+ "model_output.classification.iab_content.tier4.label": null
1013
  },
1014
  "expected": {
1015
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
1021
  "id": "desktops-easy",
1022
  "mismatches": [
1023
  {
1024
+ "actual": null,
1025
+ "expected": "Computing",
1026
+ "path": "model_output.classification.iab_content.tier2.label"
1027
+ },
1028
+ {
1029
+ "actual": null,
1030
+ "expected": "Software and Applications",
1031
+ "path": "model_output.classification.iab_content.tier3.label"
1032
  },
1033
  {
1034
+ "actual": null,
1035
  "expected": "Photo Editing Software",
1036
  "path": "model_output.classification.iab_content.tier4.label"
1037
  }
 
1043
  },
1044
  {
1045
  "actual": {
1046
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1047
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1048
  "model_output.classification.iab_content.tier2.label": "Computing",
1049
+ "model_output.classification.iab_content.tier3.label": null
1050
  },
1051
  "expected": {
1052
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
1057
  "id": "desktops-medium",
1058
  "mismatches": [
1059
  {
1060
+ "actual": null,
1061
+ "expected": "Desktops",
1062
+ "path": "model_output.classification.iab_content.tier3.label"
1063
  }
1064
  ],
1065
  "notes": "Cross-vertical medium IAB mapping case for Technology & Computing > Computing > Desktops.",
 
1069
  },
1070
  {
1071
  "actual": {
1072
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1073
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1074
+ "model_output.classification.iab_content.tier2.label": null,
1075
+ "model_output.classification.iab_content.tier3.label": null
1076
  },
1077
  "expected": {
1078
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
1083
  "id": "desktops-hard",
1084
  "mismatches": [
1085
  {
1086
+ "actual": null,
1087
+ "expected": "Computing",
1088
+ "path": "model_output.classification.iab_content.tier2.label"
1089
+ },
1090
+ {
1091
+ "actual": null,
1092
+ "expected": "Desktops",
1093
+ "path": "model_output.classification.iab_content.tier3.label"
1094
  }
1095
  ],
1096
  "notes": "Cross-vertical hard IAB mapping case for Technology & Computing > Computing > Desktops.",
 
1176
  },
1177
  {
1178
  "actual": {
1179
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1180
+ "model_output.classification.iab_content.tier1.label": "Shopping",
1181
+ "model_output.classification.iab_content.tier2.label": null,
1182
+ "model_output.classification.iab_content.tier3.label": null
1183
  },
1184
  "expected": {
1185
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
1190
  "id": "style-fashion-parent-easy",
1191
  "mismatches": [
1192
  {
1193
+ "actual": "Shopping",
1194
+ "expected": "Style & Fashion",
1195
+ "path": "model_output.classification.iab_content.tier1.label"
1196
  },
1197
  {
1198
+ "actual": null,
1199
  "expected": "Women's Fashion",
1200
  "path": "model_output.classification.iab_content.tier2.label"
1201
  },
1202
  {
1203
+ "actual": null,
1204
  "expected": "Women's Shoes and Footwear",
1205
  "path": "model_output.classification.iab_content.tier3.label"
1206
  }
 
1274
  },
1275
  {
1276
  "actual": {
1277
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1278
+ "model_output.classification.iab_content.tier1.label": "Style & Fashion",
1279
+ "model_output.classification.iab_content.tier2.label": null,
1280
  "model_output.classification.iab_content.tier3.label": null
1281
  },
1282
  "expected": {
 
1288
  "id": "womens-shoes-easy",
1289
  "mismatches": [
1290
  {
1291
+ "actual": null,
 
 
 
 
 
 
 
 
 
 
1292
  "expected": "Women's Fashion",
1293
  "path": "model_output.classification.iab_content.tier2.label"
1294
  },
 
1305
  },
1306
  {
1307
  "actual": {
1308
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1309
+ "model_output.classification.iab_content.tier1.label": "Style & Fashion",
1310
+ "model_output.classification.iab_content.tier2.label": null,
1311
  "model_output.classification.iab_content.tier3.label": null
1312
  },
1313
  "expected": {
 
1319
  "id": "womens-shoes-medium",
1320
  "mismatches": [
1321
  {
1322
+ "actual": null,
 
 
 
 
 
 
 
 
 
 
1323
  "expected": "Women's Fashion",
1324
  "path": "model_output.classification.iab_content.tier2.label"
1325
  },
 
1364
  "actual": {
1365
  "model_output.classification.iab_content.mapping_mode": "exact",
1366
  "model_output.classification.iab_content.tier1.label": "Style & Fashion",
1367
+ "model_output.classification.iab_content.tier2.label": "Women's Fashion",
1368
+ "model_output.classification.iab_content.tier3.label": "Women's Shoes and Footwear"
1369
  },
1370
  "expected": {
1371
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
1381
  "path": "model_output.classification.iab_content.mapping_mode"
1382
  },
1383
  {
1384
+ "actual": "Women's Fashion",
1385
  "expected": "Men's Fashion",
1386
  "path": "model_output.classification.iab_content.tier2.label"
1387
  },
1388
  {
1389
+ "actual": "Women's Shoes and Footwear",
1390
  "expected": "Men's Clothing",
1391
  "path": "model_output.classification.iab_content.tier3.label"
1392
  }
 
1400
  "actual": {
1401
  "model_output.classification.iab_content.mapping_mode": "exact",
1402
  "model_output.classification.iab_content.tier1.label": "Style & Fashion",
1403
+ "model_output.classification.iab_content.tier2.label": "Women's Fashion",
1404
+ "model_output.classification.iab_content.tier3.label": "Women's Shoes and Footwear"
1405
  },
1406
  "expected": {
1407
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
1417
  "path": "model_output.classification.iab_content.mapping_mode"
1418
  },
1419
  {
1420
+ "actual": "Women's Fashion",
1421
+ "expected": "Men's Fashion",
1422
+ "path": "model_output.classification.iab_content.tier2.label"
1423
+ },
1424
+ {
1425
+ "actual": "Women's Shoes and Footwear",
1426
  "expected": "Men's Clothing",
1427
  "path": "model_output.classification.iab_content.tier3.label"
1428
  }
 
1436
  "actual": {
1437
  "model_output.classification.iab_content.mapping_mode": "exact",
1438
  "model_output.classification.iab_content.tier1.label": "Style & Fashion",
1439
+ "model_output.classification.iab_content.tier2.label": "Women's Fashion",
1440
+ "model_output.classification.iab_content.tier3.label": "Women's Shoes and Footwear"
1441
  },
1442
  "expected": {
1443
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
1453
  "path": "model_output.classification.iab_content.mapping_mode"
1454
  },
1455
  {
1456
+ "actual": "Women's Fashion",
1457
  "expected": "Men's Fashion",
1458
  "path": "model_output.classification.iab_content.tier2.label"
1459
  },
1460
  {
1461
+ "actual": "Women's Shoes and Footwear",
1462
  "expected": "Men's Shoes and Footwear",
1463
  "path": "model_output.classification.iab_content.tier3.label"
1464
  }
 
1504
  },
1505
  {
1506
  "actual": {
1507
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1508
  "model_output.classification.iab_content.tier1.label": "Travel",
1509
+ "model_output.classification.iab_content.tier2.label": null,
1510
+ "model_output.classification.iab_content.tier3.label": null
1511
  },
1512
  "expected": {
1513
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
1518
  "id": "hotels-medium",
1519
  "mismatches": [
1520
  {
1521
+ "actual": null,
1522
+ "expected": "Travel Type",
1523
+ "path": "model_output.classification.iab_content.tier2.label"
1524
+ },
1525
+ {
1526
+ "actual": null,
1527
+ "expected": "Hotels and Motels",
1528
+ "path": "model_output.classification.iab_content.tier3.label"
1529
  }
1530
  ],
1531
  "notes": "Cross-vertical medium IAB mapping case for Travel > Travel Type > Hotels and Motels.",
 
1537
  "actual": {
1538
  "model_output.classification.iab_content.mapping_mode": "exact",
1539
  "model_output.classification.iab_content.tier1.label": "Travel",
1540
+ "model_output.classification.iab_content.tier2.label": null
1541
  },
1542
  "expected": {
1543
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
1550
  "actual": "exact",
1551
  "expected": "nearest_equivalent",
1552
  "path": "model_output.classification.iab_content.mapping_mode"
1553
+ },
1554
+ {
1555
+ "actual": null,
1556
+ "expected": "Travel Type",
1557
+ "path": "model_output.classification.iab_content.tier2.label"
1558
  }
1559
  ],
1560
  "notes": "Cross-vertical hard IAB mapping case for Travel > Travel Type > Hotels and Motels.",
 
1655
  "actual": {
1656
  "model_output.classification.iab_content.mapping_mode": "exact",
1657
  "model_output.classification.iab_content.tier1.label": "Healthy Living",
1658
+ "model_output.classification.iab_content.tier2.label": null,
1659
+ "model_output.classification.iab_content.tier3.label": null
1660
  },
1661
  "expected": {
1662
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
1677
  "path": "model_output.classification.iab_content.mapping_mode"
1678
  },
1679
  {
1680
+ "actual": null,
1681
  "expected": "Business",
1682
  "path": "model_output.classification.iab_content.tier2.label"
1683
  },
1684
  {
1685
+ "actual": null,
1686
  "expected": "Green Solutions",
1687
  "path": "model_output.classification.iab_content.tier3.label"
1688
  }
 
1696
  "actual": {
1697
  "model_output.classification.iab_content.mapping_mode": "exact",
1698
  "model_output.classification.iab_content.tier1.label": "Sports",
1699
+ "model_output.classification.iab_content.tier2.label": "Bodybuilding",
1700
  "model_output.classification.iab_content.tier3.label": null
1701
  },
1702
  "expected": {
 
1718
  "path": "model_output.classification.iab_content.mapping_mode"
1719
  },
1720
  {
1721
+ "actual": "Bodybuilding",
1722
  "expected": "Fitness and Exercise",
1723
  "path": "model_output.classification.iab_content.tier2.label"
1724
  },
 
1880
  },
1881
  {
1882
  "actual": {
1883
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1884
  "model_output.classification.iab_content.tier1.label": "Travel",
1885
+ "model_output.classification.iab_content.tier2.label": null
1886
  },
1887
  "expected": {
1888
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
1897
  "path": "model_output.classification.iab_content.tier1.label"
1898
  },
1899
  {
1900
+ "actual": null,
 
 
 
 
 
1901
  "expected": "Fiction",
1902
  "path": "model_output.classification.iab_content.tier2.label"
1903
  }
 
1910
  {
1911
  "actual": {
1912
  "model_output.classification.iab_content.mapping_mode": "exact",
1913
+ "model_output.classification.iab_content.tier1.label": "Genres"
1914
  },
1915
  "expected": {
1916
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
1918
  },
1919
  "id": "fiction-hard",
1920
  "mismatches": [
1921
+ {
1922
+ "actual": "Genres",
1923
+ "expected": "Books and Literature",
1924
+ "path": "model_output.classification.iab_content.tier1.label"
1925
+ },
1926
  {
1927
  "actual": "exact",
1928
  "expected": "nearest_equivalent",
 
1938
  "actual": {
1939
  "model_output.classification.iab_content.mapping_mode": "exact",
1940
  "model_output.classification.iab_content.tier1.label": "Home & Garden",
1941
+ "model_output.classification.iab_content.tier2.label": "Remodeling & Construction"
1942
  },
1943
  "expected": {
1944
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
1951
  "actual": "exact",
1952
  "expected": "nearest_equivalent",
1953
  "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
1954
  }
1955
  ],
1956
  "notes": "Cross-vertical easy IAB mapping case for Home & Garden > Home Improvement.",
 
1961
  {
1962
  "actual": {
1963
  "model_output.classification.iab_content.mapping_mode": "exact",
1964
+ "model_output.classification.iab_content.tier1.label": "Style & Fashion",
1965
+ "model_output.classification.iab_content.tier2.label": "Personal Care",
1966
+ "model_output.classification.iab_content.tier3.label": "Bath and Shower"
1967
  },
1968
  "expected": {
1969
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
1973
  },
1974
  "id": "home-improvement-medium",
1975
  "mismatches": [
 
 
 
 
 
1976
  {
1977
  "actual": "exact",
1978
  "expected": "nearest_equivalent",
1979
  "path": "model_output.classification.iab_content.mapping_mode"
 
 
 
 
 
 
 
 
 
 
1980
  }
1981
  ],
1982
  "notes": "Cross-vertical medium IAB mapping case for Home & Garden > Home Improvement.",
 
2020
  },
2021
  {
2022
  "actual": {
2023
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2024
+ "model_output.classification.iab_content.tier1.label": "Careers",
2025
+ "model_output.classification.iab_content.tier2.label": null
2026
  },
2027
  "expected": {
2028
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
2032
  "id": "online-education-easy",
2033
  "mismatches": [
2034
  {
2035
+ "actual": "Careers",
2036
  "expected": "Education",
2037
  "path": "model_output.classification.iab_content.tier1.label"
2038
  },
2039
  {
2040
+ "actual": null,
 
 
 
 
 
2041
  "expected": "Language Learning",
2042
  "path": "model_output.classification.iab_content.tier2.label"
2043
  }
 
2176
  },
2177
  {
2178
  "actual": {
2179
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2180
+ "model_output.classification.iab_content.tier1.label": "Food & Drink",
2181
+ "model_output.classification.iab_content.tier2.label": null,
2182
+ "model_output.classification.iab_content.tier3.label": null
2183
  },
2184
  "expected": {
2185
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
2190
  "id": "medical-health-easy",
2191
  "mismatches": [
2192
  {
2193
+ "actual": "Food & Drink",
2194
+ "expected": "Medical Health",
2195
+ "path": "model_output.classification.iab_content.tier1.label"
2196
+ },
2197
+ {
2198
+ "actual": null,
2199
+ "expected": "Diseases and Conditions",
2200
+ "path": "model_output.classification.iab_content.tier2.label"
2201
+ },
2202
+ {
2203
+ "actual": null,
2204
+ "expected": "Allergies",
2205
+ "path": "model_output.classification.iab_content.tier3.label"
2206
  }
2207
  ],
2208
  "notes": "Cross-vertical easy IAB mapping case for Medical Health.",
 
2212
  },
2213
  {
2214
  "actual": {
2215
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2216
  "model_output.classification.iab_content.tier1.label": "Medical Health",
2217
  "model_output.classification.iab_content.tier2.label": "Diseases and Conditions",
2218
+ "model_output.classification.iab_content.tier3.label": null,
2219
  "model_output.classification.iab_content.tier4.label": null
2220
  },
2221
  "expected": {
 
2228
  "id": "medical-health-medium",
2229
  "mismatches": [
2230
  {
2231
+ "actual": null,
 
 
 
 
 
2232
  "expected": "Injuries",
2233
  "path": "model_output.classification.iab_content.tier3.label"
2234
  },
 
2247
  "actual": {
2248
  "model_output.classification.iab_content.mapping_mode": "exact",
2249
  "model_output.classification.iab_content.tier1.label": "Medical Health",
2250
+ "model_output.classification.iab_content.tier2.label": "Surgery",
2251
  "model_output.classification.iab_content.tier3.label": null
2252
  },
2253
  "expected": {
 
2269
  "path": "model_output.classification.iab_content.mapping_mode"
2270
  },
2271
  {
2272
+ "actual": "Surgery",
2273
  "expected": "Wellness",
2274
  "path": "model_output.classification.iab_content.tier2.label"
2275
  },
 
2370
  },
2371
  {
2372
  "actual": {
2373
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2374
  "model_output.classification.iab_content.tier1.label": "Holidays",
2375
+ "model_output.classification.iab_content.tier2.label": null
2376
  },
2377
  "expected": {
2378
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
2387
  "path": "model_output.classification.iab_content.tier1.label"
2388
  },
2389
  {
2390
+ "actual": null,
 
 
 
 
 
2391
  "expected": "Food Movements",
2392
  "path": "model_output.classification.iab_content.tier2.label"
2393
  }
 
2488
  },
2489
  {
2490
  "actual": {
2491
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2492
+ "model_output.classification.iab_content.tier1.label": "Shopping",
2493
+ "model_output.classification.iab_content.tier2.label": null
2494
  },
2495
  "expected": {
2496
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
2500
  "id": "parenting-medium",
2501
  "mismatches": [
2502
  {
2503
+ "actual": "Shopping",
2504
  "expected": "Family and Relationships",
2505
  "path": "model_output.classification.iab_content.tier1.label"
2506
  },
2507
  {
2508
+ "actual": null,
 
 
 
 
 
2509
  "expected": "Parenting",
2510
  "path": "model_output.classification.iab_content.tier2.label"
2511
  }
 
2520
  "model_output.classification.iab_content.mapping_mode": "exact",
2521
  "model_output.classification.iab_content.tier1.label": "Family and Relationships",
2522
  "model_output.classification.iab_content.tier2.label": "Parenting",
2523
+ "model_output.classification.iab_content.tier3.label": "Parenting Babies and Toddlers"
2524
  },
2525
  "expected": {
2526
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
2536
  "path": "model_output.classification.iab_content.mapping_mode"
2537
  },
2538
  {
2539
+ "actual": "Parenting Babies and Toddlers",
2540
  "expected": "Special Needs Kids",
2541
  "path": "model_output.classification.iab_content.tier3.label"
2542
  }
 
2618
  "actual": {
2619
  "model_output.classification.iab_content.mapping_mode": "exact",
2620
  "model_output.classification.iab_content.tier1.label": "Entertainment",
2621
+ "model_output.classification.iab_content.tier2.label": null
2622
  },
2623
  "expected": {
2624
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
2631
  "actual": "exact",
2632
  "expected": "nearest_equivalent",
2633
  "path": "model_output.classification.iab_content.mapping_mode"
2634
+ },
2635
+ {
2636
+ "actual": null,
2637
+ "expected": "Movies",
2638
+ "path": "model_output.classification.iab_content.tier2.label"
2639
  }
2640
  ],
2641
  "notes": "Cross-vertical easy IAB mapping case for Entertainment > Movies.",
 
2646
  {
2647
  "actual": {
2648
  "model_output.classification.iab_content.mapping_mode": "exact",
2649
+ "model_output.classification.iab_content.tier1.label": "Genres",
2650
+ "model_output.classification.iab_content.tier2.label": "Horror",
2651
  "model_output.classification.iab_content.tier3.label": null
2652
  },
2653
  "expected": {
 
2659
  "id": "movies-medium",
2660
  "mismatches": [
2661
  {
2662
+ "actual": "Genres",
2663
  "expected": "Video Gaming",
2664
  "path": "model_output.classification.iab_content.tier1.label"
2665
  },
 
2669
  "path": "model_output.classification.iab_content.mapping_mode"
2670
  },
2671
  {
2672
+ "actual": "Horror",
2673
  "expected": "Video Game Genres",
2674
  "path": "model_output.classification.iab_content.tier2.label"
2675
  },
artifacts/evaluation/latest/iab_cross_vertical_quality_target_eval.json CHANGED
@@ -1,21 +1,21 @@
1
  {
2
  "by_status": {
3
  "must_fix": {
4
- "failed": 49,
5
- "passed": 41,
6
  "total": 90
7
  }
8
  },
9
  "cases_path": "/content/agentic-intent-classifier/examples/iab_cross_vertical_mapping_cases.json",
10
  "count": 90,
11
- "failed": 49,
12
- "passed": 41,
13
  "results": [
14
  {
15
  "actual": {
16
- "model_output.classification.iab_content.mapping_mode": "exact",
17
- "model_output.classification.iab_content.tier1.label": "Personal Finance",
18
- "model_output.classification.iab_content.tier2.label": "Insurance"
19
  },
20
  "expected": {
21
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -25,17 +25,12 @@
25
  "id": "auto-buying-easy",
26
  "mismatches": [
27
  {
28
- "actual": "Personal Finance",
29
  "expected": "Automotive",
30
  "path": "model_output.classification.iab_content.tier1.label"
31
  },
32
  {
33
- "actual": "exact",
34
- "expected": "nearest_equivalent",
35
- "path": "model_output.classification.iab_content.mapping_mode"
36
- },
37
- {
38
- "actual": "Insurance",
39
  "expected": "Auto Buying and Selling",
40
  "path": "model_output.classification.iab_content.tier2.label"
41
  }
@@ -47,7 +42,7 @@
47
  },
48
  {
49
  "actual": {
50
- "model_output.classification.iab_content.mapping_mode": "exact",
51
  "model_output.classification.iab_content.tier1.label": "Automotive",
52
  "model_output.classification.iab_content.tier2.label": "Auto Body Styles"
53
  },
@@ -58,11 +53,6 @@
58
  },
59
  "id": "auto-buying-medium",
60
  "mismatches": [
61
- {
62
- "actual": "exact",
63
- "expected": "nearest_equivalent",
64
- "path": "model_output.classification.iab_content.mapping_mode"
65
- },
66
  {
67
  "actual": "Auto Body Styles",
68
  "expected": "Auto Buying and Selling",
@@ -76,9 +66,9 @@
76
  },
77
  {
78
  "actual": {
79
- "model_output.classification.iab_content.mapping_mode": "exact",
80
  "model_output.classification.iab_content.tier1.label": "Automotive",
81
- "model_output.classification.iab_content.tier2.label": "Car Culture"
82
  },
83
  "expected": {
84
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -88,12 +78,7 @@
88
  "id": "auto-buying-hard",
89
  "mismatches": [
90
  {
91
- "actual": "exact",
92
- "expected": "nearest_equivalent",
93
- "path": "model_output.classification.iab_content.mapping_mode"
94
- },
95
- {
96
- "actual": "Car Culture",
97
  "expected": "Auto Buying and Selling",
98
  "path": "model_output.classification.iab_content.tier2.label"
99
  }
@@ -107,8 +92,8 @@
107
  "actual": {
108
  "model_output.classification.iab_content.mapping_mode": "exact",
109
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
110
- "model_output.classification.iab_content.tier2.label": "Computing",
111
- "model_output.classification.iab_content.tier3.label": "Software and Applications"
112
  },
113
  "expected": {
114
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -124,12 +109,12 @@
124
  "path": "model_output.classification.iab_content.tier1.label"
125
  },
126
  {
127
- "actual": "Computing",
128
  "expected": "Business",
129
  "path": "model_output.classification.iab_content.tier2.label"
130
  },
131
  {
132
- "actual": "Software and Applications",
133
  "expected": "Sales",
134
  "path": "model_output.classification.iab_content.tier3.label"
135
  }
@@ -141,9 +126,9 @@
141
  },
142
  {
143
  "actual": {
144
- "model_output.classification.iab_content.mapping_mode": "exact",
145
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
146
- "model_output.classification.iab_content.tier2.label": "Robotics",
147
  "model_output.classification.iab_content.tier3.label": null
148
  },
149
  "expected": {
@@ -160,7 +145,12 @@
160
  "path": "model_output.classification.iab_content.tier1.label"
161
  },
162
  {
163
- "actual": "Robotics",
 
 
 
 
 
164
  "expected": "Business",
165
  "path": "model_output.classification.iab_content.tier2.label"
166
  },
@@ -180,7 +170,7 @@
180
  "model_output.classification.iab_content.mapping_mode": "exact",
181
  "model_output.classification.iab_content.tier1.label": "Business and Finance",
182
  "model_output.classification.iab_content.tier2.label": "Business",
183
- "model_output.classification.iab_content.tier3.label": "Sales"
184
  },
185
  "expected": {
186
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -189,17 +179,23 @@
189
  "model_output.classification.iab_content.tier3.label": "Sales"
190
  },
191
  "id": "sales-crm-hard",
192
- "mismatches": [],
 
 
 
 
 
 
193
  "notes": "Cross-vertical hard IAB mapping case for Business and Finance > Business > Sales.",
194
- "pass": true,
195
  "status": "must_fix",
196
  "text": "Need software to manage leads and pipeline for a startup sales team"
197
  },
198
  {
199
  "actual": {
200
- "model_output.classification.iab_content.mapping_mode": "exact",
201
- "model_output.classification.iab_content.tier1.label": "Careers",
202
- "model_output.classification.iab_content.tier2.label": "Job Search",
203
  "model_output.classification.iab_content.tier3.label": null
204
  },
205
  "expected": {
@@ -211,12 +207,17 @@
211
  "id": "marketing-tools-easy",
212
  "mismatches": [
213
  {
214
- "actual": "Careers",
215
  "expected": "Business and Finance",
216
  "path": "model_output.classification.iab_content.tier1.label"
217
  },
218
  {
219
- "actual": "Job Search",
 
 
 
 
 
220
  "expected": "Business",
221
  "path": "model_output.classification.iab_content.tier2.label"
222
  },
@@ -233,9 +234,9 @@
233
  },
234
  {
235
  "actual": {
236
- "model_output.classification.iab_content.mapping_mode": "exact",
237
- "model_output.classification.iab_content.tier1.label": "Sensitive Topics",
238
- "model_output.classification.iab_content.tier2.label": "Terrorism",
239
  "model_output.classification.iab_content.tier3.label": null
240
  },
241
  "expected": {
@@ -247,12 +248,17 @@
247
  "id": "marketing-tools-medium",
248
  "mismatches": [
249
  {
250
- "actual": "Sensitive Topics",
251
  "expected": "Business and Finance",
252
  "path": "model_output.classification.iab_content.tier1.label"
253
  },
254
  {
255
- "actual": "Terrorism",
 
 
 
 
 
256
  "expected": "Business",
257
  "path": "model_output.classification.iab_content.tier2.label"
258
  },
@@ -269,9 +275,9 @@
269
  },
270
  {
271
  "actual": {
272
- "model_output.classification.iab_content.mapping_mode": "exact",
273
- "model_output.classification.iab_content.tier1.label": "Personal Finance",
274
- "model_output.classification.iab_content.tier2.label": "Home Utilities",
275
  "model_output.classification.iab_content.tier3.label": null
276
  },
277
  "expected": {
@@ -283,12 +289,17 @@
283
  "id": "marketing-tools-hard",
284
  "mismatches": [
285
  {
286
- "actual": "Personal Finance",
287
  "expected": "Business and Finance",
288
  "path": "model_output.classification.iab_content.tier1.label"
289
  },
290
  {
291
- "actual": "Home Utilities",
 
 
 
 
 
292
  "expected": "Business",
293
  "path": "model_output.classification.iab_content.tier2.label"
294
  },
@@ -305,10 +316,10 @@
305
  },
306
  {
307
  "actual": {
308
- "model_output.classification.iab_content.mapping_mode": "exact",
309
- "model_output.classification.iab_content.tier1.label": "Technology & Computing",
310
- "model_output.classification.iab_content.tier2.label": "Computing",
311
- "model_output.classification.iab_content.tier3.label": "Information and Network Security"
312
  },
313
  "expected": {
314
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -319,17 +330,22 @@
319
  "id": "business-it-easy",
320
  "mismatches": [
321
  {
322
- "actual": "Technology & Computing",
323
  "expected": "Business and Finance",
324
  "path": "model_output.classification.iab_content.tier1.label"
325
  },
326
  {
327
- "actual": "Computing",
 
 
 
 
 
328
  "expected": "Business",
329
  "path": "model_output.classification.iab_content.tier2.label"
330
  },
331
  {
332
- "actual": "Information and Network Security",
333
  "expected": "Business I.T.",
334
  "path": "model_output.classification.iab_content.tier3.label"
335
  }
@@ -341,9 +357,9 @@
341
  },
342
  {
343
  "actual": {
344
- "model_output.classification.iab_content.mapping_mode": "exact",
345
- "model_output.classification.iab_content.tier1.label": "Careers",
346
- "model_output.classification.iab_content.tier2.label": "Job Search",
347
  "model_output.classification.iab_content.tier3.label": null
348
  },
349
  "expected": {
@@ -355,12 +371,17 @@
355
  "id": "business-it-medium",
356
  "mismatches": [
357
  {
358
- "actual": "Careers",
359
  "expected": "Business and Finance",
360
  "path": "model_output.classification.iab_content.tier1.label"
361
  },
362
  {
363
- "actual": "Job Search",
 
 
 
 
 
364
  "expected": "Business",
365
  "path": "model_output.classification.iab_content.tier2.label"
366
  },
@@ -432,8 +453,8 @@
432
  {
433
  "actual": {
434
  "model_output.classification.iab_content.mapping_mode": "exact",
435
- "model_output.classification.iab_content.tier1.label": "Attractions",
436
- "model_output.classification.iab_content.tier2.label": "Bars & Restaurants"
437
  },
438
  "expected": {
439
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -441,28 +462,17 @@
441
  "model_output.classification.iab_content.tier2.label": "Dining Out"
442
  },
443
  "id": "dining-out-medium",
444
- "mismatches": [
445
- {
446
- "actual": "Attractions",
447
- "expected": "Food & Drink",
448
- "path": "model_output.classification.iab_content.tier1.label"
449
- },
450
- {
451
- "actual": "Bars & Restaurants",
452
- "expected": "Dining Out",
453
- "path": "model_output.classification.iab_content.tier2.label"
454
- }
455
- ],
456
  "notes": "Cross-vertical medium IAB mapping case for Food & Drink > Dining Out.",
457
- "pass": false,
458
  "status": "must_fix",
459
  "text": "Good restaurants for a client dinner downtown"
460
  },
461
  {
462
  "actual": {
463
  "model_output.classification.iab_content.mapping_mode": "exact",
464
- "model_output.classification.iab_content.tier1.label": "Attractions",
465
- "model_output.classification.iab_content.tier2.label": "Bars & Restaurants"
466
  },
467
  "expected": {
468
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -470,20 +480,9 @@
470
  "model_output.classification.iab_content.tier2.label": "Dining Out"
471
  },
472
  "id": "dining-out-hard",
473
- "mismatches": [
474
- {
475
- "actual": "Attractions",
476
- "expected": "Food & Drink",
477
- "path": "model_output.classification.iab_content.tier1.label"
478
- },
479
- {
480
- "actual": "Bars & Restaurants",
481
- "expected": "Dining Out",
482
- "path": "model_output.classification.iab_content.tier2.label"
483
- }
484
- ],
485
  "notes": "Cross-vertical hard IAB mapping case for Food & Drink > Dining Out.",
486
- "pass": false,
487
  "status": "must_fix",
488
  "text": "Need a place to eat tonight where I can make a reservation online"
489
  },
@@ -550,7 +549,7 @@
550
  {
551
  "actual": {
552
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
553
- "model_output.classification.iab_content.tier1.label": "Science",
554
  "model_output.classification.iab_content.tier2.label": null
555
  },
556
  "expected": {
@@ -561,7 +560,7 @@
561
  "id": "artificial-intelligence-easy",
562
  "mismatches": [
563
  {
564
- "actual": "Science",
565
  "expected": "Technology & Computing",
566
  "path": "model_output.classification.iab_content.tier1.label"
567
  },
@@ -583,9 +582,9 @@
583
  },
584
  {
585
  "actual": {
586
- "model_output.classification.iab_content.mapping_mode": "exact",
587
  "model_output.classification.iab_content.tier1.label": "Education",
588
- "model_output.classification.iab_content.tier2.label": "Language Learning"
589
  },
590
  "expected": {
591
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -600,7 +599,12 @@
600
  "path": "model_output.classification.iab_content.tier1.label"
601
  },
602
  {
603
- "actual": "Language Learning",
 
 
 
 
 
604
  "expected": "Artificial Intelligence",
605
  "path": "model_output.classification.iab_content.tier2.label"
606
  }
@@ -642,9 +646,9 @@
642
  {
643
  "actual": {
644
  "model_output.classification.iab_content.mapping_mode": "exact",
645
- "model_output.classification.iab_content.tier1.label": "Technology & Computing",
646
- "model_output.classification.iab_content.tier2.label": "Computing",
647
- "model_output.classification.iab_content.tier3.label": "Software and Applications"
648
  },
649
  "expected": {
650
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -653,9 +657,25 @@
653
  "model_output.classification.iab_content.tier3.label": "Software and Applications"
654
  },
655
  "id": "software-apps-easy",
656
- "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
657
  "notes": "Cross-vertical easy IAB mapping case for Technology & Computing > Computing > Computer Software and Applications.",
658
- "pass": true,
659
  "status": "must_fix",
660
  "text": "Best workflow software for a small operations team"
661
  },
@@ -663,8 +683,8 @@
663
  "actual": {
664
  "model_output.classification.iab_content.mapping_mode": "exact",
665
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
666
- "model_output.classification.iab_content.tier2.label": "Computing",
667
- "model_output.classification.iab_content.tier3.label": "Software and Applications"
668
  },
669
  "expected": {
670
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -673,17 +693,28 @@
673
  "model_output.classification.iab_content.tier3.label": "Software and Applications"
674
  },
675
  "id": "software-apps-medium",
676
- "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
677
  "notes": "Cross-vertical medium IAB mapping case for Technology & Computing > Computing > Computer Software and Applications.",
678
- "pass": true,
679
  "status": "must_fix",
680
  "text": "Need project management software for a distributed team"
681
  },
682
  {
683
  "actual": {
684
  "model_output.classification.iab_content.mapping_mode": "exact",
685
- "model_output.classification.iab_content.tier1.label": "Technology & Computing",
686
- "model_output.classification.iab_content.tier2.label": "Virtual Reality",
687
  "model_output.classification.iab_content.tier3.label": null
688
  },
689
  "expected": {
@@ -695,7 +726,12 @@
695
  "id": "software-apps-hard",
696
  "mismatches": [
697
  {
698
- "actual": "Virtual Reality",
 
 
 
 
 
699
  "expected": "Computing",
700
  "path": "model_output.classification.iab_content.tier2.label"
701
  },
@@ -755,10 +791,10 @@
755
  },
756
  {
757
  "actual": {
758
- "model_output.classification.iab_content.mapping_mode": "exact",
759
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
760
  "model_output.classification.iab_content.tier2.label": "Computing",
761
- "model_output.classification.iab_content.tier3.label": "Information and Network Security",
762
  "model_output.classification.iab_content.tier4.label": null
763
  },
764
  "expected": {
@@ -771,7 +807,12 @@
771
  "id": "communication-software-medium",
772
  "mismatches": [
773
  {
774
- "actual": "Information and Network Security",
 
 
 
 
 
775
  "expected": "Software and Applications",
776
  "path": "model_output.classification.iab_content.tier3.label"
777
  },
@@ -788,9 +829,9 @@
788
  },
789
  {
790
  "actual": {
791
- "model_output.classification.iab_content.mapping_mode": "exact",
792
- "model_output.classification.iab_content.tier1.label": "Technology & Computing",
793
- "model_output.classification.iab_content.tier2.label": "Virtual Reality",
794
  "model_output.classification.iab_content.tier3.label": null,
795
  "model_output.classification.iab_content.tier4.label": null
796
  },
@@ -804,7 +845,17 @@
804
  "id": "communication-software-hard",
805
  "mismatches": [
806
  {
807
- "actual": "Virtual Reality",
 
 
 
 
 
 
 
 
 
 
808
  "expected": "Computing",
809
  "path": "model_output.classification.iab_content.tier2.label"
810
  },
@@ -829,8 +880,8 @@
829
  "model_output.classification.iab_content.mapping_mode": "exact",
830
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
831
  "model_output.classification.iab_content.tier2.label": "Computing",
832
- "model_output.classification.iab_content.tier3.label": "Internet",
833
- "model_output.classification.iab_content.tier4.label": "Web Hosting"
834
  },
835
  "expected": {
836
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -840,19 +891,30 @@
840
  "model_output.classification.iab_content.tier4.label": "Web Hosting"
841
  },
842
  "id": "web-hosting-easy",
843
- "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
844
  "notes": "Cross-vertical easy IAB mapping case for Technology & Computing > Computing > Internet > Web Hosting.",
845
- "pass": true,
846
  "status": "must_fix",
847
  "text": "Vercel vs Netlify for website hosting"
848
  },
849
  {
850
  "actual": {
851
- "model_output.classification.iab_content.mapping_mode": "exact",
852
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
853
- "model_output.classification.iab_content.tier2.label": "Computing",
854
- "model_output.classification.iab_content.tier3.label": "Internet",
855
- "model_output.classification.iab_content.tier4.label": "Web Hosting"
856
  },
857
  "expected": {
858
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -862,19 +924,40 @@
862
  "model_output.classification.iab_content.tier4.label": "Web Hosting"
863
  },
864
  "id": "web-hosting-medium",
865
- "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
866
  "notes": "Cross-vertical medium IAB mapping case for Technology & Computing > Computing > Internet > Web Hosting.",
867
- "pass": true,
868
  "status": "must_fix",
869
  "text": "Best hosting platform for a startup website"
870
  },
871
  {
872
  "actual": {
873
- "model_output.classification.iab_content.mapping_mode": "exact",
874
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
875
  "model_output.classification.iab_content.tier2.label": "Computing",
876
- "model_output.classification.iab_content.tier3.label": "Internet",
877
- "model_output.classification.iab_content.tier4.label": "Web Hosting"
878
  },
879
  "expected": {
880
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -884,9 +967,25 @@
884
  "model_output.classification.iab_content.tier4.label": "Web Hosting"
885
  },
886
  "id": "web-hosting-hard",
887
- "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
888
  "notes": "Cross-vertical hard IAB mapping case for Technology & Computing > Computing > Internet > Web Hosting.",
889
- "pass": true,
890
  "status": "must_fix",
891
  "text": "Need a managed hosting provider to deploy and run our marketing site"
892
  },
@@ -934,8 +1033,8 @@
934
  "actual": {
935
  "model_output.classification.iab_content.mapping_mode": "exact",
936
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
937
- "model_output.classification.iab_content.tier2.label": "Computing",
938
- "model_output.classification.iab_content.tier3.label": "Laptops"
939
  },
940
  "expected": {
941
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -944,18 +1043,29 @@
944
  "model_output.classification.iab_content.tier3.label": "Laptops"
945
  },
946
  "id": "laptops-hard",
947
- "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
948
  "notes": "Cross-vertical hard IAB mapping case for Technology & Computing > Computing > Laptops.",
949
- "pass": true,
950
  "status": "must_fix",
951
  "text": "Need a portable computer with good battery life for everyday work"
952
  },
953
  {
954
  "actual": {
955
- "model_output.classification.iab_content.mapping_mode": "exact",
956
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
957
- "model_output.classification.iab_content.tier2.label": "Computing",
958
- "model_output.classification.iab_content.tier3.label": "Software and Applications"
959
  },
960
  "expected": {
961
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -966,7 +1076,17 @@
966
  "id": "desktops-easy",
967
  "mismatches": [
968
  {
969
- "actual": "Software and Applications",
 
 
 
 
 
 
 
 
 
 
970
  "expected": "Desktops",
971
  "path": "model_output.classification.iab_content.tier3.label"
972
  }
@@ -978,10 +1098,10 @@
978
  },
979
  {
980
  "actual": {
981
- "model_output.classification.iab_content.mapping_mode": "exact",
982
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
983
  "model_output.classification.iab_content.tier2.label": "Computing",
984
- "model_output.classification.iab_content.tier3.label": "Desktops"
985
  },
986
  "expected": {
987
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -990,18 +1110,29 @@
990
  "model_output.classification.iab_content.tier3.label": "Desktops"
991
  },
992
  "id": "desktops-medium",
993
- "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
994
  "notes": "Cross-vertical medium IAB mapping case for Technology & Computing > Computing > Desktops.",
995
- "pass": true,
996
  "status": "must_fix",
997
  "text": "Which desktop computer should I buy for a home office?"
998
  },
999
  {
1000
  "actual": {
1001
- "model_output.classification.iab_content.mapping_mode": "exact",
1002
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1003
- "model_output.classification.iab_content.tier2.label": "Computing",
1004
- "model_output.classification.iab_content.tier3.label": "Desktops"
1005
  },
1006
  "expected": {
1007
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1010,9 +1141,25 @@
1010
  "model_output.classification.iab_content.tier3.label": "Desktops"
1011
  },
1012
  "id": "desktops-hard",
1013
- "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1014
  "notes": "Cross-vertical hard IAB mapping case for Technology & Computing > Computing > Desktops.",
1015
- "pass": true,
1016
  "status": "must_fix",
1017
  "text": "Need a desktop PC with strong performance for creative work"
1018
  },
@@ -1078,8 +1225,8 @@
1078
  },
1079
  {
1080
  "actual": {
1081
- "model_output.classification.iab_content.mapping_mode": "exact",
1082
- "model_output.classification.iab_content.tier1.label": "Style & Fashion"
1083
  },
1084
  "expected": {
1085
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -1088,9 +1235,9 @@
1088
  "id": "style-fashion-parent-easy",
1089
  "mismatches": [
1090
  {
1091
- "actual": "exact",
1092
- "expected": "nearest_equivalent",
1093
- "path": "model_output.classification.iab_content.mapping_mode"
1094
  }
1095
  ],
1096
  "notes": "Cross-vertical easy IAB mapping case for Style & Fashion.",
@@ -1144,9 +1291,9 @@
1144
  },
1145
  {
1146
  "actual": {
1147
- "model_output.classification.iab_content.mapping_mode": "exact",
1148
- "model_output.classification.iab_content.tier1.label": "Sports",
1149
- "model_output.classification.iab_content.tier2.label": "Bodybuilding",
1150
  "model_output.classification.iab_content.tier3.label": null
1151
  },
1152
  "expected": {
@@ -1158,12 +1305,12 @@
1158
  "id": "womens-shoes-easy",
1159
  "mismatches": [
1160
  {
1161
- "actual": "Sports",
1162
- "expected": "Style & Fashion",
1163
- "path": "model_output.classification.iab_content.tier1.label"
1164
  },
1165
  {
1166
- "actual": "Bodybuilding",
1167
  "expected": "Women's Fashion",
1168
  "path": "model_output.classification.iab_content.tier2.label"
1169
  },
@@ -1180,9 +1327,9 @@
1180
  },
1181
  {
1182
  "actual": {
1183
- "model_output.classification.iab_content.mapping_mode": "exact",
1184
- "model_output.classification.iab_content.tier1.label": "Sports",
1185
- "model_output.classification.iab_content.tier2.label": "Walking",
1186
  "model_output.classification.iab_content.tier3.label": null
1187
  },
1188
  "expected": {
@@ -1194,12 +1341,12 @@
1194
  "id": "womens-shoes-medium",
1195
  "mismatches": [
1196
  {
1197
- "actual": "Sports",
1198
- "expected": "Style & Fashion",
1199
- "path": "model_output.classification.iab_content.tier1.label"
1200
  },
1201
  {
1202
- "actual": "Walking",
1203
  "expected": "Women's Fashion",
1204
  "path": "model_output.classification.iab_content.tier2.label"
1205
  },
@@ -1238,8 +1385,8 @@
1238
  "actual": {
1239
  "model_output.classification.iab_content.mapping_mode": "exact",
1240
  "model_output.classification.iab_content.tier1.label": "Style & Fashion",
1241
- "model_output.classification.iab_content.tier2.label": "Children's Clothing",
1242
- "model_output.classification.iab_content.tier3.label": null
1243
  },
1244
  "expected": {
1245
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1250,12 +1397,12 @@
1250
  "id": "mens-shoes-easy",
1251
  "mismatches": [
1252
  {
1253
- "actual": "Children's Clothing",
1254
  "expected": "Men's Fashion",
1255
  "path": "model_output.classification.iab_content.tier2.label"
1256
  },
1257
  {
1258
- "actual": null,
1259
  "expected": "Men's Shoes and Footwear",
1260
  "path": "model_output.classification.iab_content.tier3.label"
1261
  }
@@ -1269,8 +1416,8 @@
1269
  "actual": {
1270
  "model_output.classification.iab_content.mapping_mode": "exact",
1271
  "model_output.classification.iab_content.tier1.label": "Style & Fashion",
1272
- "model_output.classification.iab_content.tier2.label": "Men's Fashion",
1273
- "model_output.classification.iab_content.tier3.label": "Men's Shoes and Footwear"
1274
  },
1275
  "expected": {
1276
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1279,9 +1426,20 @@
1279
  "model_output.classification.iab_content.tier3.label": "Men's Shoes and Footwear"
1280
  },
1281
  "id": "mens-shoes-medium",
1282
- "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
1283
  "notes": "Cross-vertical medium IAB mapping case for Style & Fashion > Men's Fashion > Men's Shoes and Footwear.",
1284
- "pass": true,
1285
  "status": "must_fix",
1286
  "text": "Good men's dress shoes for office use"
1287
  },
@@ -1289,8 +1447,8 @@
1289
  "actual": {
1290
  "model_output.classification.iab_content.mapping_mode": "exact",
1291
  "model_output.classification.iab_content.tier1.label": "Style & Fashion",
1292
- "model_output.classification.iab_content.tier2.label": "Children's Clothing",
1293
- "model_output.classification.iab_content.tier3.label": null
1294
  },
1295
  "expected": {
1296
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1301,12 +1459,12 @@
1301
  "id": "mens-shoes-hard",
1302
  "mismatches": [
1303
  {
1304
- "actual": "Children's Clothing",
1305
  "expected": "Men's Fashion",
1306
  "path": "model_output.classification.iab_content.tier2.label"
1307
  },
1308
  {
1309
- "actual": null,
1310
  "expected": "Men's Shoes and Footwear",
1311
  "path": "model_output.classification.iab_content.tier3.label"
1312
  }
@@ -1338,10 +1496,10 @@
1338
  },
1339
  {
1340
  "actual": {
1341
- "model_output.classification.iab_content.mapping_mode": "exact",
1342
  "model_output.classification.iab_content.tier1.label": "Travel",
1343
- "model_output.classification.iab_content.tier2.label": "Travel Type",
1344
- "model_output.classification.iab_content.tier3.label": "Hotels and Motels"
1345
  },
1346
  "expected": {
1347
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1350,9 +1508,25 @@
1350
  "model_output.classification.iab_content.tier3.label": "Hotels and Motels"
1351
  },
1352
  "id": "hotels-medium",
1353
- "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1354
  "notes": "Cross-vertical medium IAB mapping case for Travel > Travel Type > Hotels and Motels.",
1355
- "pass": true,
1356
  "status": "must_fix",
1357
  "text": "Best hotels near Times Square for a weekend trip"
1358
  },
@@ -1360,7 +1534,7 @@
1360
  "actual": {
1361
  "model_output.classification.iab_content.mapping_mode": "exact",
1362
  "model_output.classification.iab_content.tier1.label": "Travel",
1363
- "model_output.classification.iab_content.tier2.label": "Travel Type",
1364
  "model_output.classification.iab_content.tier3.label": null
1365
  },
1366
  "expected": {
@@ -1371,6 +1545,11 @@
1371
  },
1372
  "id": "hotels-hard",
1373
  "mismatches": [
 
 
 
 
 
1374
  {
1375
  "actual": null,
1376
  "expected": "Hotels and Motels",
@@ -1457,8 +1636,8 @@
1457
  "actual": {
1458
  "model_output.classification.iab_content.mapping_mode": "exact",
1459
  "model_output.classification.iab_content.tier1.label": "Healthy Living",
1460
- "model_output.classification.iab_content.tier2.label": "Fitness and Exercise",
1461
- "model_output.classification.iab_content.tier3.label": "Running and Jogging"
1462
  },
1463
  "expected": {
1464
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1467,9 +1646,20 @@
1467
  "model_output.classification.iab_content.tier3.label": "Running and Jogging"
1468
  },
1469
  "id": "running-and-jogging-easy",
1470
- "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
1471
  "notes": "Cross-vertical easy IAB mapping case for Healthy Living > Fitness and Exercise > Running and Jogging.",
1472
- "pass": true,
1473
  "status": "must_fix",
1474
  "text": "Best running plan for a first 10k"
1475
  },
@@ -1477,7 +1667,7 @@
1477
  "actual": {
1478
  "model_output.classification.iab_content.mapping_mode": "exact",
1479
  "model_output.classification.iab_content.tier1.label": "Sports",
1480
- "model_output.classification.iab_content.tier2.label": "Walking",
1481
  "model_output.classification.iab_content.tier3.label": null
1482
  },
1483
  "expected": {
@@ -1494,7 +1684,7 @@
1494
  "path": "model_output.classification.iab_content.tier1.label"
1495
  },
1496
  {
1497
- "actual": "Walking",
1498
  "expected": "Fitness and Exercise",
1499
  "path": "model_output.classification.iab_content.tier2.label"
1500
  },
@@ -1630,9 +1820,9 @@
1630
  },
1631
  {
1632
  "actual": {
1633
- "model_output.classification.iab_content.mapping_mode": "exact",
1634
  "model_output.classification.iab_content.tier1.label": "Travel",
1635
- "model_output.classification.iab_content.tier2.label": "Travel Type"
1636
  },
1637
  "expected": {
1638
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -1647,12 +1837,7 @@
1647
  "path": "model_output.classification.iab_content.tier1.label"
1648
  },
1649
  {
1650
- "actual": "exact",
1651
- "expected": "nearest_equivalent",
1652
- "path": "model_output.classification.iab_content.mapping_mode"
1653
- },
1654
- {
1655
- "actual": "Travel Type",
1656
  "expected": "Fiction",
1657
  "path": "model_output.classification.iab_content.tier2.label"
1658
  }
@@ -1665,8 +1850,8 @@
1665
  {
1666
  "actual": {
1667
  "model_output.classification.iab_content.mapping_mode": "exact",
1668
- "model_output.classification.iab_content.tier1.label": "Books and Literature",
1669
- "model_output.classification.iab_content.tier2.label": "Fiction"
1670
  },
1671
  "expected": {
1672
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1674,9 +1859,20 @@
1674
  "model_output.classification.iab_content.tier2.label": "Fiction"
1675
  },
1676
  "id": "fiction-hard",
1677
- "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
1678
  "notes": "Cross-vertical hard IAB mapping case for Books and Literature > Fiction.",
1679
- "pass": true,
1680
  "status": "must_fix",
1681
  "text": "Looking for a character-driven novel, not comics or poetry"
1682
  },
@@ -1684,7 +1880,7 @@
1684
  "actual": {
1685
  "model_output.classification.iab_content.mapping_mode": "exact",
1686
  "model_output.classification.iab_content.tier1.label": "Home & Garden",
1687
- "model_output.classification.iab_content.tier2.label": "Interior Decorating"
1688
  },
1689
  "expected": {
1690
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1694,7 +1890,7 @@
1694
  "id": "home-improvement-easy",
1695
  "mismatches": [
1696
  {
1697
- "actual": "Interior Decorating",
1698
  "expected": "Home Improvement",
1699
  "path": "model_output.classification.iab_content.tier2.label"
1700
  }
@@ -1707,8 +1903,8 @@
1707
  {
1708
  "actual": {
1709
  "model_output.classification.iab_content.mapping_mode": "exact",
1710
- "model_output.classification.iab_content.tier1.label": "Home & Garden",
1711
- "model_output.classification.iab_content.tier2.label": "Interior Decorating"
1712
  },
1713
  "expected": {
1714
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1718,7 +1914,12 @@
1718
  "id": "home-improvement-medium",
1719
  "mismatches": [
1720
  {
1721
- "actual": "Interior Decorating",
 
 
 
 
 
1722
  "expected": "Home Improvement",
1723
  "path": "model_output.classification.iab_content.tier2.label"
1724
  }
@@ -1759,9 +1960,9 @@
1759
  },
1760
  {
1761
  "actual": {
1762
- "model_output.classification.iab_content.mapping_mode": "exact",
1763
- "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1764
- "model_output.classification.iab_content.tier2.label": "Augmented Reality"
1765
  },
1766
  "expected": {
1767
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -1771,12 +1972,17 @@
1771
  "id": "online-education-easy",
1772
  "mismatches": [
1773
  {
1774
- "actual": "Technology & Computing",
1775
  "expected": "Education",
1776
  "path": "model_output.classification.iab_content.tier1.label"
1777
  },
1778
  {
1779
- "actual": "Augmented Reality",
 
 
 
 
 
1780
  "expected": "Online Education",
1781
  "path": "model_output.classification.iab_content.tier2.label"
1782
  }
@@ -1906,23 +2112,34 @@
1906
  },
1907
  {
1908
  "actual": {
1909
- "model_output.classification.iab_content.mapping_mode": "exact",
1910
- "model_output.classification.iab_content.tier1.label": "Medical Health"
1911
  },
1912
  "expected": {
1913
  "model_output.classification.iab_content.mapping_mode": "exact",
1914
  "model_output.classification.iab_content.tier1.label": "Medical Health"
1915
  },
1916
  "id": "medical-health-easy",
1917
- "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
1918
  "notes": "Cross-vertical easy IAB mapping case for Medical Health.",
1919
- "pass": true,
1920
  "status": "must_fix",
1921
  "text": "what do these allergy symptoms mean"
1922
  },
1923
  {
1924
  "actual": {
1925
- "model_output.classification.iab_content.mapping_mode": "exact",
1926
  "model_output.classification.iab_content.tier1.label": "Medical Health"
1927
  },
1928
  "expected": {
@@ -1930,9 +2147,15 @@
1930
  "model_output.classification.iab_content.tier1.label": "Medical Health"
1931
  },
1932
  "id": "medical-health-medium",
1933
- "mismatches": [],
 
 
 
 
 
 
1934
  "notes": "Cross-vertical medium IAB mapping case for Medical Health.",
1935
- "pass": true,
1936
  "status": "must_fix",
1937
  "text": "when should i see a doctor for persistent knee pain"
1938
  },
@@ -2036,9 +2259,9 @@
2036
  },
2037
  {
2038
  "actual": {
2039
- "model_output.classification.iab_content.mapping_mode": "exact",
2040
  "model_output.classification.iab_content.tier1.label": "Holidays",
2041
- "model_output.classification.iab_content.tier2.label": "National & Civic Holidays"
2042
  },
2043
  "expected": {
2044
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -2053,7 +2276,12 @@
2053
  "path": "model_output.classification.iab_content.tier1.label"
2054
  },
2055
  {
2056
- "actual": "National & Civic Holidays",
 
 
 
 
 
2057
  "expected": "Financial Planning",
2058
  "path": "model_output.classification.iab_content.tier2.label"
2059
  }
@@ -2131,9 +2359,9 @@
2131
  },
2132
  {
2133
  "actual": {
2134
- "model_output.classification.iab_content.mapping_mode": "exact",
2135
- "model_output.classification.iab_content.tier1.label": "Genres",
2136
- "model_output.classification.iab_content.tier2.label": "Family/Children"
2137
  },
2138
  "expected": {
2139
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -2143,12 +2371,17 @@
2143
  "id": "parenting-medium",
2144
  "mismatches": [
2145
  {
2146
- "actual": "Genres",
2147
  "expected": "Family and Relationships",
2148
  "path": "model_output.classification.iab_content.tier1.label"
2149
  },
2150
  {
2151
- "actual": "Family/Children",
 
 
 
 
 
2152
  "expected": "Parenting",
2153
  "path": "model_output.classification.iab_content.tier2.label"
2154
  }
@@ -2234,7 +2467,7 @@
2234
  "actual": {
2235
  "model_output.classification.iab_content.mapping_mode": "exact",
2236
  "model_output.classification.iab_content.tier1.label": "Entertainment",
2237
- "model_output.classification.iab_content.tier2.label": "Movies"
2238
  },
2239
  "expected": {
2240
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -2242,17 +2475,23 @@
2242
  "model_output.classification.iab_content.tier2.label": "Movies"
2243
  },
2244
  "id": "movies-easy",
2245
- "mismatches": [],
 
 
 
 
 
 
2246
  "notes": "Cross-vertical easy IAB mapping case for Entertainment > Movies.",
2247
- "pass": true,
2248
  "status": "must_fix",
2249
  "text": "What movie should we watch tonight?"
2250
  },
2251
  {
2252
  "actual": {
2253
  "model_output.classification.iab_content.mapping_mode": "exact",
2254
- "model_output.classification.iab_content.tier1.label": "Entertainment",
2255
- "model_output.classification.iab_content.tier2.label": "Movies"
2256
  },
2257
  "expected": {
2258
  "model_output.classification.iab_content.mapping_mode": "exact",
@@ -2260,9 +2499,20 @@
2260
  "model_output.classification.iab_content.tier2.label": "Movies"
2261
  },
2262
  "id": "movies-medium",
2263
- "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
2264
  "notes": "Cross-vertical medium IAB mapping case for Entertainment > Movies.",
2265
- "pass": true,
2266
  "status": "must_fix",
2267
  "text": "Best thriller movies from the last few years"
2268
  },
 
1
  {
2
  "by_status": {
3
  "must_fix": {
4
+ "failed": 64,
5
+ "passed": 26,
6
  "total": 90
7
  }
8
  },
9
  "cases_path": "/content/agentic-intent-classifier/examples/iab_cross_vertical_mapping_cases.json",
10
  "count": 90,
11
+ "failed": 64,
12
+ "passed": 26,
13
  "results": [
14
  {
15
  "actual": {
16
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
17
+ "model_output.classification.iab_content.tier1.label": "Travel",
18
+ "model_output.classification.iab_content.tier2.label": "Travel Type"
19
  },
20
  "expected": {
21
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
25
  "id": "auto-buying-easy",
26
  "mismatches": [
27
  {
28
+ "actual": "Travel",
29
  "expected": "Automotive",
30
  "path": "model_output.classification.iab_content.tier1.label"
31
  },
32
  {
33
+ "actual": "Travel Type",
 
 
 
 
 
34
  "expected": "Auto Buying and Selling",
35
  "path": "model_output.classification.iab_content.tier2.label"
36
  }
 
42
  },
43
  {
44
  "actual": {
45
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
46
  "model_output.classification.iab_content.tier1.label": "Automotive",
47
  "model_output.classification.iab_content.tier2.label": "Auto Body Styles"
48
  },
 
53
  },
54
  "id": "auto-buying-medium",
55
  "mismatches": [
 
 
 
 
 
56
  {
57
  "actual": "Auto Body Styles",
58
  "expected": "Auto Buying and Selling",
 
66
  },
67
  {
68
  "actual": {
69
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
70
  "model_output.classification.iab_content.tier1.label": "Automotive",
71
+ "model_output.classification.iab_content.tier2.label": null
72
  },
73
  "expected": {
74
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
78
  "id": "auto-buying-hard",
79
  "mismatches": [
80
  {
81
+ "actual": null,
 
 
 
 
 
82
  "expected": "Auto Buying and Selling",
83
  "path": "model_output.classification.iab_content.tier2.label"
84
  }
 
92
  "actual": {
93
  "model_output.classification.iab_content.mapping_mode": "exact",
94
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
95
+ "model_output.classification.iab_content.tier2.label": null,
96
+ "model_output.classification.iab_content.tier3.label": null
97
  },
98
  "expected": {
99
  "model_output.classification.iab_content.mapping_mode": "exact",
 
109
  "path": "model_output.classification.iab_content.tier1.label"
110
  },
111
  {
112
+ "actual": null,
113
  "expected": "Business",
114
  "path": "model_output.classification.iab_content.tier2.label"
115
  },
116
  {
117
+ "actual": null,
118
  "expected": "Sales",
119
  "path": "model_output.classification.iab_content.tier3.label"
120
  }
 
126
  },
127
  {
128
  "actual": {
129
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
130
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
131
+ "model_output.classification.iab_content.tier2.label": null,
132
  "model_output.classification.iab_content.tier3.label": null
133
  },
134
  "expected": {
 
145
  "path": "model_output.classification.iab_content.tier1.label"
146
  },
147
  {
148
+ "actual": "nearest_equivalent",
149
+ "expected": "exact",
150
+ "path": "model_output.classification.iab_content.mapping_mode"
151
+ },
152
+ {
153
+ "actual": null,
154
  "expected": "Business",
155
  "path": "model_output.classification.iab_content.tier2.label"
156
  },
 
170
  "model_output.classification.iab_content.mapping_mode": "exact",
171
  "model_output.classification.iab_content.tier1.label": "Business and Finance",
172
  "model_output.classification.iab_content.tier2.label": "Business",
173
+ "model_output.classification.iab_content.tier3.label": "Startups"
174
  },
175
  "expected": {
176
  "model_output.classification.iab_content.mapping_mode": "exact",
 
179
  "model_output.classification.iab_content.tier3.label": "Sales"
180
  },
181
  "id": "sales-crm-hard",
182
+ "mismatches": [
183
+ {
184
+ "actual": "Startups",
185
+ "expected": "Sales",
186
+ "path": "model_output.classification.iab_content.tier3.label"
187
+ }
188
+ ],
189
  "notes": "Cross-vertical hard IAB mapping case for Business and Finance > Business > Sales.",
190
+ "pass": false,
191
  "status": "must_fix",
192
  "text": "Need software to manage leads and pipeline for a startup sales team"
193
  },
194
  {
195
  "actual": {
196
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
197
+ "model_output.classification.iab_content.tier1.label": "Technology & Computing",
198
+ "model_output.classification.iab_content.tier2.label": null,
199
  "model_output.classification.iab_content.tier3.label": null
200
  },
201
  "expected": {
 
207
  "id": "marketing-tools-easy",
208
  "mismatches": [
209
  {
210
+ "actual": "Technology & Computing",
211
  "expected": "Business and Finance",
212
  "path": "model_output.classification.iab_content.tier1.label"
213
  },
214
  {
215
+ "actual": "nearest_equivalent",
216
+ "expected": "exact",
217
+ "path": "model_output.classification.iab_content.mapping_mode"
218
+ },
219
+ {
220
+ "actual": null,
221
  "expected": "Business",
222
  "path": "model_output.classification.iab_content.tier2.label"
223
  },
 
234
  },
235
  {
236
  "actual": {
237
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
238
+ "model_output.classification.iab_content.tier1.label": "Technology & Computing",
239
+ "model_output.classification.iab_content.tier2.label": null,
240
  "model_output.classification.iab_content.tier3.label": null
241
  },
242
  "expected": {
 
248
  "id": "marketing-tools-medium",
249
  "mismatches": [
250
  {
251
+ "actual": "Technology & Computing",
252
  "expected": "Business and Finance",
253
  "path": "model_output.classification.iab_content.tier1.label"
254
  },
255
  {
256
+ "actual": "nearest_equivalent",
257
+ "expected": "exact",
258
+ "path": "model_output.classification.iab_content.mapping_mode"
259
+ },
260
+ {
261
+ "actual": null,
262
  "expected": "Business",
263
  "path": "model_output.classification.iab_content.tier2.label"
264
  },
 
275
  },
276
  {
277
  "actual": {
278
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
279
+ "model_output.classification.iab_content.tier1.label": "Careers",
280
+ "model_output.classification.iab_content.tier2.label": null,
281
  "model_output.classification.iab_content.tier3.label": null
282
  },
283
  "expected": {
 
289
  "id": "marketing-tools-hard",
290
  "mismatches": [
291
  {
292
+ "actual": "Careers",
293
  "expected": "Business and Finance",
294
  "path": "model_output.classification.iab_content.tier1.label"
295
  },
296
  {
297
+ "actual": "nearest_equivalent",
298
+ "expected": "exact",
299
+ "path": "model_output.classification.iab_content.mapping_mode"
300
+ },
301
+ {
302
+ "actual": null,
303
  "expected": "Business",
304
  "path": "model_output.classification.iab_content.tier2.label"
305
  },
 
316
  },
317
  {
318
  "actual": {
319
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
320
+ "model_output.classification.iab_content.tier1.label": "Careers",
321
+ "model_output.classification.iab_content.tier2.label": null,
322
+ "model_output.classification.iab_content.tier3.label": null
323
  },
324
  "expected": {
325
  "model_output.classification.iab_content.mapping_mode": "exact",
 
330
  "id": "business-it-easy",
331
  "mismatches": [
332
  {
333
+ "actual": "Careers",
334
  "expected": "Business and Finance",
335
  "path": "model_output.classification.iab_content.tier1.label"
336
  },
337
  {
338
+ "actual": "nearest_equivalent",
339
+ "expected": "exact",
340
+ "path": "model_output.classification.iab_content.mapping_mode"
341
+ },
342
+ {
343
+ "actual": null,
344
  "expected": "Business",
345
  "path": "model_output.classification.iab_content.tier2.label"
346
  },
347
  {
348
+ "actual": null,
349
  "expected": "Business I.T.",
350
  "path": "model_output.classification.iab_content.tier3.label"
351
  }
 
357
  },
358
  {
359
  "actual": {
360
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
361
+ "model_output.classification.iab_content.tier1.label": "Personal Finance",
362
+ "model_output.classification.iab_content.tier2.label": null,
363
  "model_output.classification.iab_content.tier3.label": null
364
  },
365
  "expected": {
 
371
  "id": "business-it-medium",
372
  "mismatches": [
373
  {
374
+ "actual": "Personal Finance",
375
  "expected": "Business and Finance",
376
  "path": "model_output.classification.iab_content.tier1.label"
377
  },
378
  {
379
+ "actual": "nearest_equivalent",
380
+ "expected": "exact",
381
+ "path": "model_output.classification.iab_content.mapping_mode"
382
+ },
383
+ {
384
+ "actual": null,
385
  "expected": "Business",
386
  "path": "model_output.classification.iab_content.tier2.label"
387
  },
 
453
  {
454
  "actual": {
455
  "model_output.classification.iab_content.mapping_mode": "exact",
456
+ "model_output.classification.iab_content.tier1.label": "Food & Drink",
457
+ "model_output.classification.iab_content.tier2.label": "Dining Out"
458
  },
459
  "expected": {
460
  "model_output.classification.iab_content.mapping_mode": "exact",
 
462
  "model_output.classification.iab_content.tier2.label": "Dining Out"
463
  },
464
  "id": "dining-out-medium",
465
+ "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
466
  "notes": "Cross-vertical medium IAB mapping case for Food & Drink > Dining Out.",
467
+ "pass": true,
468
  "status": "must_fix",
469
  "text": "Good restaurants for a client dinner downtown"
470
  },
471
  {
472
  "actual": {
473
  "model_output.classification.iab_content.mapping_mode": "exact",
474
+ "model_output.classification.iab_content.tier1.label": "Food & Drink",
475
+ "model_output.classification.iab_content.tier2.label": "Dining Out"
476
  },
477
  "expected": {
478
  "model_output.classification.iab_content.mapping_mode": "exact",
 
480
  "model_output.classification.iab_content.tier2.label": "Dining Out"
481
  },
482
  "id": "dining-out-hard",
483
+ "mismatches": [],
 
 
 
 
 
 
 
 
 
 
 
484
  "notes": "Cross-vertical hard IAB mapping case for Food & Drink > Dining Out.",
485
+ "pass": true,
486
  "status": "must_fix",
487
  "text": "Need a place to eat tonight where I can make a reservation online"
488
  },
 
549
  {
550
  "actual": {
551
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
552
+ "model_output.classification.iab_content.tier1.label": "Real Estate",
553
  "model_output.classification.iab_content.tier2.label": null
554
  },
555
  "expected": {
 
560
  "id": "artificial-intelligence-easy",
561
  "mismatches": [
562
  {
563
+ "actual": "Real Estate",
564
  "expected": "Technology & Computing",
565
  "path": "model_output.classification.iab_content.tier1.label"
566
  },
 
582
  },
583
  {
584
  "actual": {
585
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
586
  "model_output.classification.iab_content.tier1.label": "Education",
587
+ "model_output.classification.iab_content.tier2.label": null
588
  },
589
  "expected": {
590
  "model_output.classification.iab_content.mapping_mode": "exact",
 
599
  "path": "model_output.classification.iab_content.tier1.label"
600
  },
601
  {
602
+ "actual": "nearest_equivalent",
603
+ "expected": "exact",
604
+ "path": "model_output.classification.iab_content.mapping_mode"
605
+ },
606
+ {
607
+ "actual": null,
608
  "expected": "Artificial Intelligence",
609
  "path": "model_output.classification.iab_content.tier2.label"
610
  }
 
646
  {
647
  "actual": {
648
  "model_output.classification.iab_content.mapping_mode": "exact",
649
+ "model_output.classification.iab_content.tier1.label": "Careers",
650
+ "model_output.classification.iab_content.tier2.label": "Job Search",
651
+ "model_output.classification.iab_content.tier3.label": null
652
  },
653
  "expected": {
654
  "model_output.classification.iab_content.mapping_mode": "exact",
 
657
  "model_output.classification.iab_content.tier3.label": "Software and Applications"
658
  },
659
  "id": "software-apps-easy",
660
+ "mismatches": [
661
+ {
662
+ "actual": "Careers",
663
+ "expected": "Technology & Computing",
664
+ "path": "model_output.classification.iab_content.tier1.label"
665
+ },
666
+ {
667
+ "actual": "Job Search",
668
+ "expected": "Computing",
669
+ "path": "model_output.classification.iab_content.tier2.label"
670
+ },
671
+ {
672
+ "actual": null,
673
+ "expected": "Software and Applications",
674
+ "path": "model_output.classification.iab_content.tier3.label"
675
+ }
676
+ ],
677
  "notes": "Cross-vertical easy IAB mapping case for Technology & Computing > Computing > Computer Software and Applications.",
678
+ "pass": false,
679
  "status": "must_fix",
680
  "text": "Best workflow software for a small operations team"
681
  },
 
683
  "actual": {
684
  "model_output.classification.iab_content.mapping_mode": "exact",
685
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
686
+ "model_output.classification.iab_content.tier2.label": null,
687
+ "model_output.classification.iab_content.tier3.label": null
688
  },
689
  "expected": {
690
  "model_output.classification.iab_content.mapping_mode": "exact",
 
693
  "model_output.classification.iab_content.tier3.label": "Software and Applications"
694
  },
695
  "id": "software-apps-medium",
696
+ "mismatches": [
697
+ {
698
+ "actual": null,
699
+ "expected": "Computing",
700
+ "path": "model_output.classification.iab_content.tier2.label"
701
+ },
702
+ {
703
+ "actual": null,
704
+ "expected": "Software and Applications",
705
+ "path": "model_output.classification.iab_content.tier3.label"
706
+ }
707
+ ],
708
  "notes": "Cross-vertical medium IAB mapping case for Technology & Computing > Computing > Computer Software and Applications.",
709
+ "pass": false,
710
  "status": "must_fix",
711
  "text": "Need project management software for a distributed team"
712
  },
713
  {
714
  "actual": {
715
  "model_output.classification.iab_content.mapping_mode": "exact",
716
+ "model_output.classification.iab_content.tier1.label": "Careers",
717
+ "model_output.classification.iab_content.tier2.label": "Job Search",
718
  "model_output.classification.iab_content.tier3.label": null
719
  },
720
  "expected": {
 
726
  "id": "software-apps-hard",
727
  "mismatches": [
728
  {
729
+ "actual": "Careers",
730
+ "expected": "Technology & Computing",
731
+ "path": "model_output.classification.iab_content.tier1.label"
732
+ },
733
+ {
734
+ "actual": "Job Search",
735
  "expected": "Computing",
736
  "path": "model_output.classification.iab_content.tier2.label"
737
  },
 
791
  },
792
  {
793
  "actual": {
794
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
795
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
796
  "model_output.classification.iab_content.tier2.label": "Computing",
797
+ "model_output.classification.iab_content.tier3.label": null,
798
  "model_output.classification.iab_content.tier4.label": null
799
  },
800
  "expected": {
 
807
  "id": "communication-software-medium",
808
  "mismatches": [
809
  {
810
+ "actual": "nearest_equivalent",
811
+ "expected": "exact",
812
+ "path": "model_output.classification.iab_content.mapping_mode"
813
+ },
814
+ {
815
+ "actual": null,
816
  "expected": "Software and Applications",
817
  "path": "model_output.classification.iab_content.tier3.label"
818
  },
 
829
  },
830
  {
831
  "actual": {
832
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
833
+ "model_output.classification.iab_content.tier1.label": "Careers",
834
+ "model_output.classification.iab_content.tier2.label": null,
835
  "model_output.classification.iab_content.tier3.label": null,
836
  "model_output.classification.iab_content.tier4.label": null
837
  },
 
845
  "id": "communication-software-hard",
846
  "mismatches": [
847
  {
848
+ "actual": "Careers",
849
+ "expected": "Technology & Computing",
850
+ "path": "model_output.classification.iab_content.tier1.label"
851
+ },
852
+ {
853
+ "actual": "nearest_equivalent",
854
+ "expected": "exact",
855
+ "path": "model_output.classification.iab_content.mapping_mode"
856
+ },
857
+ {
858
+ "actual": null,
859
  "expected": "Computing",
860
  "path": "model_output.classification.iab_content.tier2.label"
861
  },
 
880
  "model_output.classification.iab_content.mapping_mode": "exact",
881
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
882
  "model_output.classification.iab_content.tier2.label": "Computing",
883
+ "model_output.classification.iab_content.tier3.label": "Data Storage and Warehousing",
884
+ "model_output.classification.iab_content.tier4.label": null
885
  },
886
  "expected": {
887
  "model_output.classification.iab_content.mapping_mode": "exact",
 
891
  "model_output.classification.iab_content.tier4.label": "Web Hosting"
892
  },
893
  "id": "web-hosting-easy",
894
+ "mismatches": [
895
+ {
896
+ "actual": "Data Storage and Warehousing",
897
+ "expected": "Internet",
898
+ "path": "model_output.classification.iab_content.tier3.label"
899
+ },
900
+ {
901
+ "actual": null,
902
+ "expected": "Web Hosting",
903
+ "path": "model_output.classification.iab_content.tier4.label"
904
+ }
905
+ ],
906
  "notes": "Cross-vertical easy IAB mapping case for Technology & Computing > Computing > Internet > Web Hosting.",
907
+ "pass": false,
908
  "status": "must_fix",
909
  "text": "Vercel vs Netlify for website hosting"
910
  },
911
  {
912
  "actual": {
913
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
914
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
915
+ "model_output.classification.iab_content.tier2.label": null,
916
+ "model_output.classification.iab_content.tier3.label": null,
917
+ "model_output.classification.iab_content.tier4.label": null
918
  },
919
  "expected": {
920
  "model_output.classification.iab_content.mapping_mode": "exact",
 
924
  "model_output.classification.iab_content.tier4.label": "Web Hosting"
925
  },
926
  "id": "web-hosting-medium",
927
+ "mismatches": [
928
+ {
929
+ "actual": "nearest_equivalent",
930
+ "expected": "exact",
931
+ "path": "model_output.classification.iab_content.mapping_mode"
932
+ },
933
+ {
934
+ "actual": null,
935
+ "expected": "Computing",
936
+ "path": "model_output.classification.iab_content.tier2.label"
937
+ },
938
+ {
939
+ "actual": null,
940
+ "expected": "Internet",
941
+ "path": "model_output.classification.iab_content.tier3.label"
942
+ },
943
+ {
944
+ "actual": null,
945
+ "expected": "Web Hosting",
946
+ "path": "model_output.classification.iab_content.tier4.label"
947
+ }
948
+ ],
949
  "notes": "Cross-vertical medium IAB mapping case for Technology & Computing > Computing > Internet > Web Hosting.",
950
+ "pass": false,
951
  "status": "must_fix",
952
  "text": "Best hosting platform for a startup website"
953
  },
954
  {
955
  "actual": {
956
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
957
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
958
  "model_output.classification.iab_content.tier2.label": "Computing",
959
+ "model_output.classification.iab_content.tier3.label": null,
960
+ "model_output.classification.iab_content.tier4.label": null
961
  },
962
  "expected": {
963
  "model_output.classification.iab_content.mapping_mode": "exact",
 
967
  "model_output.classification.iab_content.tier4.label": "Web Hosting"
968
  },
969
  "id": "web-hosting-hard",
970
+ "mismatches": [
971
+ {
972
+ "actual": "nearest_equivalent",
973
+ "expected": "exact",
974
+ "path": "model_output.classification.iab_content.mapping_mode"
975
+ },
976
+ {
977
+ "actual": null,
978
+ "expected": "Internet",
979
+ "path": "model_output.classification.iab_content.tier3.label"
980
+ },
981
+ {
982
+ "actual": null,
983
+ "expected": "Web Hosting",
984
+ "path": "model_output.classification.iab_content.tier4.label"
985
+ }
986
+ ],
987
  "notes": "Cross-vertical hard IAB mapping case for Technology & Computing > Computing > Internet > Web Hosting.",
988
+ "pass": false,
989
  "status": "must_fix",
990
  "text": "Need a managed hosting provider to deploy and run our marketing site"
991
  },
 
1033
  "actual": {
1034
  "model_output.classification.iab_content.mapping_mode": "exact",
1035
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1036
+ "model_output.classification.iab_content.tier2.label": "Consumer Electronics",
1037
+ "model_output.classification.iab_content.tier3.label": null
1038
  },
1039
  "expected": {
1040
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1043
  "model_output.classification.iab_content.tier3.label": "Laptops"
1044
  },
1045
  "id": "laptops-hard",
1046
+ "mismatches": [
1047
+ {
1048
+ "actual": "Consumer Electronics",
1049
+ "expected": "Computing",
1050
+ "path": "model_output.classification.iab_content.tier2.label"
1051
+ },
1052
+ {
1053
+ "actual": null,
1054
+ "expected": "Laptops",
1055
+ "path": "model_output.classification.iab_content.tier3.label"
1056
+ }
1057
+ ],
1058
  "notes": "Cross-vertical hard IAB mapping case for Technology & Computing > Computing > Laptops.",
1059
+ "pass": false,
1060
  "status": "must_fix",
1061
  "text": "Need a portable computer with good battery life for everyday work"
1062
  },
1063
  {
1064
  "actual": {
1065
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1066
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1067
+ "model_output.classification.iab_content.tier2.label": null,
1068
+ "model_output.classification.iab_content.tier3.label": null
1069
  },
1070
  "expected": {
1071
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1076
  "id": "desktops-easy",
1077
  "mismatches": [
1078
  {
1079
+ "actual": "nearest_equivalent",
1080
+ "expected": "exact",
1081
+ "path": "model_output.classification.iab_content.mapping_mode"
1082
+ },
1083
+ {
1084
+ "actual": null,
1085
+ "expected": "Computing",
1086
+ "path": "model_output.classification.iab_content.tier2.label"
1087
+ },
1088
+ {
1089
+ "actual": null,
1090
  "expected": "Desktops",
1091
  "path": "model_output.classification.iab_content.tier3.label"
1092
  }
 
1098
  },
1099
  {
1100
  "actual": {
1101
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1102
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1103
  "model_output.classification.iab_content.tier2.label": "Computing",
1104
+ "model_output.classification.iab_content.tier3.label": null
1105
  },
1106
  "expected": {
1107
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1110
  "model_output.classification.iab_content.tier3.label": "Desktops"
1111
  },
1112
  "id": "desktops-medium",
1113
+ "mismatches": [
1114
+ {
1115
+ "actual": "nearest_equivalent",
1116
+ "expected": "exact",
1117
+ "path": "model_output.classification.iab_content.mapping_mode"
1118
+ },
1119
+ {
1120
+ "actual": null,
1121
+ "expected": "Desktops",
1122
+ "path": "model_output.classification.iab_content.tier3.label"
1123
+ }
1124
+ ],
1125
  "notes": "Cross-vertical medium IAB mapping case for Technology & Computing > Computing > Desktops.",
1126
+ "pass": false,
1127
  "status": "must_fix",
1128
  "text": "Which desktop computer should I buy for a home office?"
1129
  },
1130
  {
1131
  "actual": {
1132
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1133
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
1134
+ "model_output.classification.iab_content.tier2.label": null,
1135
+ "model_output.classification.iab_content.tier3.label": null
1136
  },
1137
  "expected": {
1138
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1141
  "model_output.classification.iab_content.tier3.label": "Desktops"
1142
  },
1143
  "id": "desktops-hard",
1144
+ "mismatches": [
1145
+ {
1146
+ "actual": "nearest_equivalent",
1147
+ "expected": "exact",
1148
+ "path": "model_output.classification.iab_content.mapping_mode"
1149
+ },
1150
+ {
1151
+ "actual": null,
1152
+ "expected": "Computing",
1153
+ "path": "model_output.classification.iab_content.tier2.label"
1154
+ },
1155
+ {
1156
+ "actual": null,
1157
+ "expected": "Desktops",
1158
+ "path": "model_output.classification.iab_content.tier3.label"
1159
+ }
1160
+ ],
1161
  "notes": "Cross-vertical hard IAB mapping case for Technology & Computing > Computing > Desktops.",
1162
+ "pass": false,
1163
  "status": "must_fix",
1164
  "text": "Need a desktop PC with strong performance for creative work"
1165
  },
 
1225
  },
1226
  {
1227
  "actual": {
1228
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1229
+ "model_output.classification.iab_content.tier1.label": "Shopping"
1230
  },
1231
  "expected": {
1232
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
1235
  "id": "style-fashion-parent-easy",
1236
  "mismatches": [
1237
  {
1238
+ "actual": "Shopping",
1239
+ "expected": "Style & Fashion",
1240
+ "path": "model_output.classification.iab_content.tier1.label"
1241
  }
1242
  ],
1243
  "notes": "Cross-vertical easy IAB mapping case for Style & Fashion.",
 
1291
  },
1292
  {
1293
  "actual": {
1294
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1295
+ "model_output.classification.iab_content.tier1.label": "Style & Fashion",
1296
+ "model_output.classification.iab_content.tier2.label": null,
1297
  "model_output.classification.iab_content.tier3.label": null
1298
  },
1299
  "expected": {
 
1305
  "id": "womens-shoes-easy",
1306
  "mismatches": [
1307
  {
1308
+ "actual": "nearest_equivalent",
1309
+ "expected": "exact",
1310
+ "path": "model_output.classification.iab_content.mapping_mode"
1311
  },
1312
  {
1313
+ "actual": null,
1314
  "expected": "Women's Fashion",
1315
  "path": "model_output.classification.iab_content.tier2.label"
1316
  },
 
1327
  },
1328
  {
1329
  "actual": {
1330
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1331
+ "model_output.classification.iab_content.tier1.label": "Style & Fashion",
1332
+ "model_output.classification.iab_content.tier2.label": null,
1333
  "model_output.classification.iab_content.tier3.label": null
1334
  },
1335
  "expected": {
 
1341
  "id": "womens-shoes-medium",
1342
  "mismatches": [
1343
  {
1344
+ "actual": "nearest_equivalent",
1345
+ "expected": "exact",
1346
+ "path": "model_output.classification.iab_content.mapping_mode"
1347
  },
1348
  {
1349
+ "actual": null,
1350
  "expected": "Women's Fashion",
1351
  "path": "model_output.classification.iab_content.tier2.label"
1352
  },
 
1385
  "actual": {
1386
  "model_output.classification.iab_content.mapping_mode": "exact",
1387
  "model_output.classification.iab_content.tier1.label": "Style & Fashion",
1388
+ "model_output.classification.iab_content.tier2.label": "Women's Fashion",
1389
+ "model_output.classification.iab_content.tier3.label": "Women's Shoes and Footwear"
1390
  },
1391
  "expected": {
1392
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1397
  "id": "mens-shoes-easy",
1398
  "mismatches": [
1399
  {
1400
+ "actual": "Women's Fashion",
1401
  "expected": "Men's Fashion",
1402
  "path": "model_output.classification.iab_content.tier2.label"
1403
  },
1404
  {
1405
+ "actual": "Women's Shoes and Footwear",
1406
  "expected": "Men's Shoes and Footwear",
1407
  "path": "model_output.classification.iab_content.tier3.label"
1408
  }
 
1416
  "actual": {
1417
  "model_output.classification.iab_content.mapping_mode": "exact",
1418
  "model_output.classification.iab_content.tier1.label": "Style & Fashion",
1419
+ "model_output.classification.iab_content.tier2.label": "Women's Fashion",
1420
+ "model_output.classification.iab_content.tier3.label": "Women's Shoes and Footwear"
1421
  },
1422
  "expected": {
1423
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1426
  "model_output.classification.iab_content.tier3.label": "Men's Shoes and Footwear"
1427
  },
1428
  "id": "mens-shoes-medium",
1429
+ "mismatches": [
1430
+ {
1431
+ "actual": "Women's Fashion",
1432
+ "expected": "Men's Fashion",
1433
+ "path": "model_output.classification.iab_content.tier2.label"
1434
+ },
1435
+ {
1436
+ "actual": "Women's Shoes and Footwear",
1437
+ "expected": "Men's Shoes and Footwear",
1438
+ "path": "model_output.classification.iab_content.tier3.label"
1439
+ }
1440
+ ],
1441
  "notes": "Cross-vertical medium IAB mapping case for Style & Fashion > Men's Fashion > Men's Shoes and Footwear.",
1442
+ "pass": false,
1443
  "status": "must_fix",
1444
  "text": "Good men's dress shoes for office use"
1445
  },
 
1447
  "actual": {
1448
  "model_output.classification.iab_content.mapping_mode": "exact",
1449
  "model_output.classification.iab_content.tier1.label": "Style & Fashion",
1450
+ "model_output.classification.iab_content.tier2.label": "Women's Fashion",
1451
+ "model_output.classification.iab_content.tier3.label": "Women's Shoes and Footwear"
1452
  },
1453
  "expected": {
1454
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1459
  "id": "mens-shoes-hard",
1460
  "mismatches": [
1461
  {
1462
+ "actual": "Women's Fashion",
1463
  "expected": "Men's Fashion",
1464
  "path": "model_output.classification.iab_content.tier2.label"
1465
  },
1466
  {
1467
+ "actual": "Women's Shoes and Footwear",
1468
  "expected": "Men's Shoes and Footwear",
1469
  "path": "model_output.classification.iab_content.tier3.label"
1470
  }
 
1496
  },
1497
  {
1498
  "actual": {
1499
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1500
  "model_output.classification.iab_content.tier1.label": "Travel",
1501
+ "model_output.classification.iab_content.tier2.label": null,
1502
+ "model_output.classification.iab_content.tier3.label": null
1503
  },
1504
  "expected": {
1505
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1508
  "model_output.classification.iab_content.tier3.label": "Hotels and Motels"
1509
  },
1510
  "id": "hotels-medium",
1511
+ "mismatches": [
1512
+ {
1513
+ "actual": "nearest_equivalent",
1514
+ "expected": "exact",
1515
+ "path": "model_output.classification.iab_content.mapping_mode"
1516
+ },
1517
+ {
1518
+ "actual": null,
1519
+ "expected": "Travel Type",
1520
+ "path": "model_output.classification.iab_content.tier2.label"
1521
+ },
1522
+ {
1523
+ "actual": null,
1524
+ "expected": "Hotels and Motels",
1525
+ "path": "model_output.classification.iab_content.tier3.label"
1526
+ }
1527
+ ],
1528
  "notes": "Cross-vertical medium IAB mapping case for Travel > Travel Type > Hotels and Motels.",
1529
+ "pass": false,
1530
  "status": "must_fix",
1531
  "text": "Best hotels near Times Square for a weekend trip"
1532
  },
 
1534
  "actual": {
1535
  "model_output.classification.iab_content.mapping_mode": "exact",
1536
  "model_output.classification.iab_content.tier1.label": "Travel",
1537
+ "model_output.classification.iab_content.tier2.label": null,
1538
  "model_output.classification.iab_content.tier3.label": null
1539
  },
1540
  "expected": {
 
1545
  },
1546
  "id": "hotels-hard",
1547
  "mismatches": [
1548
+ {
1549
+ "actual": null,
1550
+ "expected": "Travel Type",
1551
+ "path": "model_output.classification.iab_content.tier2.label"
1552
+ },
1553
  {
1554
  "actual": null,
1555
  "expected": "Hotels and Motels",
 
1636
  "actual": {
1637
  "model_output.classification.iab_content.mapping_mode": "exact",
1638
  "model_output.classification.iab_content.tier1.label": "Healthy Living",
1639
+ "model_output.classification.iab_content.tier2.label": null,
1640
+ "model_output.classification.iab_content.tier3.label": null
1641
  },
1642
  "expected": {
1643
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1646
  "model_output.classification.iab_content.tier3.label": "Running and Jogging"
1647
  },
1648
  "id": "running-and-jogging-easy",
1649
+ "mismatches": [
1650
+ {
1651
+ "actual": null,
1652
+ "expected": "Fitness and Exercise",
1653
+ "path": "model_output.classification.iab_content.tier2.label"
1654
+ },
1655
+ {
1656
+ "actual": null,
1657
+ "expected": "Running and Jogging",
1658
+ "path": "model_output.classification.iab_content.tier3.label"
1659
+ }
1660
+ ],
1661
  "notes": "Cross-vertical easy IAB mapping case for Healthy Living > Fitness and Exercise > Running and Jogging.",
1662
+ "pass": false,
1663
  "status": "must_fix",
1664
  "text": "Best running plan for a first 10k"
1665
  },
 
1667
  "actual": {
1668
  "model_output.classification.iab_content.mapping_mode": "exact",
1669
  "model_output.classification.iab_content.tier1.label": "Sports",
1670
+ "model_output.classification.iab_content.tier2.label": "Bodybuilding",
1671
  "model_output.classification.iab_content.tier3.label": null
1672
  },
1673
  "expected": {
 
1684
  "path": "model_output.classification.iab_content.tier1.label"
1685
  },
1686
  {
1687
+ "actual": "Bodybuilding",
1688
  "expected": "Fitness and Exercise",
1689
  "path": "model_output.classification.iab_content.tier2.label"
1690
  },
 
1820
  },
1821
  {
1822
  "actual": {
1823
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1824
  "model_output.classification.iab_content.tier1.label": "Travel",
1825
+ "model_output.classification.iab_content.tier2.label": null
1826
  },
1827
  "expected": {
1828
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
1837
  "path": "model_output.classification.iab_content.tier1.label"
1838
  },
1839
  {
1840
+ "actual": null,
 
 
 
 
 
1841
  "expected": "Fiction",
1842
  "path": "model_output.classification.iab_content.tier2.label"
1843
  }
 
1850
  {
1851
  "actual": {
1852
  "model_output.classification.iab_content.mapping_mode": "exact",
1853
+ "model_output.classification.iab_content.tier1.label": "Genres",
1854
+ "model_output.classification.iab_content.tier2.label": "Romance"
1855
  },
1856
  "expected": {
1857
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1859
  "model_output.classification.iab_content.tier2.label": "Fiction"
1860
  },
1861
  "id": "fiction-hard",
1862
+ "mismatches": [
1863
+ {
1864
+ "actual": "Genres",
1865
+ "expected": "Books and Literature",
1866
+ "path": "model_output.classification.iab_content.tier1.label"
1867
+ },
1868
+ {
1869
+ "actual": "Romance",
1870
+ "expected": "Fiction",
1871
+ "path": "model_output.classification.iab_content.tier2.label"
1872
+ }
1873
+ ],
1874
  "notes": "Cross-vertical hard IAB mapping case for Books and Literature > Fiction.",
1875
+ "pass": false,
1876
  "status": "must_fix",
1877
  "text": "Looking for a character-driven novel, not comics or poetry"
1878
  },
 
1880
  "actual": {
1881
  "model_output.classification.iab_content.mapping_mode": "exact",
1882
  "model_output.classification.iab_content.tier1.label": "Home & Garden",
1883
+ "model_output.classification.iab_content.tier2.label": "Remodeling & Construction"
1884
  },
1885
  "expected": {
1886
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1890
  "id": "home-improvement-easy",
1891
  "mismatches": [
1892
  {
1893
+ "actual": "Remodeling & Construction",
1894
  "expected": "Home Improvement",
1895
  "path": "model_output.classification.iab_content.tier2.label"
1896
  }
 
1903
  {
1904
  "actual": {
1905
  "model_output.classification.iab_content.mapping_mode": "exact",
1906
+ "model_output.classification.iab_content.tier1.label": "Style & Fashion",
1907
+ "model_output.classification.iab_content.tier2.label": "Personal Care"
1908
  },
1909
  "expected": {
1910
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1914
  "id": "home-improvement-medium",
1915
  "mismatches": [
1916
  {
1917
+ "actual": "Style & Fashion",
1918
+ "expected": "Home & Garden",
1919
+ "path": "model_output.classification.iab_content.tier1.label"
1920
+ },
1921
+ {
1922
+ "actual": "Personal Care",
1923
  "expected": "Home Improvement",
1924
  "path": "model_output.classification.iab_content.tier2.label"
1925
  }
 
1960
  },
1961
  {
1962
  "actual": {
1963
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
1964
+ "model_output.classification.iab_content.tier1.label": "Careers",
1965
+ "model_output.classification.iab_content.tier2.label": null
1966
  },
1967
  "expected": {
1968
  "model_output.classification.iab_content.mapping_mode": "exact",
 
1972
  "id": "online-education-easy",
1973
  "mismatches": [
1974
  {
1975
+ "actual": "Careers",
1976
  "expected": "Education",
1977
  "path": "model_output.classification.iab_content.tier1.label"
1978
  },
1979
  {
1980
+ "actual": "nearest_equivalent",
1981
+ "expected": "exact",
1982
+ "path": "model_output.classification.iab_content.mapping_mode"
1983
+ },
1984
+ {
1985
+ "actual": null,
1986
  "expected": "Online Education",
1987
  "path": "model_output.classification.iab_content.tier2.label"
1988
  }
 
2112
  },
2113
  {
2114
  "actual": {
2115
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2116
+ "model_output.classification.iab_content.tier1.label": "Food & Drink"
2117
  },
2118
  "expected": {
2119
  "model_output.classification.iab_content.mapping_mode": "exact",
2120
  "model_output.classification.iab_content.tier1.label": "Medical Health"
2121
  },
2122
  "id": "medical-health-easy",
2123
+ "mismatches": [
2124
+ {
2125
+ "actual": "Food & Drink",
2126
+ "expected": "Medical Health",
2127
+ "path": "model_output.classification.iab_content.tier1.label"
2128
+ },
2129
+ {
2130
+ "actual": "nearest_equivalent",
2131
+ "expected": "exact",
2132
+ "path": "model_output.classification.iab_content.mapping_mode"
2133
+ }
2134
+ ],
2135
  "notes": "Cross-vertical easy IAB mapping case for Medical Health.",
2136
+ "pass": false,
2137
  "status": "must_fix",
2138
  "text": "what do these allergy symptoms mean"
2139
  },
2140
  {
2141
  "actual": {
2142
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2143
  "model_output.classification.iab_content.tier1.label": "Medical Health"
2144
  },
2145
  "expected": {
 
2147
  "model_output.classification.iab_content.tier1.label": "Medical Health"
2148
  },
2149
  "id": "medical-health-medium",
2150
+ "mismatches": [
2151
+ {
2152
+ "actual": "nearest_equivalent",
2153
+ "expected": "exact",
2154
+ "path": "model_output.classification.iab_content.mapping_mode"
2155
+ }
2156
+ ],
2157
  "notes": "Cross-vertical medium IAB mapping case for Medical Health.",
2158
+ "pass": false,
2159
  "status": "must_fix",
2160
  "text": "when should i see a doctor for persistent knee pain"
2161
  },
 
2259
  },
2260
  {
2261
  "actual": {
2262
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2263
  "model_output.classification.iab_content.tier1.label": "Holidays",
2264
+ "model_output.classification.iab_content.tier2.label": null
2265
  },
2266
  "expected": {
2267
  "model_output.classification.iab_content.mapping_mode": "exact",
 
2276
  "path": "model_output.classification.iab_content.tier1.label"
2277
  },
2278
  {
2279
+ "actual": "nearest_equivalent",
2280
+ "expected": "exact",
2281
+ "path": "model_output.classification.iab_content.mapping_mode"
2282
+ },
2283
+ {
2284
+ "actual": null,
2285
  "expected": "Financial Planning",
2286
  "path": "model_output.classification.iab_content.tier2.label"
2287
  }
 
2359
  },
2360
  {
2361
  "actual": {
2362
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
2363
+ "model_output.classification.iab_content.tier1.label": "Shopping",
2364
+ "model_output.classification.iab_content.tier2.label": null
2365
  },
2366
  "expected": {
2367
  "model_output.classification.iab_content.mapping_mode": "exact",
 
2371
  "id": "parenting-medium",
2372
  "mismatches": [
2373
  {
2374
+ "actual": "Shopping",
2375
  "expected": "Family and Relationships",
2376
  "path": "model_output.classification.iab_content.tier1.label"
2377
  },
2378
  {
2379
+ "actual": "nearest_equivalent",
2380
+ "expected": "exact",
2381
+ "path": "model_output.classification.iab_content.mapping_mode"
2382
+ },
2383
+ {
2384
+ "actual": null,
2385
  "expected": "Parenting",
2386
  "path": "model_output.classification.iab_content.tier2.label"
2387
  }
 
2467
  "actual": {
2468
  "model_output.classification.iab_content.mapping_mode": "exact",
2469
  "model_output.classification.iab_content.tier1.label": "Entertainment",
2470
+ "model_output.classification.iab_content.tier2.label": null
2471
  },
2472
  "expected": {
2473
  "model_output.classification.iab_content.mapping_mode": "exact",
 
2475
  "model_output.classification.iab_content.tier2.label": "Movies"
2476
  },
2477
  "id": "movies-easy",
2478
+ "mismatches": [
2479
+ {
2480
+ "actual": null,
2481
+ "expected": "Movies",
2482
+ "path": "model_output.classification.iab_content.tier2.label"
2483
+ }
2484
+ ],
2485
  "notes": "Cross-vertical easy IAB mapping case for Entertainment > Movies.",
2486
+ "pass": false,
2487
  "status": "must_fix",
2488
  "text": "What movie should we watch tonight?"
2489
  },
2490
  {
2491
  "actual": {
2492
  "model_output.classification.iab_content.mapping_mode": "exact",
2493
+ "model_output.classification.iab_content.tier1.label": "Genres",
2494
+ "model_output.classification.iab_content.tier2.label": "Horror"
2495
  },
2496
  "expected": {
2497
  "model_output.classification.iab_content.mapping_mode": "exact",
 
2499
  "model_output.classification.iab_content.tier2.label": "Movies"
2500
  },
2501
  "id": "movies-medium",
2502
+ "mismatches": [
2503
+ {
2504
+ "actual": "Genres",
2505
+ "expected": "Entertainment",
2506
+ "path": "model_output.classification.iab_content.tier1.label"
2507
+ },
2508
+ {
2509
+ "actual": "Horror",
2510
+ "expected": "Movies",
2511
+ "path": "model_output.classification.iab_content.tier2.label"
2512
+ }
2513
+ ],
2514
  "notes": "Cross-vertical medium IAB mapping case for Entertainment > Movies.",
2515
+ "pass": false,
2516
  "status": "must_fix",
2517
  "text": "Best thriller movies from the last few years"
2518
  },
artifacts/evaluation/latest/iab_quality_target_eval.json CHANGED
@@ -13,7 +13,7 @@
13
  "results": [
14
  {
15
  "actual": {
16
- "model_output.classification.iab_content.mapping_mode": "exact",
17
  "model_output.classification.iab_content.tier1.label": "Automotive",
18
  "model_output.classification.iab_content.tier2.label": null
19
  },
@@ -28,6 +28,11 @@
28
  "actual": null,
29
  "expected": "Auto Buying and Selling",
30
  "path": "model_output.classification.iab_content.tier2.label"
 
 
 
 
 
31
  }
32
  ],
33
  "notes": "Vehicle shopping queries should map into the automotive buying branch, not business sales.",
@@ -90,8 +95,8 @@
90
  "actual": {
91
  "model_output.classification.iab_content.mapping_mode": "exact",
92
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
93
- "model_output.classification.iab_content.tier2.label": "Computing",
94
- "model_output.classification.iab_content.tier3.label": "Software and Applications"
95
  },
96
  "expected": {
97
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -107,12 +112,12 @@
107
  "path": "model_output.classification.iab_content.tier1.label"
108
  },
109
  {
110
- "actual": "Computing",
111
  "expected": "Business",
112
  "path": "model_output.classification.iab_content.tier2.label"
113
  },
114
  {
115
- "actual": "Software and Applications",
116
  "expected": "Sales",
117
  "path": "model_output.classification.iab_content.tier3.label"
118
  },
@@ -129,9 +134,9 @@
129
  },
130
  {
131
  "actual": {
132
- "model_output.classification.iab_content.mapping_mode": "exact",
133
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
134
- "model_output.classification.iab_content.tier2.label": "Robotics",
135
  "model_output.classification.iab_content.tier3.label": null
136
  },
137
  "expected": {
@@ -148,7 +153,7 @@
148
  "path": "model_output.classification.iab_content.tier1.label"
149
  },
150
  {
151
- "actual": "Robotics",
152
  "expected": "Business",
153
  "path": "model_output.classification.iab_content.tier2.label"
154
  },
@@ -156,6 +161,11 @@
156
  "actual": null,
157
  "expected": "Sales",
158
  "path": "model_output.classification.iab_content.tier3.label"
 
 
 
 
 
159
  }
160
  ],
161
  "notes": "Direct CRM vendor comparison should map cleanly into the sales domain.",
@@ -165,9 +175,9 @@
165
  },
166
  {
167
  "actual": {
168
- "model_output.classification.iab_content.mapping_mode": "exact",
169
  "model_output.classification.iab_content.tier1.label": "Careers",
170
- "model_output.classification.iab_content.tier2.label": "Job Search",
171
  "model_output.classification.iab_content.tier3.label": null
172
  },
173
  "expected": {
@@ -184,7 +194,7 @@
184
  "path": "model_output.classification.iab_content.tier1.label"
185
  },
186
  {
187
- "actual": "Job Search",
188
  "expected": "Business",
189
  "path": "model_output.classification.iab_content.tier2.label"
190
  },
@@ -192,6 +202,11 @@
192
  "actual": null,
193
  "expected": "Marketing and Advertising",
194
  "path": "model_output.classification.iab_content.tier3.label"
 
 
 
 
 
195
  }
196
  ],
197
  "notes": "Marketing tool discovery should map to the marketing and advertising branch.",
@@ -202,7 +217,7 @@
202
  {
203
  "actual": {
204
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
205
- "model_output.classification.iab_content.tier1.label": "Science",
206
  "model_output.classification.iab_content.tier2.label": null
207
  },
208
  "expected": {
@@ -213,7 +228,7 @@
213
  "id": "ml-explanation-maps-to-ai",
214
  "mismatches": [
215
  {
216
- "actual": "Science",
217
  "expected": "Technology & Computing",
218
  "path": "model_output.classification.iab_content.tier1.label"
219
  },
@@ -235,10 +250,10 @@
235
  },
236
  {
237
  "actual": {
238
- "model_output.classification.iab_content.mapping_mode": "exact",
239
- "model_output.classification.iab_content.tier1.label": "Technology & Computing",
240
- "model_output.classification.iab_content.tier2.label": "Computing",
241
- "model_output.classification.iab_content.tier3.label": "Information and Network Security"
242
  },
243
  "expected": {
244
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
@@ -249,24 +264,19 @@
249
  "id": "support-credential-help-maps-to-business-it",
250
  "mismatches": [
251
  {
252
- "actual": "Technology & Computing",
253
  "expected": "Business and Finance",
254
  "path": "model_output.classification.iab_content.tier1.label"
255
  },
256
  {
257
- "actual": "Computing",
258
  "expected": "Business",
259
  "path": "model_output.classification.iab_content.tier2.label"
260
  },
261
  {
262
- "actual": "Information and Network Security",
263
  "expected": "Business I.T.",
264
  "path": "model_output.classification.iab_content.tier3.label"
265
- },
266
- {
267
- "actual": "exact",
268
- "expected": "nearest_equivalent",
269
- "path": "model_output.classification.iab_content.mapping_mode"
270
  }
271
  ],
272
  "notes": "Credential and account help should map to business IT rather than generic business.",
@@ -294,9 +304,9 @@
294
  },
295
  {
296
  "actual": {
297
- "model_output.classification.iab_content.mapping_mode": "exact",
298
- "model_output.classification.iab_content.tier1.label": "Sensitive Topics",
299
- "model_output.classification.iab_content.tier2.label": "Crime & Harmful Acts to Individuals, Society & Human Right Violations",
300
  "model_output.classification.iab_content.tier3.label": null
301
  },
302
  "expected": {
@@ -308,12 +318,12 @@
308
  "id": "trial-signup-maps-to-software",
309
  "mismatches": [
310
  {
311
- "actual": "Sensitive Topics",
312
  "expected": "Technology & Computing",
313
  "path": "model_output.classification.iab_content.tier1.label"
314
  },
315
  {
316
- "actual": "Crime & Harmful Acts to Individuals, Society & Human Right Violations",
317
  "expected": "Computing",
318
  "path": "model_output.classification.iab_content.tier2.label"
319
  },
@@ -321,11 +331,6 @@
321
  "actual": null,
322
  "expected": "Software and Applications",
323
  "path": "model_output.classification.iab_content.tier3.label"
324
- },
325
- {
326
- "actual": "exact",
327
- "expected": "nearest_equivalent",
328
- "path": "model_output.classification.iab_content.mapping_mode"
329
  }
330
  ],
331
  "notes": "Software action queries should map to the software/application branch.",
 
13
  "results": [
14
  {
15
  "actual": {
16
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
17
  "model_output.classification.iab_content.tier1.label": "Automotive",
18
  "model_output.classification.iab_content.tier2.label": null
19
  },
 
28
  "actual": null,
29
  "expected": "Auto Buying and Selling",
30
  "path": "model_output.classification.iab_content.tier2.label"
31
+ },
32
+ {
33
+ "actual": "nearest_equivalent",
34
+ "expected": "exact",
35
+ "path": "model_output.classification.iab_content.mapping_mode"
36
  }
37
  ],
38
  "notes": "Vehicle shopping queries should map into the automotive buying branch, not business sales.",
 
95
  "actual": {
96
  "model_output.classification.iab_content.mapping_mode": "exact",
97
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
98
+ "model_output.classification.iab_content.tier2.label": null,
99
+ "model_output.classification.iab_content.tier3.label": null
100
  },
101
  "expected": {
102
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
112
  "path": "model_output.classification.iab_content.tier1.label"
113
  },
114
  {
115
+ "actual": null,
116
  "expected": "Business",
117
  "path": "model_output.classification.iab_content.tier2.label"
118
  },
119
  {
120
+ "actual": null,
121
  "expected": "Sales",
122
  "path": "model_output.classification.iab_content.tier3.label"
123
  },
 
134
  },
135
  {
136
  "actual": {
137
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
138
  "model_output.classification.iab_content.tier1.label": "Technology & Computing",
139
+ "model_output.classification.iab_content.tier2.label": null,
140
  "model_output.classification.iab_content.tier3.label": null
141
  },
142
  "expected": {
 
153
  "path": "model_output.classification.iab_content.tier1.label"
154
  },
155
  {
156
+ "actual": null,
157
  "expected": "Business",
158
  "path": "model_output.classification.iab_content.tier2.label"
159
  },
 
161
  "actual": null,
162
  "expected": "Sales",
163
  "path": "model_output.classification.iab_content.tier3.label"
164
+ },
165
+ {
166
+ "actual": "nearest_equivalent",
167
+ "expected": "exact",
168
+ "path": "model_output.classification.iab_content.mapping_mode"
169
  }
170
  ],
171
  "notes": "Direct CRM vendor comparison should map cleanly into the sales domain.",
 
175
  },
176
  {
177
  "actual": {
178
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
179
  "model_output.classification.iab_content.tier1.label": "Careers",
180
+ "model_output.classification.iab_content.tier2.label": null,
181
  "model_output.classification.iab_content.tier3.label": null
182
  },
183
  "expected": {
 
194
  "path": "model_output.classification.iab_content.tier1.label"
195
  },
196
  {
197
+ "actual": null,
198
  "expected": "Business",
199
  "path": "model_output.classification.iab_content.tier2.label"
200
  },
 
202
  "actual": null,
203
  "expected": "Marketing and Advertising",
204
  "path": "model_output.classification.iab_content.tier3.label"
205
+ },
206
+ {
207
+ "actual": "nearest_equivalent",
208
+ "expected": "exact",
209
+ "path": "model_output.classification.iab_content.mapping_mode"
210
  }
211
  ],
212
  "notes": "Marketing tool discovery should map to the marketing and advertising branch.",
 
217
  {
218
  "actual": {
219
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
220
+ "model_output.classification.iab_content.tier1.label": "Real Estate",
221
  "model_output.classification.iab_content.tier2.label": null
222
  },
223
  "expected": {
 
228
  "id": "ml-explanation-maps-to-ai",
229
  "mismatches": [
230
  {
231
+ "actual": "Real Estate",
232
  "expected": "Technology & Computing",
233
  "path": "model_output.classification.iab_content.tier1.label"
234
  },
 
250
  },
251
  {
252
  "actual": {
253
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
254
+ "model_output.classification.iab_content.tier1.label": "Personal Finance",
255
+ "model_output.classification.iab_content.tier2.label": null,
256
+ "model_output.classification.iab_content.tier3.label": null
257
  },
258
  "expected": {
259
  "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
 
264
  "id": "support-credential-help-maps-to-business-it",
265
  "mismatches": [
266
  {
267
+ "actual": "Personal Finance",
268
  "expected": "Business and Finance",
269
  "path": "model_output.classification.iab_content.tier1.label"
270
  },
271
  {
272
+ "actual": null,
273
  "expected": "Business",
274
  "path": "model_output.classification.iab_content.tier2.label"
275
  },
276
  {
277
+ "actual": null,
278
  "expected": "Business I.T.",
279
  "path": "model_output.classification.iab_content.tier3.label"
 
 
 
 
 
280
  }
281
  ],
282
  "notes": "Credential and account help should map to business IT rather than generic business.",
 
304
  },
305
  {
306
  "actual": {
307
+ "model_output.classification.iab_content.mapping_mode": "nearest_equivalent",
308
+ "model_output.classification.iab_content.tier1.label": "Sports",
309
+ "model_output.classification.iab_content.tier2.label": null,
310
  "model_output.classification.iab_content.tier3.label": null
311
  },
312
  "expected": {
 
318
  "id": "trial-signup-maps-to-software",
319
  "mismatches": [
320
  {
321
+ "actual": "Sports",
322
  "expected": "Technology & Computing",
323
  "path": "model_output.classification.iab_content.tier1.label"
324
  },
325
  {
326
+ "actual": null,
327
  "expected": "Computing",
328
  "path": "model_output.classification.iab_content.tier2.label"
329
  },
 
331
  "actual": null,
332
  "expected": "Software and Applications",
333
  "path": "model_output.classification.iab_content.tier3.label"
 
 
 
 
 
334
  }
335
  ],
336
  "notes": "Software action queries should map to the software/application branch.",
artifacts/evaluation/latest/intent_subtype_difficulty_benchmark_confusion_matrix.csv CHANGED
@@ -1,19 +1,19 @@
1
  ,education,product_discovery,comparison,evaluation,deal_seeking,provider_selection,signup,purchase,booking,download,contact_sales,task_execution,onboarding_setup,troubleshooting,account_help,billing_help,follow_up,emotional_reflection
2
  education,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3
- product_discovery,0,14,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4
- comparison,2,1,11,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
5
- evaluation,1,0,0,13,0,0,0,0,0,0,0,0,1,0,0,0,0,0
6
- deal_seeking,0,1,0,0,13,1,0,0,0,0,0,0,0,0,0,0,0,0
7
  provider_selection,0,0,0,0,0,16,0,0,0,0,0,0,0,0,0,0,0,0
8
- signup,0,0,0,0,0,0,15,1,0,0,0,0,0,0,0,0,0,0
9
- purchase,0,0,0,0,0,0,0,13,0,0,0,0,2,0,0,0,0,0
10
- booking,0,0,0,0,0,0,1,0,13,0,1,0,0,0,0,0,0,0
11
- download,0,0,0,0,0,0,0,0,0,13,1,1,0,0,0,0,0,0
12
  contact_sales,0,0,0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0
13
- task_execution,0,0,0,0,0,0,0,0,0,0,0,18,0,0,0,0,0,0
14
- onboarding_setup,0,0,0,0,0,0,0,0,0,0,0,1,16,0,0,0,0,0
15
- troubleshooting,0,0,0,0,0,0,0,0,0,0,0,0,0,12,1,2,0,0
16
- account_help,0,0,0,0,0,0,2,0,0,0,0,1,0,3,8,1,0,0
17
- billing_help,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,13,0,0
18
- follow_up,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,14,0
19
  emotional_reflection,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15
 
1
  ,education,product_discovery,comparison,evaluation,deal_seeking,provider_selection,signup,purchase,booking,download,contact_sales,task_execution,onboarding_setup,troubleshooting,account_help,billing_help,follow_up,emotional_reflection
2
  education,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3
+ product_discovery,0,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4
+ comparison,2,0,12,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5
+ evaluation,1,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6
+ deal_seeking,0,2,0,0,12,1,0,0,0,0,0,0,0,0,0,0,0,0
7
  provider_selection,0,0,0,0,0,16,0,0,0,0,0,0,0,0,0,0,0,0
8
+ signup,0,0,0,0,0,0,14,0,0,0,0,2,0,0,0,0,0,0
9
+ purchase,0,0,0,0,0,0,1,12,0,0,0,0,2,0,0,0,0,0
10
+ booking,0,0,0,0,0,0,3,0,9,0,1,2,0,0,0,0,0,0
11
+ download,0,0,0,0,0,0,0,0,0,14,0,1,0,0,0,0,0,0
12
  contact_sales,0,0,0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0
13
+ task_execution,0,0,0,0,0,0,0,0,0,0,0,17,1,0,0,0,0,0
14
+ onboarding_setup,0,0,0,0,0,0,0,0,0,0,0,0,17,0,0,0,0,0
15
+ troubleshooting,0,0,0,0,0,0,0,0,0,0,0,0,0,11,1,2,0,1
16
+ account_help,0,0,0,0,0,0,0,0,0,0,1,1,0,3,10,0,0,0
17
+ billing_help,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,13,0,0
18
+ follow_up,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14,1
19
  emotional_reflection,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15
artifacts/evaluation/latest/intent_subtype_difficulty_benchmark_report.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "accepted_accuracy": 0.9104,
3
- "accepted_coverage": 0.9675,
4
- "accuracy": 0.8917,
5
  "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/intent_subtype_difficulty_benchmark_confusion_matrix.csv",
6
  "count": 277,
7
  "dataset_path": "/content/agentic-intent-classifier/data/subtype_benchmark.jsonl",
@@ -12,52 +12,52 @@
12
  "accuracy": 0.913,
13
  "count": 92,
14
  "fallback_rate": 0.0,
15
- "macro_f1": 0.9109
16
  },
17
  "hard": {
18
- "accepted_accuracy": 0.8554,
19
- "accepted_coverage": 0.9121,
20
- "accuracy": 0.8132,
21
  "count": 91,
22
- "fallback_rate": 0.0879,
23
- "macro_f1": 0.8025
24
  },
25
  "medium": {
26
- "accepted_accuracy": 0.957,
27
  "accepted_coverage": 0.9894,
28
- "accuracy": 0.9468,
29
  "count": 94,
30
  "fallback_rate": 0.0106,
31
- "macro_f1": 0.9469
32
  }
33
  },
34
- "fallback_rate": 0.0325,
35
  "head": "intent_subtype",
36
- "macro_f1": 0.8886,
37
  "per_class_metrics": {
38
  "account_help": {
39
- "f1-score": 0.64,
40
- "precision": 0.8,
41
- "recall": 0.5333333333333333,
42
  "support": 15.0
43
  },
44
- "accuracy": 0.8916967509025271,
45
  "billing_help": {
46
- "f1-score": 0.8387096774193549,
47
- "precision": 0.8125,
48
  "recall": 0.8666666666666667,
49
  "support": 15.0
50
  },
51
  "booking": {
52
- "f1-score": 0.9285714285714286,
53
  "precision": 1.0,
54
- "recall": 0.8666666666666667,
55
  "support": 15.0
56
  },
57
  "comparison": {
58
- "f1-score": 0.8148148148148148,
59
- "precision": 0.9166666666666666,
60
- "recall": 0.7333333333333333,
61
  "support": 15.0
62
  },
63
  "contact_sales": {
@@ -67,15 +67,15 @@
67
  "support": 15.0
68
  },
69
  "deal_seeking": {
70
- "f1-score": 0.896551724137931,
71
- "precision": 0.9285714285714286,
72
- "recall": 0.8666666666666667,
73
  "support": 15.0
74
  },
75
  "download": {
76
- "f1-score": 0.9285714285714286,
77
  "precision": 1.0,
78
- "recall": 0.8666666666666667,
79
  "support": 15.0
80
  },
81
  "education": {
@@ -85,15 +85,15 @@
85
  "support": 15.0
86
  },
87
  "emotional_reflection": {
88
- "f1-score": 1.0,
89
- "precision": 1.0,
90
  "recall": 1.0,
91
  "support": 15.0
92
  },
93
  "evaluation": {
94
- "f1-score": 0.896551724137931,
95
- "precision": 0.9285714285714286,
96
- "recall": 0.8666666666666667,
97
  "support": 15.0
98
  },
99
  "follow_up": {
@@ -103,21 +103,21 @@
103
  "support": 15.0
104
  },
105
  "macro avg": {
106
- "f1-score": 0.8886471209737711,
107
- "precision": 0.8965122159975102,
108
- "recall": 0.8895561002178651,
109
  "support": 277.0
110
  },
111
  "onboarding_setup": {
112
- "f1-score": 0.8648648648648649,
113
- "precision": 0.8,
114
- "recall": 0.9411764705882353,
115
  "support": 17.0
116
  },
117
  "product_discovery": {
118
- "f1-score": 0.9032258064516129,
119
- "precision": 0.875,
120
- "recall": 0.9333333333333333,
121
  "support": 15.0
122
  },
123
  "provider_selection": {
@@ -127,33 +127,33 @@
127
  "support": 16.0
128
  },
129
  "purchase": {
130
- "f1-score": 0.896551724137931,
131
- "precision": 0.9285714285714286,
132
- "recall": 0.8666666666666667,
133
  "support": 15.0
134
  },
135
  "signup": {
136
- "f1-score": 0.8823529411764706,
137
- "precision": 0.8333333333333334,
138
- "recall": 0.9375,
139
  "support": 16.0
140
  },
141
  "task_execution": {
142
- "f1-score": 0.9230769230769231,
143
- "precision": 0.8571428571428571,
144
- "recall": 1.0,
145
  "support": 18.0
146
  },
147
  "troubleshooting": {
148
- "f1-score": 0.8,
149
- "precision": 0.8,
150
- "recall": 0.8,
151
  "support": 15.0
152
  },
153
  "weighted avg": {
154
- "f1-score": 0.8891181699377334,
155
- "precision": 0.8953221541324111,
156
- "recall": 0.8916967509025271,
157
  "support": 277.0
158
  }
159
  },
 
1
  {
2
+ "accepted_accuracy": 0.8901,
3
+ "accepted_coverage": 0.9856,
4
+ "accuracy": 0.8845,
5
  "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/intent_subtype_difficulty_benchmark_confusion_matrix.csv",
6
  "count": 277,
7
  "dataset_path": "/content/agentic-intent-classifier/data/subtype_benchmark.jsonl",
 
12
  "accuracy": 0.913,
13
  "count": 92,
14
  "fallback_rate": 0.0,
15
+ "macro_f1": 0.9124
16
  },
17
  "hard": {
18
+ "accepted_accuracy": 0.8295,
19
+ "accepted_coverage": 0.967,
20
+ "accuracy": 0.8242,
21
  "count": 91,
22
+ "fallback_rate": 0.033,
23
+ "macro_f1": 0.8183
24
  },
25
  "medium": {
26
+ "accepted_accuracy": 0.9247,
27
  "accepted_coverage": 0.9894,
28
+ "accuracy": 0.9149,
29
  "count": 94,
30
  "fallback_rate": 0.0106,
31
+ "macro_f1": 0.9117
32
  }
33
  },
34
+ "fallback_rate": 0.0144,
35
  "head": "intent_subtype",
36
+ "macro_f1": 0.8824,
37
  "per_class_metrics": {
38
  "account_help": {
39
+ "f1-score": 0.7142857142857143,
40
+ "precision": 0.7692307692307693,
41
+ "recall": 0.6666666666666666,
42
  "support": 15.0
43
  },
44
+ "accuracy": 0.8844765342960289,
45
  "billing_help": {
46
+ "f1-score": 0.8666666666666667,
47
+ "precision": 0.8666666666666667,
48
  "recall": 0.8666666666666667,
49
  "support": 15.0
50
  },
51
  "booking": {
52
+ "f1-score": 0.75,
53
  "precision": 1.0,
54
+ "recall": 0.6,
55
  "support": 15.0
56
  },
57
  "comparison": {
58
+ "f1-score": 0.8888888888888888,
59
+ "precision": 1.0,
60
+ "recall": 0.8,
61
  "support": 15.0
62
  },
63
  "contact_sales": {
 
67
  "support": 15.0
68
  },
69
  "deal_seeking": {
70
+ "f1-score": 0.8888888888888888,
71
+ "precision": 1.0,
72
+ "recall": 0.8,
73
  "support": 15.0
74
  },
75
  "download": {
76
+ "f1-score": 0.9655172413793104,
77
  "precision": 1.0,
78
+ "recall": 0.9333333333333333,
79
  "support": 15.0
80
  },
81
  "education": {
 
85
  "support": 15.0
86
  },
87
  "emotional_reflection": {
88
+ "f1-score": 0.9375,
89
+ "precision": 0.8823529411764706,
90
  "recall": 1.0,
91
  "support": 15.0
92
  },
93
  "evaluation": {
94
+ "f1-score": 0.9333333333333333,
95
+ "precision": 0.9333333333333333,
96
+ "recall": 0.9333333333333333,
97
  "support": 15.0
98
  },
99
  "follow_up": {
 
103
  "support": 15.0
104
  },
105
  "macro avg": {
106
+ "f1-score": 0.8824228919733669,
107
+ "precision": 0.8968567719420234,
108
+ "recall": 0.8825617283950618,
109
  "support": 277.0
110
  },
111
  "onboarding_setup": {
112
+ "f1-score": 0.918918918918919,
113
+ "precision": 0.85,
114
+ "recall": 1.0,
115
  "support": 17.0
116
  },
117
  "product_discovery": {
118
+ "f1-score": 0.9375,
119
+ "precision": 0.8823529411764706,
120
+ "recall": 1.0,
121
  "support": 15.0
122
  },
123
  "provider_selection": {
 
127
  "support": 16.0
128
  },
129
  "purchase": {
130
+ "f1-score": 0.8888888888888888,
131
+ "precision": 1.0,
132
+ "recall": 0.8,
133
  "support": 15.0
134
  },
135
  "signup": {
136
+ "f1-score": 0.8235294117647058,
137
+ "precision": 0.7777777777777778,
138
+ "recall": 0.875,
139
  "support": 16.0
140
  },
141
  "task_execution": {
142
+ "f1-score": 0.8292682926829268,
143
+ "precision": 0.7391304347826086,
144
+ "recall": 0.9444444444444444,
145
  "support": 18.0
146
  },
147
  "troubleshooting": {
148
+ "f1-score": 0.7586206896551724,
149
+ "precision": 0.7857142857142857,
150
+ "recall": 0.7333333333333333,
151
  "support": 15.0
152
  },
153
  "weighted avg": {
154
+ "f1-score": 0.8822131766431675,
155
+ "precision": 0.8945403392673653,
156
+ "recall": 0.8844765342960289,
157
  "support": 277.0
158
  }
159
  },
artifacts/evaluation/latest/intent_subtype_extended_cases_confusion_matrix.csv CHANGED
@@ -1,9 +1,9 @@
1
  ,education,product_discovery,comparison,evaluation,deal_seeking,provider_selection,signup,purchase,booking,download,contact_sales,task_execution,onboarding_setup,troubleshooting,account_help,billing_help,follow_up,emotional_reflection
2
  education,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3
  product_discovery,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4
- comparison,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5
  evaluation,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
6
- deal_seeking,0,0,0,0,9,0,0,0,0,0,0,0,0,0,0,0,0,0
7
  provider_selection,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0
8
  signup,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
9
  purchase,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
@@ -15,5 +15,5 @@ onboarding_setup,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0
15
  troubleshooting,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
16
  account_help,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,0,0,0
17
  billing_help,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
18
- follow_up,1,0,0,0,3,0,0,0,0,0,0,1,0,0,0,0,7,0
19
  emotional_reflection,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
 
1
  ,education,product_discovery,comparison,evaluation,deal_seeking,provider_selection,signup,purchase,booking,download,contact_sales,task_execution,onboarding_setup,troubleshooting,account_help,billing_help,follow_up,emotional_reflection
2
  education,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3
  product_discovery,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4
+ comparison,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
5
  evaluation,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
6
+ deal_seeking,1,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0
7
  provider_selection,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0
8
  signup,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
9
  purchase,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
 
15
  troubleshooting,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
16
  account_help,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,0,0,0
17
  billing_help,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
18
+ follow_up,0,0,0,0,3,0,0,0,0,0,0,0,0,0,1,0,8,0
19
  emotional_reflection,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
artifacts/evaluation/latest/intent_subtype_extended_cases_report.json CHANGED
@@ -1,21 +1,21 @@
1
  {
2
- "accepted_accuracy": 0.8302,
3
  "accepted_coverage": 1.0,
4
- "accuracy": 0.8302,
5
  "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/intent_subtype_extended_cases_confusion_matrix.csv",
6
  "count": 53,
7
  "dataset_path": "/content/agentic-intent-classifier/data/subtype/extended_cases.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "intent_subtype",
10
- "macro_f1": 0.7668,
11
  "per_class_metrics": {
12
  "account_help": {
13
- "f1-score": 0.8,
14
- "precision": 1.0,
15
  "recall": 0.6666666666666666,
16
  "support": 3.0
17
  },
18
- "accuracy": 0.8301886792452831,
19
  "billing_help": {
20
  "f1-score": 0.0,
21
  "precision": 0.0,
@@ -29,9 +29,9 @@
29
  "support": 0.0
30
  },
31
  "comparison": {
32
- "f1-score": 1.0,
33
  "precision": 1.0,
34
- "recall": 1.0,
35
  "support": 2.0
36
  },
37
  "contact_sales": {
@@ -41,9 +41,9 @@
41
  "support": 0.0
42
  },
43
  "deal_seeking": {
44
- "f1-score": 0.8571428571428571,
45
- "precision": 0.75,
46
- "recall": 1.0,
47
  "support": 9.0
48
  },
49
  "download": {
@@ -71,15 +71,15 @@
71
  "support": 3.0
72
  },
73
  "follow_up": {
74
- "f1-score": 0.7368421052631579,
75
  "precision": 1.0,
76
- "recall": 0.5833333333333334,
77
  "support": 12.0
78
  },
79
  "macro avg": {
80
- "f1-score": 0.46858256266151,
81
- "precision": 0.4565696649029982,
82
- "recall": 0.513888888888889,
83
  "support": 53.0
84
  },
85
  "onboarding_setup": {
@@ -113,8 +113,8 @@
113
  "support": 0.0
114
  },
115
  "task_execution": {
116
- "f1-score": 0.6666666666666666,
117
- "precision": 0.5,
118
  "recall": 1.0,
119
  "support": 1.0
120
  },
@@ -125,9 +125,9 @@
125
  "support": 1.0
126
  },
127
  "weighted avg": {
128
- "f1-score": 0.8018611395225099,
129
- "precision": 0.8208295896975142,
130
- "recall": 0.8301886792452831,
131
  "support": 53.0
132
  }
133
  },
 
1
  {
2
+ "accepted_accuracy": 0.8113,
3
  "accepted_coverage": 1.0,
4
+ "accuracy": 0.8113,
5
  "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/intent_subtype_extended_cases_confusion_matrix.csv",
6
  "count": 53,
7
  "dataset_path": "/content/agentic-intent-classifier/data/subtype/extended_cases.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "intent_subtype",
10
+ "macro_f1": 0.7517,
11
  "per_class_metrics": {
12
  "account_help": {
13
+ "f1-score": 0.6666666666666666,
14
+ "precision": 0.6666666666666666,
15
  "recall": 0.6666666666666666,
16
  "support": 3.0
17
  },
18
+ "accuracy": 0.8113207547169812,
19
  "billing_help": {
20
  "f1-score": 0.0,
21
  "precision": 0.0,
 
29
  "support": 0.0
30
  },
31
  "comparison": {
32
+ "f1-score": 0.6666666666666666,
33
  "precision": 1.0,
34
+ "recall": 0.5,
35
  "support": 2.0
36
  },
37
  "contact_sales": {
 
41
  "support": 0.0
42
  },
43
  "deal_seeking": {
44
+ "f1-score": 0.7619047619047619,
45
+ "precision": 0.6666666666666666,
46
+ "recall": 0.8888888888888888,
47
  "support": 9.0
48
  },
49
  "download": {
 
71
  "support": 3.0
72
  },
73
  "follow_up": {
74
+ "f1-score": 0.8,
75
  "precision": 1.0,
76
+ "recall": 0.6666666666666666,
77
  "support": 12.0
78
  },
79
  "macro avg": {
80
+ "f1-score": 0.45939292189292186,
81
+ "precision": 0.4611992945326278,
82
+ "recall": 0.48456790123456783,
83
  "support": 53.0
84
  },
85
  "onboarding_setup": {
 
113
  "support": 0.0
114
  },
115
  "task_execution": {
116
+ "f1-score": 1.0,
117
+ "precision": 1.0,
118
  "recall": 1.0,
119
  "support": 1.0
120
  },
 
125
  "support": 1.0
126
  },
127
  "weighted avg": {
128
+ "f1-score": 0.7861520554916781,
129
+ "precision": 0.7972446840371369,
130
+ "recall": 0.8113207547169812,
131
  "support": 53.0
132
  }
133
  },
artifacts/evaluation/latest/intent_subtype_hard_cases_confusion_matrix.csv CHANGED
@@ -2,7 +2,7 @@
2
  education,29,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3
  product_discovery,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4
  comparison,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5
- evaluation,3,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6
  deal_seeking,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0
7
  provider_selection,0,0,0,1,0,9,0,0,0,0,0,0,0,0,0,0,0,0
8
  signup,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0
 
2
  education,29,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3
  product_discovery,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4
  comparison,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5
+ evaluation,2,3,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6
  deal_seeking,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0
7
  provider_selection,0,0,0,1,0,9,0,0,0,0,0,0,0,0,0,0,0,0
8
  signup,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0
artifacts/evaluation/latest/intent_subtype_hard_cases_report.json CHANGED
@@ -7,7 +7,7 @@
7
  "dataset_path": "/content/agentic-intent-classifier/data/subtype/hard_cases.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "intent_subtype",
10
- "macro_f1": 0.8447,
11
  "per_class_metrics": {
12
  "account_help": {
13
  "f1-score": 0.8,
@@ -53,8 +53,8 @@
53
  "support": 0.0
54
  },
55
  "education": {
56
- "f1-score": 0.9508196721311475,
57
- "precision": 0.90625,
58
  "recall": 1.0,
59
  "support": 29.0
60
  },
@@ -77,8 +77,8 @@
77
  "support": 12.0
78
  },
79
  "macro avg": {
80
- "f1-score": 0.7038911013311109,
81
- "precision": 0.7234953703703704,
82
  "recall": 0.7212962962962962,
83
  "support": 94.0
84
  },
@@ -89,8 +89,8 @@
89
  "support": 6.0
90
  },
91
  "product_discovery": {
92
- "f1-score": 0.8888888888888888,
93
- "precision": 0.8,
94
  "recall": 1.0,
95
  "support": 8.0
96
  },
@@ -125,8 +125,8 @@
125
  "support": 3.0
126
  },
127
  "weighted avg": {
128
- "f1-score": 0.8798004011763282,
129
- "precision": 0.8911125886524823,
130
  "recall": 0.8936170212765957,
131
  "support": 94.0
132
  }
 
7
  "dataset_path": "/content/agentic-intent-classifier/data/subtype/hard_cases.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "intent_subtype",
10
+ "macro_f1": 0.8426,
11
  "per_class_metrics": {
12
  "account_help": {
13
  "f1-score": 0.8,
 
53
  "support": 0.0
54
  },
55
  "education": {
56
+ "f1-score": 0.9666666666666667,
57
+ "precision": 0.9354838709677419,
58
  "recall": 1.0,
59
  "support": 29.0
60
  },
 
77
  "support": 12.0
78
  },
79
  "macro avg": {
80
+ "f1-score": 0.7021723995980289,
81
+ "precision": 0.7210790702726187,
82
  "recall": 0.7212962962962962,
83
  "support": 94.0
84
  },
 
89
  "support": 6.0
90
  },
91
  "product_discovery": {
92
+ "f1-score": 0.8421052631578947,
93
+ "precision": 0.7272727272727273,
94
  "recall": 1.0,
95
  "support": 8.0
96
  },
 
125
  "support": 3.0
126
  },
127
  "weighted avg": {
128
+ "f1-score": 0.8807077824069889,
129
+ "precision": 0.8939419937189327,
130
  "recall": 0.8936170212765957,
131
  "support": 94.0
132
  }
artifacts/evaluation/latest/intent_subtype_test_confusion_matrix.csv CHANGED
@@ -1,9 +1,9 @@
1
  ,education,product_discovery,comparison,evaluation,deal_seeking,provider_selection,signup,purchase,booking,download,contact_sales,task_execution,onboarding_setup,troubleshooting,account_help,billing_help,follow_up,emotional_reflection
2
  education,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3
- product_discovery,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4
  comparison,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5
- evaluation,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
6
- deal_seeking,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0
7
  provider_selection,0,0,0,1,0,4,0,0,0,0,0,0,1,0,0,0,0,0
8
  signup,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0
9
  purchase,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
@@ -12,8 +12,8 @@ download,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
12
  contact_sales,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
13
  task_execution,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0
14
  onboarding_setup,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0
15
- troubleshooting,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0
16
  account_help,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0
17
  billing_help,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
18
- follow_up,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,8,0
19
  emotional_reflection,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5
 
1
  ,education,product_discovery,comparison,evaluation,deal_seeking,provider_selection,signup,purchase,booking,download,contact_sales,task_execution,onboarding_setup,troubleshooting,account_help,billing_help,follow_up,emotional_reflection
2
  education,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3
+ product_discovery,0,7,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4
  comparison,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5
+ evaluation,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0
6
+ deal_seeking,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
7
  provider_selection,0,0,0,1,0,4,0,0,0,0,0,0,1,0,0,0,0,0
8
  signup,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0
9
  purchase,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
 
12
  contact_sales,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
13
  task_execution,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0
14
  onboarding_setup,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0
15
+ troubleshooting,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0
16
  account_help,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0
17
  billing_help,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
18
+ follow_up,0,0,0,0,2,0,0,0,0,0,0,1,0,0,0,0,8,0
19
  emotional_reflection,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5
artifacts/evaluation/latest/intent_subtype_test_report.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "accepted_accuracy": 0.9,
3
  "accepted_coverage": 1.0,
4
- "accuracy": 0.9,
5
  "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/intent_subtype_test_confusion_matrix.csv",
6
  "count": 70,
7
  "dataset_path": "/content/agentic-intent-classifier/data/subtype/test.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "intent_subtype",
10
- "macro_f1": 0.8531,
11
  "per_class_metrics": {
12
  "account_help": {
13
  "f1-score": 1.0,
@@ -15,7 +15,7 @@
15
  "recall": 1.0,
16
  "support": 2.0
17
  },
18
- "accuracy": 0.9,
19
  "billing_help": {
20
  "f1-score": 0.0,
21
  "precision": 0.0,
@@ -41,9 +41,9 @@
41
  "support": 0.0
42
  },
43
  "deal_seeking": {
44
- "f1-score": 0.6666666666666666,
45
- "precision": 0.5,
46
- "recall": 1.0,
47
  "support": 2.0
48
  },
49
  "download": {
@@ -65,9 +65,9 @@
65
  "support": 5.0
66
  },
67
  "evaluation": {
68
- "f1-score": 0.0,
69
- "precision": 0.0,
70
- "recall": 0.0,
71
  "support": 2.0
72
  },
73
  "follow_up": {
@@ -77,9 +77,9 @@
77
  "support": 11.0
78
  },
79
  "macro avg": {
80
- "f1-score": 0.6635221022395855,
81
- "precision": 0.6578042328042328,
82
- "recall": 0.6885521885521885,
83
  "support": 70.0
84
  },
85
  "onboarding_setup": {
@@ -89,9 +89,9 @@
89
  "support": 4.0
90
  },
91
  "product_discovery": {
92
- "f1-score": 1.0,
93
  "precision": 1.0,
94
- "recall": 1.0,
95
  "support": 8.0
96
  },
97
  "provider_selection": {
@@ -113,21 +113,21 @@
113
  "support": 2.0
114
  },
115
  "task_execution": {
116
- "f1-score": 0.9230769230769231,
117
- "precision": 0.8571428571428571,
118
  "recall": 1.0,
119
  "support": 6.0
120
  },
121
  "troubleshooting": {
122
- "f1-score": 1.0,
123
  "precision": 1.0,
124
- "recall": 1.0,
125
  "support": 2.0
126
  },
127
  "weighted avg": {
128
- "f1-score": 0.8939882610403741,
129
- "precision": 0.9094217687074829,
130
- "recall": 0.9,
131
  "support": 70.0
132
  }
133
  },
 
1
  {
2
+ "accepted_accuracy": 0.8714,
3
  "accepted_coverage": 1.0,
4
+ "accuracy": 0.8714,
5
  "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/intent_subtype_test_confusion_matrix.csv",
6
  "count": 70,
7
  "dataset_path": "/content/agentic-intent-classifier/data/subtype/test.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "intent_subtype",
10
+ "macro_f1": 0.8317,
11
  "per_class_metrics": {
12
  "account_help": {
13
  "f1-score": 1.0,
 
15
  "recall": 1.0,
16
  "support": 2.0
17
  },
18
+ "accuracy": 0.8714285714285714,
19
  "billing_help": {
20
  "f1-score": 0.0,
21
  "precision": 0.0,
 
41
  "support": 0.0
42
  },
43
  "deal_seeking": {
44
+ "f1-score": 0.3333333333333333,
45
+ "precision": 0.25,
46
+ "recall": 0.5,
47
  "support": 2.0
48
  },
49
  "download": {
 
65
  "support": 5.0
66
  },
67
  "evaluation": {
68
+ "f1-score": 0.5,
69
+ "precision": 0.5,
70
+ "recall": 0.5,
71
  "support": 2.0
72
  },
73
  "follow_up": {
 
77
  "support": 11.0
78
  },
79
  "macro avg": {
80
+ "f1-score": 0.646896135613619,
81
+ "precision": 0.6657407407407407,
82
+ "recall": 0.6538299663299663,
83
  "support": 70.0
84
  },
85
  "onboarding_setup": {
 
89
  "support": 4.0
90
  },
91
  "product_discovery": {
92
+ "f1-score": 0.9333333333333333,
93
  "precision": 1.0,
94
+ "recall": 0.875,
95
  "support": 8.0
96
  },
97
  "provider_selection": {
 
113
  "support": 2.0
114
  },
115
  "task_execution": {
116
+ "f1-score": 0.8571428571428571,
117
+ "precision": 0.75,
118
  "recall": 1.0,
119
  "support": 6.0
120
  },
121
  "troubleshooting": {
122
+ "f1-score": 0.6666666666666666,
123
  "precision": 1.0,
124
+ "recall": 0.5,
125
  "support": 2.0
126
  },
127
  "weighted avg": {
128
+ "f1-score": 0.8759558172936446,
129
+ "precision": 0.9073809523809524,
130
+ "recall": 0.8714285714285714,
131
  "support": 70.0
132
  }
133
  },
artifacts/evaluation/latest/intent_subtype_train_confusion_matrix.csv CHANGED
@@ -1,19 +1,19 @@
1
  ,education,product_discovery,comparison,evaluation,deal_seeking,provider_selection,signup,purchase,booking,download,contact_sales,task_execution,onboarding_setup,troubleshooting,account_help,billing_help,follow_up,emotional_reflection
2
  education,52,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3
- product_discovery,0,29,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
4
- comparison,0,0,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5
- evaluation,4,4,0,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6
  deal_seeking,0,0,0,0,10,1,0,0,0,0,0,0,0,0,0,0,0,0
7
  provider_selection,0,0,0,0,0,23,0,0,0,0,0,0,0,0,0,0,2,0
8
  signup,0,0,0,0,0,0,16,0,0,0,0,0,0,0,0,0,0,0
9
  purchase,0,0,0,0,0,0,2,4,0,0,0,0,0,0,0,0,0,0
10
  booking,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0
11
  download,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0
12
- contact_sales,0,0,0,0,0,0,2,0,1,0,6,0,0,0,0,0,0,0
13
- task_execution,0,0,0,0,0,0,1,0,0,1,0,17,0,0,0,0,0,0
14
  onboarding_setup,0,0,0,0,0,0,0,0,1,0,0,0,16,0,0,0,0,0
15
- troubleshooting,0,0,0,0,0,0,0,0,0,0,0,0,0,10,2,0,1,0
16
- account_help,0,0,0,0,0,0,0,0,0,0,0,0,0,2,5,0,0,0
17
  billing_help,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0
18
- follow_up,0,0,0,0,1,0,0,0,0,0,0,5,0,0,0,0,30,0
19
  emotional_reflection,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,20
 
1
  ,education,product_discovery,comparison,evaluation,deal_seeking,provider_selection,signup,purchase,booking,download,contact_sales,task_execution,onboarding_setup,troubleshooting,account_help,billing_help,follow_up,emotional_reflection
2
  education,52,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3
+ product_discovery,0,30,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
4
+ comparison,0,0,14,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5
+ evaluation,3,5,0,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6
  deal_seeking,0,0,0,0,10,1,0,0,0,0,0,0,0,0,0,0,0,0
7
  provider_selection,0,0,0,0,0,23,0,0,0,0,0,0,0,0,0,0,2,0
8
  signup,0,0,0,0,0,0,16,0,0,0,0,0,0,0,0,0,0,0
9
  purchase,0,0,0,0,0,0,2,4,0,0,0,0,0,0,0,0,0,0
10
  booking,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0
11
  download,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0
12
+ contact_sales,0,0,0,0,0,0,2,0,1,0,5,1,0,0,0,0,0,0
13
+ task_execution,0,0,0,0,0,0,0,1,0,0,0,18,0,0,0,0,0,0
14
  onboarding_setup,0,0,0,0,0,0,0,0,1,0,0,0,16,0,0,0,0,0
15
+ troubleshooting,0,0,0,0,0,0,0,0,0,0,0,0,0,9,3,0,1,0
16
+ account_help,0,0,0,0,0,0,0,0,0,0,0,0,0,1,6,0,0,0
17
  billing_help,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0
18
+ follow_up,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,32,0
19
  emotional_reflection,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,20
artifacts/evaluation/latest/intent_subtype_train_report.json CHANGED
@@ -1,21 +1,21 @@
1
  {
2
- "accepted_accuracy": 0.8978,
3
- "accepted_coverage": 1.0,
4
- "accuracy": 0.8978,
5
  "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/intent_subtype_train_confusion_matrix.csv",
6
  "count": 313,
7
  "dataset_path": "/content/agentic-intent-classifier/data/subtype/train.jsonl",
8
- "fallback_rate": 0.0,
9
  "head": "intent_subtype",
10
- "macro_f1": 0.877,
11
  "per_class_metrics": {
12
  "account_help": {
13
- "f1-score": 0.7142857142857143,
14
- "precision": 0.7142857142857143,
15
- "recall": 0.7142857142857143,
16
  "support": 7.0
17
  },
18
- "accuracy": 0.8977635782747604,
19
  "billing_help": {
20
  "f1-score": 1.0,
21
  "precision": 1.0,
@@ -29,32 +29,32 @@
29
  "support": 5.0
30
  },
31
  "comparison": {
32
- "f1-score": 0.967741935483871,
33
- "precision": 0.9375,
34
- "recall": 1.0,
35
  "support": 15.0
36
  },
37
  "contact_sales": {
38
- "f1-score": 0.8,
39
  "precision": 1.0,
40
- "recall": 0.6666666666666666,
41
  "support": 9.0
42
  },
43
  "deal_seeking": {
44
- "f1-score": 0.9090909090909091,
45
- "precision": 0.9090909090909091,
46
  "recall": 0.9090909090909091,
47
  "support": 11.0
48
  },
49
  "download": {
50
- "f1-score": 0.9411764705882353,
51
- "precision": 0.8888888888888888,
52
  "recall": 1.0,
53
  "support": 8.0
54
  },
55
  "education": {
56
- "f1-score": 0.9629629629629629,
57
- "precision": 0.9285714285714286,
58
  "recall": 1.0,
59
  "support": 52.0
60
  },
@@ -65,21 +65,21 @@
65
  "support": 20.0
66
  },
67
  "evaluation": {
68
- "f1-score": 0.6923076923076923,
69
- "precision": 1.0,
70
  "recall": 0.5294117647058824,
71
  "support": 17.0
72
  },
73
  "follow_up": {
74
- "f1-score": 0.8571428571428571,
75
- "precision": 0.8823529411764706,
76
- "recall": 0.8333333333333334,
77
  "support": 36.0
78
  },
79
  "macro avg": {
80
- "f1-score": 0.8770498618135788,
81
- "precision": 0.8988923431325393,
82
- "recall": 0.876671278202288,
83
  "support": 313.0
84
  },
85
  "onboarding_setup": {
@@ -89,9 +89,9 @@
89
  "support": 17.0
90
  },
91
  "product_discovery": {
92
- "f1-score": 0.90625,
93
- "precision": 0.8787878787878788,
94
- "recall": 0.9354838709677419,
95
  "support": 31.0
96
  },
97
  "provider_selection": {
@@ -101,33 +101,33 @@
101
  "support": 25.0
102
  },
103
  "purchase": {
104
- "f1-score": 0.8,
105
- "precision": 1.0,
106
  "recall": 0.6666666666666666,
107
  "support": 6.0
108
  },
109
  "signup": {
110
- "f1-score": 0.8648648648648649,
111
- "precision": 0.7619047619047619,
112
  "recall": 1.0,
113
  "support": 16.0
114
  },
115
  "task_execution": {
116
- "f1-score": 0.8292682926829268,
117
- "precision": 0.7727272727272727,
118
- "recall": 0.8947368421052632,
119
  "support": 19.0
120
  },
121
  "troubleshooting": {
122
- "f1-score": 0.8,
123
- "precision": 0.8333333333333334,
124
- "recall": 0.7692307692307693,
125
  "support": 13.0
126
  },
127
  "weighted avg": {
128
- "f1-score": 0.894423568060199,
129
- "precision": 0.9063956713482179,
130
- "recall": 0.8977635782747604,
131
  "support": 313.0
132
  }
133
  },
 
1
  {
2
+ "accepted_accuracy": 0.9068,
3
+ "accepted_coverage": 0.9936,
4
+ "accuracy": 0.9042,
5
  "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/intent_subtype_train_confusion_matrix.csv",
6
  "count": 313,
7
  "dataset_path": "/content/agentic-intent-classifier/data/subtype/train.jsonl",
8
+ "fallback_rate": 0.0064,
9
  "head": "intent_subtype",
10
+ "macro_f1": 0.8787,
11
  "per_class_metrics": {
12
  "account_help": {
13
+ "f1-score": 0.75,
14
+ "precision": 0.6666666666666666,
15
+ "recall": 0.8571428571428571,
16
  "support": 7.0
17
  },
18
+ "accuracy": 0.9041533546325878,
19
  "billing_help": {
20
  "f1-score": 1.0,
21
  "precision": 1.0,
 
29
  "support": 5.0
30
  },
31
  "comparison": {
32
+ "f1-score": 0.9655172413793104,
33
+ "precision": 1.0,
34
+ "recall": 0.9333333333333333,
35
  "support": 15.0
36
  },
37
  "contact_sales": {
38
+ "f1-score": 0.7142857142857143,
39
  "precision": 1.0,
40
+ "recall": 0.5555555555555556,
41
  "support": 9.0
42
  },
43
  "deal_seeking": {
44
+ "f1-score": 0.9523809523809523,
45
+ "precision": 1.0,
46
  "recall": 0.9090909090909091,
47
  "support": 11.0
48
  },
49
  "download": {
50
+ "f1-score": 1.0,
51
+ "precision": 1.0,
52
  "recall": 1.0,
53
  "support": 8.0
54
  },
55
  "education": {
56
+ "f1-score": 0.9719626168224299,
57
+ "precision": 0.9454545454545454,
58
  "recall": 1.0,
59
  "support": 52.0
60
  },
 
65
  "support": 20.0
66
  },
67
  "evaluation": {
68
+ "f1-score": 0.6666666666666666,
69
+ "precision": 0.9,
70
  "recall": 0.5294117647058824,
71
  "support": 17.0
72
  },
73
  "follow_up": {
74
+ "f1-score": 0.8888888888888888,
75
+ "precision": 0.8888888888888888,
76
+ "recall": 0.8888888888888888,
77
  "support": 36.0
78
  },
79
  "macro avg": {
80
+ "f1-score": 0.8786951095392168,
81
+ "precision": 0.9007433723013433,
82
+ "recall": 0.8782602497120292,
83
  "support": 313.0
84
  },
85
  "onboarding_setup": {
 
89
  "support": 17.0
90
  },
91
  "product_discovery": {
92
+ "f1-score": 0.9090909090909091,
93
+ "precision": 0.8571428571428571,
94
+ "recall": 0.967741935483871,
95
  "support": 31.0
96
  },
97
  "provider_selection": {
 
101
  "support": 25.0
102
  },
103
  "purchase": {
104
+ "f1-score": 0.7272727272727273,
105
+ "precision": 0.8,
106
  "recall": 0.6666666666666666,
107
  "support": 6.0
108
  },
109
  "signup": {
110
+ "f1-score": 0.8888888888888888,
111
+ "precision": 0.8,
112
  "recall": 1.0,
113
  "support": 16.0
114
  },
115
  "task_execution": {
116
+ "f1-score": 0.8571428571428571,
117
+ "precision": 0.782608695652174,
118
+ "recall": 0.9473684210526315,
119
  "support": 19.0
120
  },
121
  "troubleshooting": {
122
+ "f1-score": 0.782608695652174,
123
+ "precision": 0.9,
124
+ "recall": 0.6923076923076923,
125
  "support": 13.0
126
  },
127
  "weighted avg": {
128
+ "f1-score": 0.9005147505975646,
129
+ "precision": 0.9118244687664052,
130
+ "recall": 0.9041533546325878,
131
  "support": 313.0
132
  }
133
  },
artifacts/evaluation/latest/intent_subtype_val_confusion_matrix.csv CHANGED
@@ -1,12 +1,12 @@
1
  ,education,product_discovery,comparison,evaluation,deal_seeking,provider_selection,signup,purchase,booking,download,contact_sales,task_execution,onboarding_setup,troubleshooting,account_help,billing_help,follow_up,emotional_reflection
2
  education,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3
- product_discovery,0,9,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4
- comparison,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5
  evaluation,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6
  deal_seeking,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0
7
  provider_selection,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0
8
  signup,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0
9
- purchase,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0
10
  booking,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0
11
  download,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
12
  contact_sales,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
@@ -14,6 +14,6 @@ task_execution,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0
14
  onboarding_setup,0,1,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0
15
  troubleshooting,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
16
  account_help,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0
17
- billing_help,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
18
  follow_up,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,9,0
19
  emotional_reflection,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5
 
1
  ,education,product_discovery,comparison,evaluation,deal_seeking,provider_selection,signup,purchase,booking,download,contact_sales,task_execution,onboarding_setup,troubleshooting,account_help,billing_help,follow_up,emotional_reflection
2
  education,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3
+ product_discovery,0,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4
+ comparison,0,0,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0
5
  evaluation,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6
  deal_seeking,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0
7
  provider_selection,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0
8
  signup,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0
9
+ purchase,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0
10
  booking,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0
11
  download,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
12
  contact_sales,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
 
14
  onboarding_setup,0,1,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0
15
  troubleshooting,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
16
  account_help,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0
17
+ billing_help,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
18
  follow_up,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,9,0
19
  emotional_reflection,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5
artifacts/evaluation/latest/intent_subtype_val_report.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "accepted_accuracy": 0.8608,
3
- "accepted_coverage": 0.9875,
4
- "accuracy": 0.85,
5
  "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/intent_subtype_val_confusion_matrix.csv",
6
  "count": 80,
7
  "dataset_path": "/content/agentic-intent-classifier/data/subtype/val.jsonl",
8
- "fallback_rate": 0.0125,
9
  "head": "intent_subtype",
10
- "macro_f1": 0.6722,
11
  "per_class_metrics": {
12
  "account_help": {
13
  "f1-score": 0.5,
@@ -15,7 +15,7 @@
15
  "recall": 0.5,
16
  "support": 2.0
17
  },
18
- "accuracy": 0.85,
19
  "billing_help": {
20
  "f1-score": 0.0,
21
  "precision": 0.0,
@@ -29,9 +29,9 @@
29
  "support": 3.0
30
  },
31
  "comparison": {
32
- "f1-score": 0.5,
33
- "precision": 0.5,
34
- "recall": 0.5,
35
  "support": 4.0
36
  },
37
  "contact_sales": {
@@ -41,8 +41,8 @@
41
  "support": 0.0
42
  },
43
  "deal_seeking": {
44
- "f1-score": 0.8,
45
- "precision": 0.6666666666666666,
46
  "recall": 1.0,
47
  "support": 2.0
48
  },
@@ -77,21 +77,21 @@
77
  "support": 11.0
78
  },
79
  "macro avg": {
80
- "f1-score": 0.5974890931031281,
81
- "precision": 0.5811447811447812,
82
- "recall": 0.6353535353535353,
83
  "support": 80.0
84
  },
85
  "onboarding_setup": {
86
- "f1-score": 0.8888888888888888,
87
- "precision": 1.0,
88
  "recall": 0.8,
89
  "support": 5.0
90
  },
91
  "product_discovery": {
92
- "f1-score": 0.8571428571428571,
93
- "precision": 0.9,
94
- "recall": 0.8181818181818182,
95
  "support": 11.0
96
  },
97
  "provider_selection": {
@@ -107,14 +107,14 @@
107
  "support": 2.0
108
  },
109
  "signup": {
110
- "f1-score": 0.8,
111
- "precision": 0.6666666666666666,
112
  "recall": 1.0,
113
  "support": 2.0
114
  },
115
  "task_execution": {
116
- "f1-score": 0.8421052631578947,
117
- "precision": 0.7272727272727273,
118
  "recall": 1.0,
119
  "support": 8.0
120
  },
@@ -125,9 +125,9 @@
125
  "support": 1.0
126
  },
127
  "weighted avg": {
128
- "f1-score": 0.8380398913951546,
129
- "precision": 0.8423106060606059,
130
- "recall": 0.85,
131
  "support": 80.0
132
  }
133
  },
 
1
  {
2
+ "accepted_accuracy": 0.8625,
3
+ "accepted_coverage": 1.0,
4
+ "accuracy": 0.8625,
5
  "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/intent_subtype_val_confusion_matrix.csv",
6
  "count": 80,
7
  "dataset_path": "/content/agentic-intent-classifier/data/subtype/val.jsonl",
8
+ "fallback_rate": 0.0,
9
  "head": "intent_subtype",
10
+ "macro_f1": 0.6561,
11
  "per_class_metrics": {
12
  "account_help": {
13
  "f1-score": 0.5,
 
15
  "recall": 0.5,
16
  "support": 2.0
17
  },
18
+ "accuracy": 0.8625,
19
  "billing_help": {
20
  "f1-score": 0.0,
21
  "precision": 0.0,
 
29
  "support": 3.0
30
  },
31
  "comparison": {
32
+ "f1-score": 0.4,
33
+ "precision": 1.0,
34
+ "recall": 0.25,
35
  "support": 4.0
36
  },
37
  "contact_sales": {
 
41
  "support": 0.0
42
  },
43
  "deal_seeking": {
44
+ "f1-score": 0.6666666666666666,
45
+ "precision": 0.5,
46
  "recall": 1.0,
47
  "support": 2.0
48
  },
 
77
  "support": 11.0
78
  },
79
  "macro avg": {
80
+ "f1-score": 0.5832054560954817,
81
+ "precision": 0.5891975308641975,
82
+ "recall": 0.6315656565656567,
83
  "support": 80.0
84
  },
85
  "onboarding_setup": {
86
+ "f1-score": 0.8,
87
+ "precision": 0.8,
88
  "recall": 0.8,
89
  "support": 5.0
90
  },
91
  "product_discovery": {
92
+ "f1-score": 0.9565217391304348,
93
+ "precision": 0.9166666666666666,
94
+ "recall": 1.0,
95
  "support": 11.0
96
  },
97
  "provider_selection": {
 
107
  "support": 2.0
108
  },
109
  "signup": {
110
+ "f1-score": 0.6666666666666666,
111
+ "precision": 0.5,
112
  "recall": 1.0,
113
  "support": 2.0
114
  },
115
  "task_execution": {
116
+ "f1-score": 0.9411764705882353,
117
+ "precision": 0.8888888888888888,
118
  "recall": 1.0,
119
  "support": 8.0
120
  },
 
125
  "support": 1.0
126
  },
127
  "weighted avg": {
128
+ "f1-score": 0.8443893861892583,
129
+ "precision": 0.8649305555555555,
130
+ "recall": 0.8625,
131
  "support": 80.0
132
  }
133
  },
artifacts/evaluation/latest/intent_type_hard_cases_report.json CHANGED
@@ -1,11 +1,11 @@
1
  {
2
  "accepted_accuracy": 1.0,
3
- "accepted_coverage": 0.9836,
4
  "accuracy": 1.0,
5
  "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/intent_type_hard_cases_confusion_matrix.csv",
6
  "count": 61,
7
  "dataset_path": "/content/agentic-intent-classifier/data/hard_cases.jsonl",
8
- "fallback_rate": 0.0164,
9
  "head": "intent_type",
10
  "macro_f1": 1.0,
11
  "per_class_metrics": {
 
1
  {
2
  "accepted_accuracy": 1.0,
3
+ "accepted_coverage": 1.0,
4
  "accuracy": 1.0,
5
  "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/intent_type_hard_cases_confusion_matrix.csv",
6
  "count": 61,
7
  "dataset_path": "/content/agentic-intent-classifier/data/hard_cases.jsonl",
8
+ "fallback_rate": 0.0,
9
  "head": "intent_type",
10
  "macro_f1": 1.0,
11
  "per_class_metrics": {
artifacts/evaluation/latest/intent_type_test_confusion_matrix.csv CHANGED
@@ -1,11 +1,11 @@
1
  ,informational,exploratory,commercial,transactional,support,personal_reflection,creative_generation,chit_chat,ambiguous,prohibited
2
  informational,8,0,0,0,0,0,0,0,0,0
3
  exploratory,0,1,0,0,0,0,0,0,0,0
4
- commercial,0,0,10,0,0,0,0,0,0,0
5
- transactional,0,0,0,8,0,0,0,0,0,0
6
  support,0,0,0,0,2,0,0,0,0,1
7
  personal_reflection,0,0,0,0,0,5,0,0,0,0
8
- creative_generation,0,0,0,0,0,0,1,0,0,0
9
  chit_chat,0,0,0,0,0,0,0,1,0,0
10
  ambiguous,1,0,1,0,0,0,0,0,7,0
11
  prohibited,0,0,0,0,0,0,0,0,0,1
 
1
  ,informational,exploratory,commercial,transactional,support,personal_reflection,creative_generation,chit_chat,ambiguous,prohibited
2
  informational,8,0,0,0,0,0,0,0,0,0
3
  exploratory,0,1,0,0,0,0,0,0,0,0
4
+ commercial,1,0,9,0,0,0,0,0,0,0
5
+ transactional,0,0,0,7,0,0,1,0,0,0
6
  support,0,0,0,0,2,0,0,0,0,1
7
  personal_reflection,0,0,0,0,0,5,0,0,0,0
8
+ creative_generation,0,0,0,1,0,0,0,0,0,0
9
  chit_chat,0,0,0,0,0,0,0,1,0,0
10
  ambiguous,1,0,1,0,0,0,0,0,7,0
11
  prohibited,0,0,0,0,0,0,0,0,0,1
artifacts/evaluation/latest/intent_type_test_report.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "accepted_accuracy": 0.9362,
3
- "accepted_coverage": 1.0,
4
- "accuracy": 0.9362,
5
  "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/intent_type_test_confusion_matrix.csv",
6
  "count": 47,
7
  "dataset_path": "/content/agentic-intent-classifier/data/test.jsonl",
8
- "fallback_rate": 0.0,
9
  "head": "intent_type",
10
- "macro_f1": 0.9235,
11
  "per_class_metrics": {
12
- "accuracy": 0.9361702127659575,
13
  "ambiguous": {
14
  "f1-score": 0.875,
15
  "precision": 1.0,
@@ -23,15 +23,15 @@
23
  "support": 1.0
24
  },
25
  "commercial": {
26
- "f1-score": 0.9523809523809523,
27
- "precision": 0.9090909090909091,
28
- "recall": 1.0,
29
  "support": 10.0
30
  },
31
  "creative_generation": {
32
- "f1-score": 1.0,
33
- "precision": 1.0,
34
- "recall": 1.0,
35
  "support": 1.0
36
  },
37
  "exploratory": {
@@ -41,15 +41,15 @@
41
  "support": 1.0
42
  },
43
  "informational": {
44
- "f1-score": 0.9411764705882353,
45
- "precision": 0.8888888888888888,
46
  "recall": 1.0,
47
  "support": 8.0
48
  },
49
  "macro avg": {
50
- "f1-score": 0.9235224089635853,
51
- "precision": 0.9297979797979797,
52
- "recall": 0.9444444444444444,
53
  "support": 47.0
54
  },
55
  "personal_reflection": {
@@ -71,15 +71,15 @@
71
  "support": 3.0
72
  },
73
  "transactional": {
74
- "f1-score": 1.0,
75
- "precision": 1.0,
76
- "recall": 1.0,
77
  "support": 8.0
78
  },
79
  "weighted avg": {
80
- "f1-score": 0.9360614458549377,
81
- "precision": 0.9511068128089405,
82
- "recall": 0.9361702127659575,
83
  "support": 47.0
84
  }
85
  },
 
1
  {
2
+ "accepted_accuracy": 0.8889,
3
+ "accepted_coverage": 0.9574,
4
+ "accuracy": 0.8723,
5
  "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/intent_type_test_confusion_matrix.csv",
6
  "count": 47,
7
  "dataset_path": "/content/agentic-intent-classifier/data/test.jsonl",
8
+ "fallback_rate": 0.0426,
9
  "head": "intent_type",
10
+ "macro_f1": 0.8006,
11
  "per_class_metrics": {
12
+ "accuracy": 0.8723404255319149,
13
  "ambiguous": {
14
  "f1-score": 0.875,
15
  "precision": 1.0,
 
23
  "support": 1.0
24
  },
25
  "commercial": {
26
+ "f1-score": 0.9,
27
+ "precision": 0.9,
28
+ "recall": 0.9,
29
  "support": 10.0
30
  },
31
  "creative_generation": {
32
+ "f1-score": 0.0,
33
+ "precision": 0.0,
34
+ "recall": 0.0,
35
  "support": 1.0
36
  },
37
  "exploratory": {
 
41
  "support": 1.0
42
  },
43
  "informational": {
44
+ "f1-score": 0.8888888888888888,
45
+ "precision": 0.8,
46
  "recall": 1.0,
47
  "support": 8.0
48
  },
49
  "macro avg": {
50
+ "f1-score": 0.8005555555555555,
51
+ "precision": 0.8074999999999999,
52
+ "recall": 0.8219444444444445,
53
  "support": 47.0
54
  },
55
  "personal_reflection": {
 
71
  "support": 3.0
72
  },
73
  "transactional": {
74
+ "f1-score": 0.875,
75
+ "precision": 0.875,
76
+ "recall": 0.875,
77
  "support": 8.0
78
  },
79
  "weighted avg": {
80
+ "f1-score": 0.8734633569739952,
81
+ "precision": 0.8914893617021277,
82
+ "recall": 0.8723404255319149,
83
  "support": 47.0
84
  }
85
  },
artifacts/evaluation/latest/intent_type_third_wave_cases_confusion_matrix.csv CHANGED
@@ -1,7 +1,7 @@
1
  ,informational,exploratory,commercial,transactional,support,personal_reflection,creative_generation,chit_chat,ambiguous,prohibited
2
  informational,0,0,0,0,0,0,0,0,0,0
3
  exploratory,0,1,0,0,0,0,0,0,0,0
4
- commercial,0,0,12,0,0,0,0,0,0,0
5
  transactional,0,0,0,0,0,0,0,0,0,0
6
  support,0,0,0,0,0,0,0,0,0,0
7
  personal_reflection,0,0,0,0,0,0,0,0,0,0
 
1
  ,informational,exploratory,commercial,transactional,support,personal_reflection,creative_generation,chit_chat,ambiguous,prohibited
2
  informational,0,0,0,0,0,0,0,0,0,0
3
  exploratory,0,1,0,0,0,0,0,0,0,0
4
+ commercial,1,0,11,0,0,0,0,0,0,0
5
  transactional,0,0,0,0,0,0,0,0,0,0
6
  support,0,0,0,0,0,0,0,0,0,0
7
  personal_reflection,0,0,0,0,0,0,0,0,0,0
artifacts/evaluation/latest/intent_type_third_wave_cases_report.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "accepted_accuracy": 0.8846,
3
  "accepted_coverage": 1.0,
4
- "accuracy": 0.8846,
5
  "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/intent_type_third_wave_cases_confusion_matrix.csv",
6
  "count": 26,
7
  "dataset_path": "/content/agentic-intent-classifier/data/third_wave_cases.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "intent_type",
10
- "macro_f1": 0.8209,
11
  "per_class_metrics": {
12
- "accuracy": 0.8846153846153846,
13
  "ambiguous": {
14
  "f1-score": 0.8235294117647058,
15
  "precision": 1.0,
@@ -23,9 +23,9 @@
23
  "support": 1.0
24
  },
25
  "commercial": {
26
- "f1-score": 0.9230769230769231,
27
- "precision": 0.8571428571428571,
28
- "recall": 1.0,
29
  "support": 12.0
30
  },
31
  "creative_generation": {
@@ -47,9 +47,9 @@
47
  "support": 0.0
48
  },
49
  "macro avg": {
50
- "f1-score": 0.5746606334841629,
51
- "precision": 0.5857142857142857,
52
- "recall": 0.5700000000000001,
53
  "support": 26.0
54
  },
55
  "personal_reflection": {
@@ -77,9 +77,9 @@
77
  "support": 0.0
78
  },
79
  "weighted avg": {
80
- "f1-score": 0.8966237382526975,
81
- "precision": 0.9340659340659341,
82
- "recall": 0.8846153846153846,
83
  "support": 26.0
84
  }
85
  },
 
1
  {
2
+ "accepted_accuracy": 0.8462,
3
  "accepted_coverage": 1.0,
4
+ "accuracy": 0.8462,
5
  "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/intent_type_third_wave_cases_confusion_matrix.csv",
6
  "count": 26,
7
  "dataset_path": "/content/agentic-intent-classifier/data/third_wave_cases.jsonl",
8
  "fallback_rate": 0.0,
9
  "head": "intent_type",
10
+ "macro_f1": 0.8148,
11
  "per_class_metrics": {
12
+ "accuracy": 0.8461538461538461,
13
  "ambiguous": {
14
  "f1-score": 0.8235294117647058,
15
  "precision": 1.0,
 
23
  "support": 1.0
24
  },
25
  "commercial": {
26
+ "f1-score": 0.88,
27
+ "precision": 0.8461538461538461,
28
+ "recall": 0.9166666666666666,
29
  "support": 12.0
30
  },
31
  "creative_generation": {
 
47
  "support": 0.0
48
  },
49
  "macro avg": {
50
+ "f1-score": 0.5703529411764705,
51
+ "precision": 0.5846153846153846,
52
+ "recall": 0.5616666666666666,
53
  "support": 26.0
54
  },
55
  "personal_reflection": {
 
77
  "support": 0.0
78
  },
79
  "weighted avg": {
80
+ "f1-score": 0.8767420814479638,
81
+ "precision": 0.9289940828402367,
82
+ "recall": 0.8461538461538461,
83
  "support": 26.0
84
  }
85
  },
artifacts/evaluation/latest/intent_type_train_confusion_matrix.csv CHANGED
@@ -7,5 +7,5 @@ support,0,0,0,0,10,0,0,0,0,0
7
  personal_reflection,0,0,0,0,0,20,0,0,0,0
8
  creative_generation,0,0,0,0,0,0,5,0,0,0
9
  chit_chat,0,0,0,0,0,0,0,5,0,0
10
- ambiguous,0,0,0,0,0,0,0,0,31,0
11
  prohibited,0,0,0,0,0,0,0,0,0,5
 
7
  personal_reflection,0,0,0,0,0,20,0,0,0,0
8
  creative_generation,0,0,0,0,0,0,5,0,0,0
9
  chit_chat,0,0,0,0,0,0,0,5,0,0
10
+ ambiguous,0,0,0,0,1,0,0,0,30,0
11
  prohibited,0,0,0,0,0,0,0,0,0,5
artifacts/evaluation/latest/intent_type_train_report.json CHANGED
@@ -1,19 +1,19 @@
1
  {
2
- "accepted_accuracy": 1.0,
3
- "accepted_coverage": 0.9945,
4
- "accuracy": 1.0,
5
  "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/intent_type_train_confusion_matrix.csv",
6
  "count": 183,
7
  "dataset_path": "/content/agentic-intent-classifier/data/train.jsonl",
8
- "fallback_rate": 0.0055,
9
  "head": "intent_type",
10
- "macro_f1": 1.0,
11
  "per_class_metrics": {
12
- "accuracy": 1.0,
13
  "ambiguous": {
14
- "f1-score": 1.0,
15
  "precision": 1.0,
16
- "recall": 1.0,
17
  "support": 31.0
18
  },
19
  "chit_chat": {
@@ -47,9 +47,9 @@
47
  "support": 38.0
48
  },
49
  "macro avg": {
50
- "f1-score": 1.0,
51
- "precision": 1.0,
52
- "recall": 1.0,
53
  "support": 183.0
54
  },
55
  "personal_reflection": {
@@ -65,8 +65,8 @@
65
  "support": 5.0
66
  },
67
  "support": {
68
- "f1-score": 1.0,
69
- "precision": 1.0,
70
  "recall": 1.0,
71
  "support": 10.0
72
  },
@@ -77,9 +77,9 @@
77
  "support": 28.0
78
  },
79
  "weighted avg": {
80
- "f1-score": 1.0,
81
- "precision": 1.0,
82
- "recall": 1.0,
83
  "support": 183.0
84
  }
85
  },
 
1
  {
2
+ "accepted_accuracy": 0.9945,
3
+ "accepted_coverage": 1.0,
4
+ "accuracy": 0.9945,
5
  "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/intent_type_train_confusion_matrix.csv",
6
  "count": 183,
7
  "dataset_path": "/content/agentic-intent-classifier/data/train.jsonl",
8
+ "fallback_rate": 0.0,
9
  "head": "intent_type",
10
+ "macro_f1": 0.9936,
11
  "per_class_metrics": {
12
+ "accuracy": 0.994535519125683,
13
  "ambiguous": {
14
+ "f1-score": 0.9836065573770492,
15
  "precision": 1.0,
16
+ "recall": 0.967741935483871,
17
  "support": 31.0
18
  },
19
  "chit_chat": {
 
47
  "support": 38.0
48
  },
49
  "macro avg": {
50
+ "f1-score": 0.9935987509758002,
51
+ "precision": 0.990909090909091,
52
+ "recall": 0.9967741935483871,
53
  "support": 183.0
54
  },
55
  "personal_reflection": {
 
65
  "support": 5.0
66
  },
67
  "support": {
68
+ "f1-score": 0.9523809523809523,
69
+ "precision": 0.9090909090909091,
70
  "recall": 1.0,
71
  "support": 10.0
72
  },
 
77
  "support": 28.0
78
  },
79
  "weighted avg": {
80
+ "f1-score": 0.9946208349863281,
81
+ "precision": 0.9950322901142573,
82
+ "recall": 0.994535519125683,
83
  "support": 183.0
84
  }
85
  },
artifacts/evaluation/latest/intent_type_val_confusion_matrix.csv CHANGED
@@ -2,8 +2,8 @@
2
  informational,8,0,0,0,0,0,0,0,0,0
3
  exploratory,0,1,0,0,0,0,0,0,0,0
4
  commercial,0,1,9,0,0,0,0,0,0,0
5
- transactional,0,0,0,7,0,0,1,0,0,0
6
- support,0,0,0,0,3,0,0,0,0,0
7
  personal_reflection,0,0,0,0,0,5,0,0,0,0
8
  creative_generation,0,0,0,0,0,0,1,0,0,0
9
  chit_chat,0,0,0,0,0,0,0,1,0,0
 
2
  informational,8,0,0,0,0,0,0,0,0,0
3
  exploratory,0,1,0,0,0,0,0,0,0,0
4
  commercial,0,1,9,0,0,0,0,0,0,0
5
+ transactional,0,0,0,5,0,0,3,0,0,0
6
+ support,0,0,0,0,2,0,0,0,0,1
7
  personal_reflection,0,0,0,0,0,5,0,0,0,0
8
  creative_generation,0,0,0,0,0,0,1,0,0,0
9
  chit_chat,0,0,0,0,0,0,0,1,0,0
artifacts/evaluation/latest/intent_type_val_report.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "accepted_accuracy": 0.9362,
3
- "accepted_coverage": 1.0,
4
- "accuracy": 0.9362,
5
  "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/intent_type_val_confusion_matrix.csv",
6
  "count": 47,
7
  "dataset_path": "/content/agentic-intent-classifier/data/val.jsonl",
8
- "fallback_rate": 0.0,
9
  "head": "intent_type",
10
- "macro_f1": 0.9108,
11
  "per_class_metrics": {
12
- "accuracy": 0.9361702127659575,
13
  "ambiguous": {
14
  "f1-score": 0.9411764705882353,
15
  "precision": 1.0,
@@ -29,8 +29,8 @@
29
  "support": 10.0
30
  },
31
  "creative_generation": {
32
- "f1-score": 0.6666666666666666,
33
- "precision": 0.5,
34
  "recall": 1.0,
35
  "support": 1.0
36
  },
@@ -47,9 +47,9 @@
47
  "support": 8.0
48
  },
49
  "macro avg": {
50
- "f1-score": 0.9107843137254902,
51
- "precision": 0.89,
52
- "recall": 0.966388888888889,
53
  "support": 47.0
54
  },
55
  "personal_reflection": {
@@ -59,27 +59,27 @@
59
  "support": 5.0
60
  },
61
  "prohibited": {
62
- "f1-score": 1.0,
63
- "precision": 1.0,
64
  "recall": 1.0,
65
  "support": 1.0
66
  },
67
  "support": {
68
- "f1-score": 1.0,
69
  "precision": 1.0,
70
- "recall": 1.0,
71
  "support": 3.0
72
  },
73
  "transactional": {
74
- "f1-score": 0.9333333333333333,
75
  "precision": 1.0,
76
- "recall": 0.875,
77
  "support": 8.0
78
  },
79
  "weighted avg": {
80
- "f1-score": 0.9419274092615769,
81
- "precision": 0.9574468085106383,
82
- "recall": 0.9361702127659575,
83
  "support": 47.0
84
  }
85
  },
 
1
  {
2
+ "accepted_accuracy": 0.8913,
3
+ "accepted_coverage": 0.9787,
4
+ "accuracy": 0.8723,
5
  "confusion_matrix_path": "/content/agentic-intent-classifier/artifacts/evaluation/latest/intent_type_val_confusion_matrix.csv",
6
  "count": 47,
7
  "dataset_path": "/content/agentic-intent-classifier/data/val.jsonl",
8
+ "fallback_rate": 0.0213,
9
  "head": "intent_type",
10
+ "macro_f1": 0.8144,
11
  "per_class_metrics": {
12
+ "accuracy": 0.8723404255319149,
13
  "ambiguous": {
14
  "f1-score": 0.9411764705882353,
15
  "precision": 1.0,
 
29
  "support": 10.0
30
  },
31
  "creative_generation": {
32
+ "f1-score": 0.4,
33
+ "precision": 0.25,
34
  "recall": 1.0,
35
  "support": 1.0
36
  },
 
47
  "support": 8.0
48
  },
49
  "macro avg": {
50
+ "f1-score": 0.8143740573152337,
51
+ "precision": 0.8150000000000001,
52
+ "recall": 0.9080555555555556,
53
  "support": 47.0
54
  },
55
  "personal_reflection": {
 
59
  "support": 5.0
60
  },
61
  "prohibited": {
62
+ "f1-score": 0.6666666666666666,
63
+ "precision": 0.5,
64
  "recall": 1.0,
65
  "support": 1.0
66
  },
67
  "support": {
68
+ "f1-score": 0.8,
69
  "precision": 1.0,
70
+ "recall": 0.6666666666666666,
71
  "support": 3.0
72
  },
73
  "transactional": {
74
+ "f1-score": 0.7692307692307693,
75
  "precision": 1.0,
76
+ "recall": 0.625,
77
  "support": 8.0
78
  },
79
  "weighted avg": {
80
+ "f1-score": 0.8884631430313531,
81
+ "precision": 0.9414893617021277,
82
+ "recall": 0.8723404255319149,
83
  "support": 47.0
84
  }
85
  },
artifacts/evaluation/latest/summary.json CHANGED
The diff for this file is too large to render. See raw diff
 
iab_classifier_model_output/train_metrics.json CHANGED
@@ -4,22 +4,22 @@
4
  "test_count": 3282,
5
  "test_metrics": {
6
  "epoch": 3.0,
7
- "test_accuracy": 0.9439366240097502,
8
- "test_loss": 1.8114978075027466,
9
- "test_macro_f1": 0.9150587607646185,
10
- "test_runtime": 9.356,
11
- "test_samples_per_second": 350.793,
12
- "test_steps_per_second": 22.018
13
  },
14
  "train_count": 13211,
15
  "val_count": 3282,
16
  "val_metrics": {
17
  "epoch": 3.0,
18
- "val_accuracy": 0.9485070079219988,
19
- "val_loss": 1.8056248426437378,
20
- "val_macro_f1": 0.9206922853424929,
21
- "val_runtime": 9.3831,
22
- "val_samples_per_second": 349.776,
23
- "val_steps_per_second": 21.954
24
  }
25
  }
 
4
  "test_count": 3282,
5
  "test_metrics": {
6
  "epoch": 3.0,
7
+ "test_accuracy": 0.9320536258379037,
8
+ "test_loss": 2.1225109100341797,
9
+ "test_macro_f1": 0.8928275998599801,
10
+ "test_runtime": 11.2269,
11
+ "test_samples_per_second": 292.333,
12
+ "test_steps_per_second": 18.349
13
  },
14
  "train_count": 13211,
15
  "val_count": 3282,
16
  "val_metrics": {
17
  "epoch": 3.0,
18
+ "val_accuracy": 0.9320536258379037,
19
+ "val_loss": 2.119333505630493,
20
+ "val_macro_f1": 0.8965787473982275,
21
+ "val_runtime": 11.1915,
22
+ "val_samples_per_second": 293.257,
23
+ "val_steps_per_second": 18.407
24
  }
25
  }