| { |
| "by_status": { |
| "must_fix": { |
| "failed": 9, |
| "passed": 3, |
| "total": 12 |
| } |
| }, |
| "cases_path": "/content/agentic-intent-classifier/examples/iab_mapping_cases.json", |
| "count": 12, |
| "failed": 9, |
| "passed": 3, |
| "results": [ |
| { |
| "actual": { |
| "model_output.classification.iab_content.mapping_mode": "nearest_equivalent", |
| "model_output.classification.iab_content.tier1.label": "Automotive", |
| "model_output.classification.iab_content.tier2.label": null |
| }, |
| "expected": { |
| "model_output.classification.iab_content.mapping_mode": "exact", |
| "model_output.classification.iab_content.tier1.label": "Automotive", |
| "model_output.classification.iab_content.tier2.label": "Auto Buying and Selling" |
| }, |
| "id": "car-buying-maps-to-automotive-buying", |
| "mismatches": [ |
| { |
| "actual": null, |
| "expected": "Auto Buying and Selling", |
| "path": "model_output.classification.iab_content.tier2.label" |
| }, |
| { |
| "actual": "nearest_equivalent", |
| "expected": "exact", |
| "path": "model_output.classification.iab_content.mapping_mode" |
| } |
| ], |
| "notes": "Vehicle shopping queries should map into the automotive buying branch, not business sales.", |
| "pass": false, |
| "status": "must_fix", |
| "text": "Which car to buy in 2026" |
| }, |
| { |
| "actual": { |
| "model_output.classification.iab_content.mapping_mode": "exact", |
| "model_output.classification.iab_content.tier1.label": "Technology & Computing", |
| "model_output.classification.iab_content.tier2.label": "Computing", |
| "model_output.classification.iab_content.tier3.label": "Laptops" |
| }, |
| "expected": { |
| "model_output.classification.iab_content.mapping_mode": "exact", |
| "model_output.classification.iab_content.tier1.label": "Technology & Computing", |
| "model_output.classification.iab_content.tier2.label": "Computing", |
| "model_output.classification.iab_content.tier3.label": "Laptops" |
| }, |
| "id": "laptop-buying-maps-to-laptops", |
| "mismatches": [], |
| "notes": "Laptop shopping should resolve into the laptops branch, not business sales.", |
| "pass": true, |
| "status": "must_fix", |
| "text": "Which laptop to buy in 2026" |
| }, |
| { |
| "actual": { |
| "model_output.classification.iab_content.mapping_mode": "exact", |
| "model_output.classification.iab_content.tier1.label": "Technology & Computing", |
| "model_output.classification.iab_content.tier2.label": null, |
| "model_output.classification.iab_content.tier3.label": null |
| }, |
| "expected": { |
| "model_output.classification.iab_content.mapping_mode": "exact", |
| "model_output.classification.iab_content.tier1.label": "Technology & Computing", |
| "model_output.classification.iab_content.tier2.label": "Computing", |
| "model_output.classification.iab_content.tier3.label": "Laptops" |
| }, |
| "id": "labtop-buying-maps-to-laptops", |
| "mismatches": [ |
| { |
| "actual": null, |
| "expected": "Computing", |
| "path": "model_output.classification.iab_content.tier2.label" |
| }, |
| { |
| "actual": null, |
| "expected": "Laptops", |
| "path": "model_output.classification.iab_content.tier3.label" |
| } |
| ], |
| "notes": "Common typo handling should still land in the laptops branch.", |
| "pass": false, |
| "status": "must_fix", |
| "text": "Which labtop to buy in 2026" |
| }, |
| { |
| "actual": { |
| "model_output.classification.iab_content.mapping_mode": "exact", |
| "model_output.classification.iab_content.tier1.label": "Technology & Computing", |
| "model_output.classification.iab_content.tier2.label": null, |
| "model_output.classification.iab_content.tier3.label": null |
| }, |
| "expected": { |
| "model_output.classification.iab_content.mapping_mode": "nearest_equivalent", |
| "model_output.classification.iab_content.tier1.label": "Business and Finance", |
| "model_output.classification.iab_content.tier2.label": "Business", |
| "model_output.classification.iab_content.tier3.label": "Sales" |
| }, |
| "id": "crm-awareness-maps-to-sales", |
| "mismatches": [ |
| { |
| "actual": "Technology & Computing", |
| "expected": "Business and Finance", |
| "path": "model_output.classification.iab_content.tier1.label" |
| }, |
| { |
| "actual": null, |
| "expected": "Business", |
| "path": "model_output.classification.iab_content.tier2.label" |
| }, |
| { |
| "actual": null, |
| "expected": "Sales", |
| "path": "model_output.classification.iab_content.tier3.label" |
| }, |
| { |
| "actual": "exact", |
| "expected": "nearest_equivalent", |
| "path": "model_output.classification.iab_content.mapping_mode" |
| } |
| ], |
| "notes": "CRM education should resolve to the closest business/sales path, not generic software.", |
| "pass": false, |
| "status": "must_fix", |
| "text": "What is CRM software?" |
| }, |
| { |
| "actual": { |
| "model_output.classification.iab_content.mapping_mode": "nearest_equivalent", |
| "model_output.classification.iab_content.tier1.label": "Technology & Computing", |
| "model_output.classification.iab_content.tier2.label": null, |
| "model_output.classification.iab_content.tier3.label": null |
| }, |
| "expected": { |
| "model_output.classification.iab_content.mapping_mode": "exact", |
| "model_output.classification.iab_content.tier1.label": "Business and Finance", |
| "model_output.classification.iab_content.tier2.label": "Business", |
| "model_output.classification.iab_content.tier3.label": "Sales" |
| }, |
| "id": "crm-comparison-maps-to-sales", |
| "mismatches": [ |
| { |
| "actual": "Technology & Computing", |
| "expected": "Business and Finance", |
| "path": "model_output.classification.iab_content.tier1.label" |
| }, |
| { |
| "actual": null, |
| "expected": "Business", |
| "path": "model_output.classification.iab_content.tier2.label" |
| }, |
| { |
| "actual": null, |
| "expected": "Sales", |
| "path": "model_output.classification.iab_content.tier3.label" |
| }, |
| { |
| "actual": "nearest_equivalent", |
| "expected": "exact", |
| "path": "model_output.classification.iab_content.mapping_mode" |
| } |
| ], |
| "notes": "Direct CRM vendor comparison should map cleanly into the sales domain.", |
| "pass": false, |
| "status": "must_fix", |
| "text": "HubSpot vs Zoho for a small team" |
| }, |
| { |
| "actual": { |
| "model_output.classification.iab_content.mapping_mode": "nearest_equivalent", |
| "model_output.classification.iab_content.tier1.label": "Careers", |
| "model_output.classification.iab_content.tier2.label": null, |
| "model_output.classification.iab_content.tier3.label": null |
| }, |
| "expected": { |
| "model_output.classification.iab_content.mapping_mode": "exact", |
| "model_output.classification.iab_content.tier1.label": "Business and Finance", |
| "model_output.classification.iab_content.tier2.label": "Business", |
| "model_output.classification.iab_content.tier3.label": "Marketing and Advertising" |
| }, |
| "id": "marketing-tools-map-to-marketing", |
| "mismatches": [ |
| { |
| "actual": "Careers", |
| "expected": "Business and Finance", |
| "path": "model_output.classification.iab_content.tier1.label" |
| }, |
| { |
| "actual": null, |
| "expected": "Business", |
| "path": "model_output.classification.iab_content.tier2.label" |
| }, |
| { |
| "actual": null, |
| "expected": "Marketing and Advertising", |
| "path": "model_output.classification.iab_content.tier3.label" |
| }, |
| { |
| "actual": "nearest_equivalent", |
| "expected": "exact", |
| "path": "model_output.classification.iab_content.mapping_mode" |
| } |
| ], |
| "notes": "Marketing tool discovery should map to the marketing and advertising branch.", |
| "pass": false, |
| "status": "must_fix", |
| "text": "Best AI SEO tools for content teams" |
| }, |
| { |
| "actual": { |
| "model_output.classification.iab_content.mapping_mode": "nearest_equivalent", |
| "model_output.classification.iab_content.tier1.label": "Real Estate", |
| "model_output.classification.iab_content.tier2.label": null |
| }, |
| "expected": { |
| "model_output.classification.iab_content.mapping_mode": "exact", |
| "model_output.classification.iab_content.tier1.label": "Technology & Computing", |
| "model_output.classification.iab_content.tier2.label": "Artificial Intelligence" |
| }, |
| "id": "ml-explanation-maps-to-ai", |
| "mismatches": [ |
| { |
| "actual": "Real Estate", |
| "expected": "Technology & Computing", |
| "path": "model_output.classification.iab_content.tier1.label" |
| }, |
| { |
| "actual": null, |
| "expected": "Artificial Intelligence", |
| "path": "model_output.classification.iab_content.tier2.label" |
| }, |
| { |
| "actual": "nearest_equivalent", |
| "expected": "exact", |
| "path": "model_output.classification.iab_content.mapping_mode" |
| } |
| ], |
| "notes": "ML and NLP educational prompts should land in the AI branch.", |
| "pass": false, |
| "status": "must_fix", |
| "text": "What is intent classification in NLP?" |
| }, |
| { |
| "actual": { |
| "model_output.classification.iab_content.mapping_mode": "nearest_equivalent", |
| "model_output.classification.iab_content.tier1.label": "Personal Finance", |
| "model_output.classification.iab_content.tier2.label": null, |
| "model_output.classification.iab_content.tier3.label": null |
| }, |
| "expected": { |
| "model_output.classification.iab_content.mapping_mode": "nearest_equivalent", |
| "model_output.classification.iab_content.tier1.label": "Business and Finance", |
| "model_output.classification.iab_content.tier2.label": "Business", |
| "model_output.classification.iab_content.tier3.label": "Business I.T." |
| }, |
| "id": "support-credential-help-maps-to-business-it", |
| "mismatches": [ |
| { |
| "actual": "Personal Finance", |
| "expected": "Business and Finance", |
| "path": "model_output.classification.iab_content.tier1.label" |
| }, |
| { |
| "actual": null, |
| "expected": "Business", |
| "path": "model_output.classification.iab_content.tier2.label" |
| }, |
| { |
| "actual": null, |
| "expected": "Business I.T.", |
| "path": "model_output.classification.iab_content.tier3.label" |
| } |
| ], |
| "notes": "Credential and account help should map to business IT rather than generic business.", |
| "pass": false, |
| "status": "must_fix", |
| "text": "How do I reset my password?" |
| }, |
| { |
| "actual": { |
| "model_output.classification.iab_content.mapping_mode": "exact", |
| "model_output.classification.iab_content.tier1.label": "Food & Drink", |
| "model_output.classification.iab_content.tier2.label": "Dining Out" |
| }, |
| "expected": { |
| "model_output.classification.iab_content.mapping_mode": "exact", |
| "model_output.classification.iab_content.tier1.label": "Food & Drink", |
| "model_output.classification.iab_content.tier2.label": "Dining Out" |
| }, |
| "id": "restaurant-booking-maps-to-dining-out", |
| "mismatches": [], |
| "notes": "Generic dining requests should not inherit the repo's business default.", |
| "pass": true, |
| "status": "must_fix", |
| "text": "Book a table for 2 tonight" |
| }, |
| { |
| "actual": { |
| "model_output.classification.iab_content.mapping_mode": "nearest_equivalent", |
| "model_output.classification.iab_content.tier1.label": "Sports", |
| "model_output.classification.iab_content.tier2.label": null, |
| "model_output.classification.iab_content.tier3.label": null |
| }, |
| "expected": { |
| "model_output.classification.iab_content.mapping_mode": "nearest_equivalent", |
| "model_output.classification.iab_content.tier1.label": "Technology & Computing", |
| "model_output.classification.iab_content.tier2.label": "Computing", |
| "model_output.classification.iab_content.tier3.label": "Software and Applications" |
| }, |
| "id": "trial-signup-maps-to-software", |
| "mismatches": [ |
| { |
| "actual": "Sports", |
| "expected": "Technology & Computing", |
| "path": "model_output.classification.iab_content.tier1.label" |
| }, |
| { |
| "actual": null, |
| "expected": "Computing", |
| "path": "model_output.classification.iab_content.tier2.label" |
| }, |
| { |
| "actual": null, |
| "expected": "Software and Applications", |
| "path": "model_output.classification.iab_content.tier3.label" |
| } |
| ], |
| "notes": "Software action queries should map to the software/application branch.", |
| "pass": false, |
| "status": "must_fix", |
| "text": "Start my free trial" |
| }, |
| { |
| "actual": { |
| "model_output.classification.iab_content.mapping_mode": "exact", |
| "model_output.classification.iab_content.tier1.label": "Careers", |
| "model_output.classification.iab_content.tier2.label": "Remote Working", |
| "model_output.classification.iab_content.tier3.label": null, |
| "model_output.classification.iab_content.tier4.label": null |
| }, |
| "expected": { |
| "model_output.classification.iab_content.mapping_mode": "exact", |
| "model_output.classification.iab_content.tier1.label": "Technology & Computing", |
| "model_output.classification.iab_content.tier2.label": "Computing", |
| "model_output.classification.iab_content.tier3.label": "Computer Software and Applications", |
| "model_output.classification.iab_content.tier4.label": "Communication" |
| }, |
| "id": "communication-software-maps-to-tier4", |
| "mismatches": [ |
| { |
| "actual": "Careers", |
| "expected": "Technology & Computing", |
| "path": "model_output.classification.iab_content.tier1.label" |
| }, |
| { |
| "actual": "Remote Working", |
| "expected": "Computing", |
| "path": "model_output.classification.iab_content.tier2.label" |
| }, |
| { |
| "actual": null, |
| "expected": "Computer Software and Applications", |
| "path": "model_output.classification.iab_content.tier3.label" |
| }, |
| { |
| "actual": null, |
| "expected": "Communication", |
| "path": "model_output.classification.iab_content.tier4.label" |
| } |
| ], |
| "notes": "Full taxonomy support should preserve the tier4 communication branch.", |
| "pass": false, |
| "status": "must_fix", |
| "text": "best communication software for remote teams" |
| }, |
| { |
| "actual": { |
| "model_output.classification.iab_content.mapping_mode": "exact", |
| "model_output.classification.iab_content.tier1.label": "Food & Drink", |
| "model_output.classification.iab_content.tier2.label": "Alcoholic Beverages" |
| }, |
| "expected": { |
| "model_output.classification.iab_content.mapping_mode": "exact", |
| "model_output.classification.iab_content.tier1.label": "Food & Drink", |
| "model_output.classification.iab_content.tier2.label": "Alcoholic Beverages" |
| }, |
| "id": "vodka-query-maps-to-alcoholic-beverages", |
| "mismatches": [], |
| "notes": "Food and beverage prompts should not fall through to the business default.", |
| "pass": true, |
| "status": "must_fix", |
| "text": "what is best vodka drink should i try" |
| } |
| ] |
| } |
|
|