{ "by_status": { "must_fix": { "failed": 9, "passed": 3, "total": 12 } }, "cases_path": "/content/agentic-intent-classifier/examples/iab_mapping_cases.json", "count": 12, "failed": 9, "passed": 3, "results": [ { "actual": { "model_output.classification.iab_content.mapping_mode": "nearest_equivalent", "model_output.classification.iab_content.tier1.label": "Automotive", "model_output.classification.iab_content.tier2.label": null }, "expected": { "model_output.classification.iab_content.mapping_mode": "exact", "model_output.classification.iab_content.tier1.label": "Automotive", "model_output.classification.iab_content.tier2.label": "Auto Buying and Selling" }, "id": "car-buying-maps-to-automotive-buying", "mismatches": [ { "actual": null, "expected": "Auto Buying and Selling", "path": "model_output.classification.iab_content.tier2.label" }, { "actual": "nearest_equivalent", "expected": "exact", "path": "model_output.classification.iab_content.mapping_mode" } ], "notes": "Vehicle shopping queries should map into the automotive buying branch, not business sales.", "pass": false, "status": "must_fix", "text": "Which car to buy in 2026" }, { "actual": { "model_output.classification.iab_content.mapping_mode": "exact", "model_output.classification.iab_content.tier1.label": "Technology & Computing", "model_output.classification.iab_content.tier2.label": "Computing", "model_output.classification.iab_content.tier3.label": "Laptops" }, "expected": { "model_output.classification.iab_content.mapping_mode": "exact", "model_output.classification.iab_content.tier1.label": "Technology & Computing", "model_output.classification.iab_content.tier2.label": "Computing", "model_output.classification.iab_content.tier3.label": "Laptops" }, "id": "laptop-buying-maps-to-laptops", "mismatches": [], "notes": "Laptop shopping should resolve into the laptops branch, not business sales.", "pass": true, "status": "must_fix", "text": "Which laptop to buy in 2026" }, { "actual": { "model_output.classification.iab_content.mapping_mode": "exact", "model_output.classification.iab_content.tier1.label": "Technology & Computing", "model_output.classification.iab_content.tier2.label": null, "model_output.classification.iab_content.tier3.label": null }, "expected": { "model_output.classification.iab_content.mapping_mode": "exact", "model_output.classification.iab_content.tier1.label": "Technology & Computing", "model_output.classification.iab_content.tier2.label": "Computing", "model_output.classification.iab_content.tier3.label": "Laptops" }, "id": "labtop-buying-maps-to-laptops", "mismatches": [ { "actual": null, "expected": "Computing", "path": "model_output.classification.iab_content.tier2.label" }, { "actual": null, "expected": "Laptops", "path": "model_output.classification.iab_content.tier3.label" } ], "notes": "Common typo handling should still land in the laptops branch.", "pass": false, "status": "must_fix", "text": "Which labtop to buy in 2026" }, { "actual": { "model_output.classification.iab_content.mapping_mode": "exact", "model_output.classification.iab_content.tier1.label": "Technology & Computing", "model_output.classification.iab_content.tier2.label": null, "model_output.classification.iab_content.tier3.label": null }, "expected": { "model_output.classification.iab_content.mapping_mode": "nearest_equivalent", "model_output.classification.iab_content.tier1.label": "Business and Finance", "model_output.classification.iab_content.tier2.label": "Business", "model_output.classification.iab_content.tier3.label": "Sales" }, "id": "crm-awareness-maps-to-sales", "mismatches": [ { "actual": "Technology & Computing", "expected": "Business and Finance", "path": "model_output.classification.iab_content.tier1.label" }, { "actual": null, "expected": "Business", "path": "model_output.classification.iab_content.tier2.label" }, { "actual": null, "expected": "Sales", "path": "model_output.classification.iab_content.tier3.label" }, { "actual": "exact", "expected": "nearest_equivalent", "path": "model_output.classification.iab_content.mapping_mode" } ], "notes": "CRM education should resolve to the closest business/sales path, not generic software.", "pass": false, "status": "must_fix", "text": "What is CRM software?" }, { "actual": { "model_output.classification.iab_content.mapping_mode": "nearest_equivalent", "model_output.classification.iab_content.tier1.label": "Technology & Computing", "model_output.classification.iab_content.tier2.label": null, "model_output.classification.iab_content.tier3.label": null }, "expected": { "model_output.classification.iab_content.mapping_mode": "exact", "model_output.classification.iab_content.tier1.label": "Business and Finance", "model_output.classification.iab_content.tier2.label": "Business", "model_output.classification.iab_content.tier3.label": "Sales" }, "id": "crm-comparison-maps-to-sales", "mismatches": [ { "actual": "Technology & Computing", "expected": "Business and Finance", "path": "model_output.classification.iab_content.tier1.label" }, { "actual": null, "expected": "Business", "path": "model_output.classification.iab_content.tier2.label" }, { "actual": null, "expected": "Sales", "path": "model_output.classification.iab_content.tier3.label" }, { "actual": "nearest_equivalent", "expected": "exact", "path": "model_output.classification.iab_content.mapping_mode" } ], "notes": "Direct CRM vendor comparison should map cleanly into the sales domain.", "pass": false, "status": "must_fix", "text": "HubSpot vs Zoho for a small team" }, { "actual": { "model_output.classification.iab_content.mapping_mode": "nearest_equivalent", "model_output.classification.iab_content.tier1.label": "Careers", "model_output.classification.iab_content.tier2.label": null, "model_output.classification.iab_content.tier3.label": null }, "expected": { "model_output.classification.iab_content.mapping_mode": "exact", "model_output.classification.iab_content.tier1.label": "Business and Finance", "model_output.classification.iab_content.tier2.label": "Business", "model_output.classification.iab_content.tier3.label": "Marketing and Advertising" }, "id": "marketing-tools-map-to-marketing", "mismatches": [ { "actual": "Careers", "expected": "Business and Finance", "path": "model_output.classification.iab_content.tier1.label" }, { "actual": null, "expected": "Business", "path": "model_output.classification.iab_content.tier2.label" }, { "actual": null, "expected": "Marketing and Advertising", "path": "model_output.classification.iab_content.tier3.label" }, { "actual": "nearest_equivalent", "expected": "exact", "path": "model_output.classification.iab_content.mapping_mode" } ], "notes": "Marketing tool discovery should map to the marketing and advertising branch.", "pass": false, "status": "must_fix", "text": "Best AI SEO tools for content teams" }, { "actual": { "model_output.classification.iab_content.mapping_mode": "nearest_equivalent", "model_output.classification.iab_content.tier1.label": "Real Estate", "model_output.classification.iab_content.tier2.label": null }, "expected": { "model_output.classification.iab_content.mapping_mode": "exact", "model_output.classification.iab_content.tier1.label": "Technology & Computing", "model_output.classification.iab_content.tier2.label": "Artificial Intelligence" }, "id": "ml-explanation-maps-to-ai", "mismatches": [ { "actual": "Real Estate", "expected": "Technology & Computing", "path": "model_output.classification.iab_content.tier1.label" }, { "actual": null, "expected": "Artificial Intelligence", "path": "model_output.classification.iab_content.tier2.label" }, { "actual": "nearest_equivalent", "expected": "exact", "path": "model_output.classification.iab_content.mapping_mode" } ], "notes": "ML and NLP educational prompts should land in the AI branch.", "pass": false, "status": "must_fix", "text": "What is intent classification in NLP?" }, { "actual": { "model_output.classification.iab_content.mapping_mode": "nearest_equivalent", "model_output.classification.iab_content.tier1.label": "Personal Finance", "model_output.classification.iab_content.tier2.label": null, "model_output.classification.iab_content.tier3.label": null }, "expected": { "model_output.classification.iab_content.mapping_mode": "nearest_equivalent", "model_output.classification.iab_content.tier1.label": "Business and Finance", "model_output.classification.iab_content.tier2.label": "Business", "model_output.classification.iab_content.tier3.label": "Business I.T." }, "id": "support-credential-help-maps-to-business-it", "mismatches": [ { "actual": "Personal Finance", "expected": "Business and Finance", "path": "model_output.classification.iab_content.tier1.label" }, { "actual": null, "expected": "Business", "path": "model_output.classification.iab_content.tier2.label" }, { "actual": null, "expected": "Business I.T.", "path": "model_output.classification.iab_content.tier3.label" } ], "notes": "Credential and account help should map to business IT rather than generic business.", "pass": false, "status": "must_fix", "text": "How do I reset my password?" }, { "actual": { "model_output.classification.iab_content.mapping_mode": "exact", "model_output.classification.iab_content.tier1.label": "Food & Drink", "model_output.classification.iab_content.tier2.label": "Dining Out" }, "expected": { "model_output.classification.iab_content.mapping_mode": "exact", "model_output.classification.iab_content.tier1.label": "Food & Drink", "model_output.classification.iab_content.tier2.label": "Dining Out" }, "id": "restaurant-booking-maps-to-dining-out", "mismatches": [], "notes": "Generic dining requests should not inherit the repo's business default.", "pass": true, "status": "must_fix", "text": "Book a table for 2 tonight" }, { "actual": { "model_output.classification.iab_content.mapping_mode": "nearest_equivalent", "model_output.classification.iab_content.tier1.label": "Sports", "model_output.classification.iab_content.tier2.label": null, "model_output.classification.iab_content.tier3.label": null }, "expected": { "model_output.classification.iab_content.mapping_mode": "nearest_equivalent", "model_output.classification.iab_content.tier1.label": "Technology & Computing", "model_output.classification.iab_content.tier2.label": "Computing", "model_output.classification.iab_content.tier3.label": "Software and Applications" }, "id": "trial-signup-maps-to-software", "mismatches": [ { "actual": "Sports", "expected": "Technology & Computing", "path": "model_output.classification.iab_content.tier1.label" }, { "actual": null, "expected": "Computing", "path": "model_output.classification.iab_content.tier2.label" }, { "actual": null, "expected": "Software and Applications", "path": "model_output.classification.iab_content.tier3.label" } ], "notes": "Software action queries should map to the software/application branch.", "pass": false, "status": "must_fix", "text": "Start my free trial" }, { "actual": { "model_output.classification.iab_content.mapping_mode": "exact", "model_output.classification.iab_content.tier1.label": "Careers", "model_output.classification.iab_content.tier2.label": "Remote Working", "model_output.classification.iab_content.tier3.label": null, "model_output.classification.iab_content.tier4.label": null }, "expected": { "model_output.classification.iab_content.mapping_mode": "exact", "model_output.classification.iab_content.tier1.label": "Technology & Computing", "model_output.classification.iab_content.tier2.label": "Computing", "model_output.classification.iab_content.tier3.label": "Computer Software and Applications", "model_output.classification.iab_content.tier4.label": "Communication" }, "id": "communication-software-maps-to-tier4", "mismatches": [ { "actual": "Careers", "expected": "Technology & Computing", "path": "model_output.classification.iab_content.tier1.label" }, { "actual": "Remote Working", "expected": "Computing", "path": "model_output.classification.iab_content.tier2.label" }, { "actual": null, "expected": "Computer Software and Applications", "path": "model_output.classification.iab_content.tier3.label" }, { "actual": null, "expected": "Communication", "path": "model_output.classification.iab_content.tier4.label" } ], "notes": "Full taxonomy support should preserve the tier4 communication branch.", "pass": false, "status": "must_fix", "text": "best communication software for remote teams" }, { "actual": { "model_output.classification.iab_content.mapping_mode": "exact", "model_output.classification.iab_content.tier1.label": "Food & Drink", "model_output.classification.iab_content.tier2.label": "Alcoholic Beverages" }, "expected": { "model_output.classification.iab_content.mapping_mode": "exact", "model_output.classification.iab_content.tier1.label": "Food & Drink", "model_output.classification.iab_content.tier2.label": "Alcoholic Beverages" }, "id": "vodka-query-maps-to-alcoholic-beverages", "mismatches": [], "notes": "Food and beverage prompts should not fall through to the business default.", "pass": true, "status": "must_fix", "text": "what is best vodka drink should i try" } ] }