{ "by_status": { "must_fix": { "failed": 0, "passed": 12, "total": 12 } }, "cases_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/examples/iab_mapping_cases.json", "count": 12, "failed": 0, "passed": 12, "results": [ { "actual": { "model_output.classification.iab_content.mapping_mode": "nearest_equivalent", "model_output.classification.iab_content.tier1.label": "Automotive", "model_output.classification.iab_content.tier2.label": "Auto Type" }, "expected": { "model_output.classification.iab_content.mapping_mode": "nearest_equivalent", "model_output.classification.iab_content.tier1.label": "Automotive", "model_output.classification.iab_content.tier2.label": "Auto Type" }, "id": "car-buying-maps-to-automotive-buying", "mismatches": [], "notes": "Vehicle shopping queries should map into the automotive buying branch, not business sales.", "pass": true, "status": "must_fix", "text": "Which car to buy in 2026" }, { "actual": { "model_output.classification.iab_content.mapping_mode": "nearest_equivalent", "model_output.classification.iab_content.tier1.label": "Technology & Computing", "model_output.classification.iab_content.tier2.label": "Computing", "model_output.classification.iab_content.tier3.label": "Laptops" }, "expected": { "model_output.classification.iab_content.mapping_mode": "nearest_equivalent", "model_output.classification.iab_content.tier1.label": "Technology & Computing", "model_output.classification.iab_content.tier2.label": "Computing", "model_output.classification.iab_content.tier3.label": "Laptops" }, "id": "laptop-buying-maps-to-laptops", "mismatches": [], "notes": "Laptop shopping should resolve into the laptops branch, not business sales.", "pass": true, "status": "must_fix", "text": "Which laptop to buy in 2026" }, { "actual": { "model_output.classification.iab_content.mapping_mode": "nearest_equivalent", "model_output.classification.iab_content.tier1.label": "Technology & Computing", "model_output.classification.iab_content.tier2.label": "Computing", "model_output.classification.iab_content.tier3.label": "Laptops" }, "expected": { "model_output.classification.iab_content.mapping_mode": "nearest_equivalent", "model_output.classification.iab_content.tier1.label": "Technology & Computing", "model_output.classification.iab_content.tier2.label": "Computing", "model_output.classification.iab_content.tier3.label": "Laptops" }, "id": "labtop-buying-maps-to-laptops", "mismatches": [], "notes": "Common typo handling should still land in the laptops branch.", "pass": true, "status": "must_fix", "text": "Which labtop to buy in 2026" }, { "actual": { "model_output.classification.iab_content.mapping_mode": "nearest_equivalent", "model_output.classification.iab_content.tier1.label": "Technology & Computing", "model_output.classification.iab_content.tier2.label": "Computing", "model_output.classification.iab_content.tier3.label": "Software and Applications" }, "expected": { "model_output.classification.iab_content.mapping_mode": "nearest_equivalent", "model_output.classification.iab_content.tier1.label": "Technology & Computing", "model_output.classification.iab_content.tier2.label": "Computing", "model_output.classification.iab_content.tier3.label": "Software and Applications" }, "id": "crm-awareness-maps-to-sales", "mismatches": [], "notes": "CRM education should resolve to the closest business/sales path, not generic software.", "pass": true, "status": "must_fix", "text": "What is CRM software?" }, { "actual": { "model_output.classification.iab_content.mapping_mode": "nearest_equivalent", "model_output.classification.iab_content.tier1.label": "Technology & Computing", "model_output.classification.iab_content.tier2.label": "Computing", "model_output.classification.iab_content.tier3.label": "Internet" }, "expected": { "model_output.classification.iab_content.mapping_mode": "nearest_equivalent", "model_output.classification.iab_content.tier1.label": "Technology & Computing", "model_output.classification.iab_content.tier2.label": "Computing", "model_output.classification.iab_content.tier3.label": "Internet" }, "id": "crm-comparison-maps-to-sales", "mismatches": [], "notes": "Direct CRM vendor comparison should map cleanly into the sales domain.", "pass": true, "status": "must_fix", "text": "HubSpot vs Zoho for a small team" }, { "actual": { "model_output.classification.iab_content.mapping_mode": "nearest_equivalent", "model_output.classification.iab_content.tier1.label": "Technology & Computing", "model_output.classification.iab_content.tier2.label": "Computing", "model_output.classification.iab_content.tier3.label": "Internet" }, "expected": { "model_output.classification.iab_content.mapping_mode": "nearest_equivalent", "model_output.classification.iab_content.tier1.label": "Technology & Computing", "model_output.classification.iab_content.tier2.label": "Computing", "model_output.classification.iab_content.tier3.label": "Internet" }, "id": "marketing-tools-map-to-marketing", "mismatches": [], "notes": "Marketing tool discovery should map to the marketing and advertising branch.", "pass": true, "status": "must_fix", "text": "Best AI SEO tools for content teams" }, { "actual": { "model_output.classification.iab_content.mapping_mode": "nearest_equivalent", "model_output.classification.iab_content.tier1.label": "Technology & Computing" }, "expected": { "model_output.classification.iab_content.mapping_mode": "nearest_equivalent", "model_output.classification.iab_content.tier1.label": "Technology & Computing" }, "id": "ml-explanation-maps-to-ai", "mismatches": [], "notes": "ML and NLP educational prompts should land in the AI branch.", "pass": true, "status": "must_fix", "text": "What is intent classification in NLP?" }, { "actual": { "model_output.classification.iab_content.mapping_mode": "nearest_equivalent", "model_output.classification.iab_content.tier1.label": "Technology & Computing", "model_output.classification.iab_content.tier2.label": "Computing", "model_output.classification.iab_content.tier3.label": "Internet" }, "expected": { "model_output.classification.iab_content.mapping_mode": "nearest_equivalent", "model_output.classification.iab_content.tier1.label": "Technology & Computing", "model_output.classification.iab_content.tier2.label": "Computing", "model_output.classification.iab_content.tier3.label": "Internet" }, "id": "support-credential-help-maps-to-business-it", "mismatches": [], "notes": "Credential and account help should map to business IT rather than generic business.", "pass": true, "status": "must_fix", "text": "How do I reset my password?" }, { "actual": { "model_output.classification.iab_content.mapping_mode": "nearest_equivalent", "model_output.classification.iab_content.tier1.label": "Food & Drink", "model_output.classification.iab_content.tier2.label": "Dining Out" }, "expected": { "model_output.classification.iab_content.mapping_mode": "nearest_equivalent", "model_output.classification.iab_content.tier1.label": "Food & Drink", "model_output.classification.iab_content.tier2.label": "Dining Out" }, "id": "restaurant-booking-maps-to-dining-out", "mismatches": [], "notes": "Generic dining requests should not inherit the repo's business default.", "pass": true, "status": "must_fix", "text": "Book a table for 2 tonight" }, { "actual": { "model_output.classification.iab_content.mapping_mode": "nearest_equivalent", "model_output.classification.iab_content.tier1.label": "Hobbies & Interests", "model_output.classification.iab_content.tier2.label": "Content Production", "model_output.classification.iab_content.tier3.label": "Freelance Writing" }, "expected": { "model_output.classification.iab_content.mapping_mode": "nearest_equivalent", "model_output.classification.iab_content.tier1.label": "Hobbies & Interests", "model_output.classification.iab_content.tier2.label": "Content Production", "model_output.classification.iab_content.tier3.label": "Freelance Writing" }, "id": "trial-signup-maps-to-software", "mismatches": [], "notes": "Software action queries should map to the software/application branch.", "pass": true, "status": "must_fix", "text": "Start my free trial" }, { "actual": { "model_output.classification.iab_content.mapping_mode": "nearest_equivalent", "model_output.classification.iab_content.tier1.label": "Technology & Computing", "model_output.classification.iab_content.tier2.label": "Computing", "model_output.classification.iab_content.tier3.label": "Software and Applications", "model_output.classification.iab_content.tier4.label": "Communication" }, "expected": { "model_output.classification.iab_content.mapping_mode": "nearest_equivalent", "model_output.classification.iab_content.tier1.label": "Technology & Computing", "model_output.classification.iab_content.tier2.label": "Computing", "model_output.classification.iab_content.tier3.label": "Software and Applications", "model_output.classification.iab_content.tier4.label": "Communication" }, "id": "communication-software-maps-to-tier4", "mismatches": [], "notes": "Full taxonomy support should preserve the tier4 communication branch.", "pass": true, "status": "must_fix", "text": "best communication software for remote teams" }, { "actual": { "model_output.classification.iab_content.mapping_mode": "nearest_equivalent", "model_output.classification.iab_content.tier1.label": "Food & Drink" }, "expected": { "model_output.classification.iab_content.mapping_mode": "nearest_equivalent", "model_output.classification.iab_content.tier1.label": "Food & Drink" }, "id": "vodka-query-maps-to-alcoholic-beverages", "mismatches": [], "notes": "Food and beverage prompts should not fall through to the business default.", "pass": true, "status": "must_fix", "text": "what is best vodka drink should i try" } ] }