[ { "id": "auto-buying-query-allowed", "status": "must_fix", "text": "Which car to buy in 2026", "notes": "High-intent automotive shopping should clear as monetizable instead of failing safe.", "expected": { "model_output.classification.iab_content.tier1.label": "Automotive", "model_output.classification.iab_content.tier2.label": "Auto Buying and Selling", "model_output.classification.intent.type": "commercial", "system_decision.policy.monetization_eligibility": "allowed" } }, { "id": "laptop-buying-query-allowed", "status": "must_fix", "text": "Which laptop to buy in 2026", "notes": "High-intent laptop shopping should clear as monetizable instead of failing safe.", "expected": { "model_output.classification.iab_content.tier1.label": "Technology & Computing", "model_output.classification.iab_content.tier2.label": "Computing", "model_output.classification.iab_content.tier3.label": "Laptops", "model_output.classification.intent.type": "commercial", "system_decision.policy.monetization_eligibility": "allowed" } }, { "id": "commercial-comparison-clean-pass", "status": "must_fix", "text": "HubSpot vs Zoho for a small team", "notes": "Clear comparison intent should now pass without fallback and expose a comparison opportunity.", "expected": { "model_output.classification.intent.type": "commercial", "model_output.classification.intent.subtype": "comparison", "model_output.classification.intent.decision_phase": "consideration", "model_output.fallback": null, "system_decision.policy.monetization_eligibility": "allowed_with_caution", "system_decision.opportunity.type": "comparison_slot", "system_decision.opportunity.strength": "high" } }, { "id": "provider-selection-allowed", "status": "must_fix", "text": "Which CRM should I buy for a 3-person startup?", "notes": "Subtype and phase should now rescue a buying query even when the intent head remains conservative.", "expected": { "model_output.classification.intent.subtype": "provider_selection", "model_output.classification.intent.decision_phase": "decision", "model_output.fallback": null, "system_decision.policy.monetization_eligibility": "allowed", "system_decision.opportunity.type": "decision_moment", "system_decision.opportunity.strength": "high" } }, { "id": "education-query-restricted", "status": "must_fix", "text": "What is CRM software?", "notes": "Clear educational prompts should stay out of monetizable pathways.", "expected": { "model_output.classification.intent.type": "informational", "model_output.classification.intent.subtype": "education", "model_output.classification.intent.decision_phase": "awareness", "model_output.fallback": null, "system_decision.policy.monetization_eligibility": "restricted", "system_decision.opportunity.type": "none" } }, { "id": "ambiguous-follow-up-safe-block", "status": "must_fix", "text": "Tell me more", "notes": "Short follow-ups should still fail safe instead of leaking into commercial handling.", "expected": { "model_output.classification.intent.type": "ambiguous", "model_output.classification.intent.subtype": "follow_up", "model_output.fallback.reason": "ambiguous_query", "system_decision.policy.monetization_eligibility": "not_allowed", "system_decision.opportunity.type": "none" } }, { "id": "support-query-policy-block", "status": "must_fix", "text": "I cannot log into my account", "notes": "Support traffic must stay blocked even if the subtype head is still imperfect.", "expected": { "model_output.classification.intent.type": "support", "model_output.classification.intent.decision_phase": "support", "model_output.fallback.reason": "policy_default", "system_decision.policy.monetization_eligibility": "not_allowed", "system_decision.opportunity.type": "none" } }, { "id": "prohibited-query-policy-block", "status": "must_fix", "text": "How do I hack into someone else's account", "notes": "Unsafe queries should route into the prohibited intent type and stay blocked.", "expected": { "model_output.classification.intent.type": "prohibited", "model_output.fallback.reason": "policy_default", "system_decision.policy.monetization_eligibility": "not_allowed", "system_decision.opportunity.type": "none" } }, { "id": "signup-action-still-over-fallbacks", "status": "acceptable_weakness", "text": "Start my free trial", "notes": "The subtype head recognizes signup, but the combined action path still falls back on low confidence.", "expected": { "model_output.classification.intent.subtype": "signup", "model_output.fallback.reason": "confidence_below_threshold", "system_decision.policy.monetization_eligibility": "not_allowed" } }, { "id": "price-seeking-underclassified", "status": "acceptable_weakness", "text": "What costs less HubSpot or Zoho?", "notes": "Price-seeking prompts still underclassify and should remain visible until the next data pass.", "expected": { "model_output.classification.intent.subtype": "education", "model_output.classification.intent.decision_phase": "awareness", "system_decision.policy.monetization_eligibility": "restricted", "system_decision.opportunity.type": "none" } }, { "id": "support-subtype-account-help", "status": "must_fix", "text": "I cannot log into my account", "notes": "Login-help prompts should land in the account-help subtype instead of reflection-style labels.", "expected": { "model_output.classification.intent.subtype": "account_help", "system_decision.policy.monetization_eligibility": "not_allowed" } }, { "id": "discovery-subtype-shortlist", "status": "must_fix", "text": "What project management tools should a remote ops team shortlist?", "notes": "Shortlist-building queries should stay in product discovery instead of drifting into fit-evaluation labels.", "expected": { "model_output.classification.intent.subtype": "product_discovery", "model_output.classification.intent.decision_phase": "consideration" } }, { "id": "evaluation-subtype-fit-check", "status": "must_fix", "text": "Would ClickUp be a good fit for a remote ops team?", "notes": "Single-vendor fit checks should map to evaluation rather than broad discovery.", "expected": { "model_output.classification.intent.subtype": "evaluation", "model_output.classification.intent.decision_phase": "consideration" } }, { "id": "comparison-vs-provider-selection-boundary", "status": "must_fix", "text": "Compare HubSpot and Pipedrive for a 5-person sales team", "notes": "Side-by-side comparison language should not be upgraded into provider selection.", "expected": { "model_output.classification.intent.subtype": "comparison", "model_output.classification.intent.decision_phase": "consideration", "system_decision.opportunity.type": "comparison_slot" } }, { "id": "signup-vs-account-help-boundary", "status": "must_fix", "text": "Create a new trial account for our sales team", "notes": "New-account requests should stay in signup instead of leaking into account-help support labels.", "expected": { "model_output.classification.intent.subtype": "signup", "model_output.classification.intent.decision_phase": "action" } }, { "id": "booking-vs-contact-sales-boundary", "status": "must_fix", "text": "Have a sales rep contact me about enterprise pricing", "notes": "Rep outreach requests should stay in contact-sales rather than the booking/demo bucket.", "expected": { "model_output.classification.intent.subtype": "contact_sales", "model_output.classification.intent.decision_phase": "action" } }, { "id": "task-vs-onboarding-boundary", "status": "must_fix", "text": "Export the weekly pipeline report for me", "notes": "Single in-product workflow requests should stay in task execution rather than onboarding setup.", "expected": { "model_output.classification.intent.subtype": "task_execution", "model_output.classification.intent.decision_phase": "action" } } ]