{
  "benchmark_name": "Marketing Memory Routing Benchmark",
  "version": "1.0.0",
  "description": "50 challenging marketing scenarios for evaluating memory routing classification models. Covers brand positioning, strategic decisions, performance metrics, tools/integrations, user preferences, business priorities, and knowledge artifacts.",
  "num_scenarios": 50,
  "categories": [
    "company.brand_core",
    "company.strategic_signatures",
    "company.knowledge_artifacts",
    "company.business_priorities",
    "company.tools_config",
    "company.performance_context",
    "user.communication_style",
    "user.strategic_approach",
    "user.role_context",
    "user.workflow_patterns",
    "user.session_history",
    "user.interaction_preferences",
    "none"
  ],
  "difficulty_distribution": {
    "easy": 14,
    "medium": 26,
    "hard": 10
  },
  "scenarios": [
    {
      "id": "brand_1",
      "conversation": "USER: Our brand DNA is 'accessible luxury'. We're not Chanel, but we're definitely not Target either. Think Everlane meets Aritzia.\nASSISTANT: So premium quality at attainable price points, with a focus on transparency?\nUSER: Exactly. Every piece of content should feel elevated but not pretentious.",
      "expected_categories": ["company.brand_core"],
      "difficulty": "medium",
      "category_focus": "Brand positioning with comparisons"
    },
    {
      "id": "brand_2",
      "conversation": "USER: We've been using 'Innovation for Everyone' as our tagline for 5 years. It's core to who we are.\nASSISTANT: That's a strong positioning statement. Does it extend to your visual identity too?\nUSER: Yes, our whole design system is built around democratizing technology.",
      "expected_categories": ["company.brand_core"],
      "difficulty": "easy",
      "category_focus": "Tagline and identity"
    },
    {
      "id": "brand_3",
      "conversation": "USER: I hate when brands try too hard to be funny. Our voice should be warm and helpful, never sarcastic.\nASSISTANT: So authentic helpfulness over trendy humor?\nUSER: Right. We're the trusted friend, not the class clown.",
      "expected_categories": ["company.brand_core"],
      "difficulty": "medium",
      "category_focus": "Voice and tone definition"
    },
    {
      "id": "brand_4",
      "conversation": "USER: Our competitors use fear tactics - 'Don't miss out!' We never do that. We believe in empowerment.\nASSISTANT: Positive framing over scarcity messaging.\nUSER: Always. Even our sales emails should feel like opportunities, not threats.",
      "expected_categories": ["company.brand_core", "company.strategic_signatures"],
      "difficulty": "hard",
      "category_focus": "Brand values + decision framework"
    },
    {
      "id": "brand_5",
      "conversation": "USER: What font are we using for the Q4 campaign headers?\nASSISTANT: The brand guidelines specify Montserrat for headers.\nUSER: Thanks, just needed to confirm.",
      "expected_categories": ["none"],
      "difficulty": "easy",
      "category_focus": "Transactional brand query"
    },
    {
      "id": "brand_6",
      "conversation": "USER: We position ourselves as the 'anti-agency agency'. No jargon, no BS, just results.\nASSISTANT: Transparency and authenticity as differentiators.\nUSER: And we back it up with our pricing model - no hidden fees, ever.",
      "expected_categories": ["company.brand_core"],
      "difficulty": "medium",
      "category_focus": "Agency positioning"
    },
    {
      "id": "brand_7",
      "conversation": "USER: Our brand is built on three pillars: sustainability, transparency, and community.\nASSISTANT: Those are strong values. How do they manifest in your marketing?\nUSER: Every campaign must touch at least one pillar. It's non-negotiable.",
      "expected_categories": ["company.brand_core", "company.strategic_signatures"],
      "difficulty": "hard",
      "category_focus": "Brand pillars as decision framework"
    },
    {
      "id": "brand_8",
      "conversation": "USER: We're rebranding next quarter but I can't share details yet.\nASSISTANT: Understood. Let me know when you're ready to discuss.\nUSER: Will do.",
      "expected_categories": ["none"],
      "difficulty": "medium",
      "category_focus": "Vague future reference"
    },
    {
      "id": "strategy_1",
      "conversation": "USER: We never launch a campaign without at least 3 weeks of testing. It's our golden rule.\nASSISTANT: So testing is a non-negotiable gate in your process?\nUSER: Absolutely. Even if stakeholders push back, we hold the line.",
      "expected_categories": ["company.strategic_signatures"],
      "difficulty": "easy",
      "category_focus": "Decision framework"
    },
    {
      "id": "strategy_2",
      "conversation": "USER: Our philosophy is 'measure twice, cut once'. We'd rather delay a launch than ship something half-baked.\nASSISTANT: Quality over speed.\nUSER: Every time. Our reputation depends on it.",
      "expected_categories": ["company.strategic_signatures"],
      "difficulty": "medium",
      "category_focus": "Strategic heuristic"
    },
    {
      "id": "strategy_3",
      "conversation": "USER: When in doubt, we default to the customer's perspective. What would they want?\nASSISTANT: Customer-centric decision making.\nUSER: It's saved us from many internal-focused mistakes.",
      "expected_categories": ["company.strategic_signatures"],
      "difficulty": "medium",
      "category_focus": "Decision principle"
    },
    {
      "id": "strategy_4",
      "conversation": "USER: I personally believe in testing everything. Data over gut instinct.\nASSISTANT: A data-driven approach.\nUSER: It's how I've always operated. Even small decisions get A/B tested.",
      "expected_categories": ["user.strategic_approach"],
      "difficulty": "medium",
      "category_focus": "Personal vs company strategy"
    },
    {
      "id": "strategy_5",
      "conversation": "USER: The company prioritizes brand safety above all. But I think we're too conservative sometimes.\nASSISTANT: So there's tension between company policy and your personal view?\nUSER: Yes. I'd take more calculated risks if it were up to me.",
      "expected_categories": ["company.strategic_signatures", "user.strategic_approach"],
      "difficulty": "hard",
      "category_focus": "Company vs personal strategy conflict"
    },
    {
      "id": "strategy_6",
      "conversation": "USER: We have a 'no surprises' policy with clients. They hear bad news from us first.\nASSISTANT: Proactive communication as a core principle.\nUSER: It's built trust with every client we've worked with.",
      "expected_categories": ["company.strategic_signatures"],
      "difficulty": "medium",
      "category_focus": "Client relationship principle"
    },
    {
      "id": "strategy_7",
      "conversation": "USER: Should we go with vendor A or vendor B for the email platform?\nASSISTANT: What are the key differences you're weighing?\nUSER: Just cost really. They're pretty similar otherwise.",
      "expected_categories": ["none"],
      "difficulty": "easy",
      "category_focus": "Transactional decision"
    },
    {
      "id": "strategy_8",
      "conversation": "USER: Our rule is simple: if we can't explain it to a 10-year-old, we don't publish it.\nASSISTANT: Clarity as a content standard.\nUSER: It keeps our messaging accessible to everyone.",
      "expected_categories": ["company.strategic_signatures", "company.brand_core"],
      "difficulty": "hard",
      "category_focus": "Content principle overlapping brand"
    },
    {
      "id": "perf_1",
      "conversation": "USER: Last quarter's email campaign hit 42% open rate and 8% CTR. Best we've ever done.\nASSISTANT: Those are strong numbers. What drove the improvement?\nUSER: We switched to personalized subject lines based on past behavior.",
      "expected_categories": ["company.performance_context"],
      "difficulty": "easy",
      "category_focus": "Campaign metrics"
    },
    {
      "id": "perf_2",
      "conversation": "USER: Our CAC has been climbing - $180 last month, up from $120 in Q1.\nASSISTANT: That's a significant increase. Any hypothesis on the cause?\nUSER: iOS privacy changes are killing our Facebook targeting.",
      "expected_categories": ["company.performance_context"],
      "difficulty": "medium",
      "category_focus": "Acquisition metrics with context"
    },
    {
      "id": "perf_3",
      "conversation": "USER: The influencer campaign flopped. 2M impressions but only 50 conversions.\nASSISTANT: That's a 0.0025% conversion rate. What went wrong?\nUSER: Wrong audience. The influencer's followers weren't our target demo.",
      "expected_categories": ["company.performance_context"],
      "difficulty": "medium",
      "category_focus": "Campaign failure analysis"
    },
    {
      "id": "perf_4",
      "conversation": "USER: We learned that video content outperforms static by 3x on engagement.\nASSISTANT: That's a significant insight. Are you shifting your content mix?\nUSER: Yes, Q4 will be 70% video. It's a major strategic shift.",
      "expected_categories": ["company.performance_context", "company.business_priorities"],
      "difficulty": "hard",
      "category_focus": "Learning leading to priority"
    },
    {
      "id": "perf_5",
      "conversation": "USER: What was our email open rate last week?\nASSISTANT: I don't have access to that data in real-time.\nUSER: I'll check the dashboard.",
      "expected_categories": ["none"],
      "difficulty": "easy",
      "category_focus": "Data lookup request"
    },
    {
      "id": "perf_6",
      "conversation": "USER: Our NPS dropped from 72 to 58 after the website redesign.\nASSISTANT: That's a significant drop. Customer feedback?\nUSER: They hate the new navigation. We're rolling back next week.",
      "expected_categories": ["company.performance_context"],
      "difficulty": "medium",
      "category_focus": "Customer satisfaction metrics"
    },
    {
      "id": "perf_7",
      "conversation": "USER: The podcast is our best performing channel - 40% of qualified leads come from it.\nASSISTANT: That's impressive attribution. How are you tracking it?\nUSER: Custom UTM parameters and post-conversion surveys.",
      "expected_categories": ["company.performance_context"],
      "difficulty": "medium",
      "category_focus": "Channel performance"
    },
    {
      "id": "perf_8",
      "conversation": "USER: We've tested 47 different landing page variants this year.\nASSISTANT: That's rigorous testing. What's been the biggest winner?\nUSER: Social proof above the fold. Increased conversions by 34%.",
      "expected_categories": ["company.performance_context", "company.knowledge_artifacts"],
      "difficulty": "hard",
      "category_focus": "Testing insights becoming knowledge"
    },
    {
      "id": "tools_1",
      "conversation": "USER: We use HubSpot for CRM, Klaviyo for email, and Shopify for commerce. Everything needs to sync.\nASSISTANT: What's your current integration setup?\nUSER: Zapier connects them, but we're moving to native integrations.",
      "expected_categories": ["company.tools_config"],
      "difficulty": "easy",
      "category_focus": "Tech stack overview"
    },
    {
      "id": "tools_2",
      "conversation": "USER: Our API key for the analytics platform is expiring next month.\nASSISTANT: I'll note that for renewal.\nUSER: Thanks. It's critical - everything breaks without it.",
      "expected_categories": ["company.tools_config"],
      "difficulty": "medium",
      "category_focus": "API configuration"
    },
    {
      "id": "tools_3",
      "conversation": "USER: We just migrated from Mailchimp to Klaviyo. The segmentation is so much better.\nASSISTANT: What segments are you using?\nUSER: RFM-based: high-value, at-risk, and dormant customers.",
      "expected_categories": ["company.tools_config"],
      "difficulty": "medium",
      "category_focus": "Tool migration and usage"
    },
    {
      "id": "tools_4",
      "conversation": "USER: Can you remind me how to export data from Google Analytics?\nASSISTANT: Go to Reports, select your date range, then click Export.\nUSER: Got it, thanks.",
      "expected_categories": ["none"],
      "difficulty": "easy",
      "category_focus": "How-to question"
    },
    {
      "id": "tools_5",
      "conversation": "USER: Our Slack is integrated with everything - Asana, HubSpot, even our custom dashboards.\nASSISTANT: Centralized notifications.\nUSER: It's our command center. All marketing alerts go there.",
      "expected_categories": ["company.tools_config"],
      "difficulty": "medium",
      "category_focus": "Workflow integrations"
    },
    {
      "id": "tools_6",
      "conversation": "USER: We're evaluating Notion vs Confluence for our marketing wiki.\nASSISTANT: What are your key requirements?\nUSER: Easy editing and good search. Our current docs are a mess.",
      "expected_categories": ["none"],
      "difficulty": "medium",
      "category_focus": "Tool evaluation (not decided)"
    },
    {
      "id": "user_1",
      "conversation": "USER: I'm the VP of Growth, reporting to the CMO. I own all acquisition channels.\nASSISTANT: That's a broad scope. What's your team size?\nUSER: 12 people across paid, organic, and partnerships.",
      "expected_categories": ["user.role_context"],
      "difficulty": "easy",
      "category_focus": "Role and scope"
    },
    {
      "id": "user_2",
      "conversation": "USER: I prefer bullet points over paragraphs. Get to the point quickly.\nASSISTANT: Noted. I'll keep responses concise and scannable.\nUSER: Perfect. Time is my scarcest resource.",
      "expected_categories": ["user.communication_style"],
      "difficulty": "easy",
      "category_focus": "Format preference"
    },
    {
      "id": "user_3",
      "conversation": "USER: Don't sugarcoat things. If something's not working, tell me directly.\nASSISTANT: Direct feedback, even when it's uncomfortable.\nUSER: Especially when it's uncomfortable. That's when I need it most.",
      "expected_categories": ["user.interaction_preferences"],
      "difficulty": "medium",
      "category_focus": "Feedback style"
    },
    {
      "id": "user_4",
      "conversation": "USER: I review all campaigns on Friday afternoons. That's when I have focus time.\nASSISTANT: I'll schedule campaign reviews for Fridays then.\nUSER: And send me a summary Thursday evening so I can prep.",
      "expected_categories": ["user.workflow_patterns"],
      "difficulty": "medium",
      "category_focus": "Review cadence"
    },
    {
      "id": "user_5",
      "conversation": "USER: Yesterday we discussed the influencer strategy. Can you pull up those notes?\nASSISTANT: Yes, we covered micro-influencer targeting and budget allocation.\nUSER: Right. Let's continue from the budget discussion.",
      "expected_categories": ["user.session_history"],
      "difficulty": "easy",
      "category_focus": "Session continuity"
    },
    {
      "id": "user_6",
      "conversation": "USER: I believe in 'strong opinions, loosely held'. I'll argue a point but change my mind with good data.\nASSISTANT: Intellectually flexible but not wishy-washy.\nUSER: Exactly. Challenge me when you think I'm wrong.",
      "expected_categories": ["user.strategic_approach", "user.interaction_preferences"],
      "difficulty": "hard",
      "category_focus": "Personal philosophy + interaction style"
    },
    {
      "id": "user_7",
      "conversation": "USER: How's your day going?\nASSISTANT: Going well, thanks! How can I help you today?\nUSER: Just checking in before our meeting.",
      "expected_categories": ["none"],
      "difficulty": "easy",
      "category_focus": "Small talk"
    },
    {
      "id": "user_8",
      "conversation": "USER: I'm a visual thinker. Show me charts and diagrams whenever possible.\nASSISTANT: I'll prioritize visual representations in my responses.\nUSER: And color-code things by priority if you can.",
      "expected_categories": ["user.communication_style"],
      "difficulty": "medium",
      "category_focus": "Visual preference"
    },
    {
      "id": "user_9",
      "conversation": "USER: I've been in marketing for 15 years, so skip the basics.\nASSISTANT: I'll assume expert-level knowledge.\nUSER: Good. I hate when tools explain obvious concepts.",
      "expected_categories": ["user.interaction_preferences", "user.role_context"],
      "difficulty": "hard",
      "category_focus": "Experience level + interaction"
    },
    {
      "id": "user_10",
      "conversation": "USER: My success metric is pipeline contribution. Everything else is vanity.\nASSISTANT: Revenue-focused measurement.\nUSER: If it doesn't move pipeline, I don't care about it.",
      "expected_categories": ["user.strategic_approach"],
      "difficulty": "medium",
      "category_focus": "Personal success definition"
    },
    {
      "id": "priority_1",
      "conversation": "USER: Q4 is all about Black Friday and Cyber Monday. Everything else takes a back seat.\nASSISTANT: So BFCM is the top priority through year-end?\nUSER: 60% of our annual revenue happens in those two weeks.",
      "expected_categories": ["company.business_priorities"],
      "difficulty": "easy",
      "category_focus": "Seasonal priority"
    },
    {
      "id": "priority_2",
      "conversation": "USER: We're launching a new product line next month. All marketing resources are shifting to support it.\nASSISTANT: What's the launch timeline?\nUSER: Soft launch March 1st, full campaign March 15th.",
      "expected_categories": ["company.business_priorities"],
      "difficulty": "medium",
      "category_focus": "Product launch priority"
    },
    {
      "id": "priority_3",
      "conversation": "USER: The board wants us to focus on profitability over growth this year.\nASSISTANT: A shift from growth-at-all-costs.\nUSER: Yes. CAC payback under 12 months is now mandatory.",
      "expected_categories": ["company.business_priorities", "company.strategic_signatures"],
      "difficulty": "hard",
      "category_focus": "Strategic priority shift"
    },
    {
      "id": "priority_4",
      "conversation": "USER: We need to hit 10,000 email subscribers by end of quarter.\nASSISTANT: What's the current count?\nUSER: 6,500. So we need about 120 new subs per day.",
      "expected_categories": ["company.business_priorities"],
      "difficulty": "medium",
      "category_focus": "Specific goal"
    },
    {
      "id": "priority_5",
      "conversation": "USER: Retention is our #1 focus this year. We've been leaking customers.\nASSISTANT: Churn reduction over acquisition.\nUSER: We're pausing all acquisition campaigns until we fix the retention problem.",
      "expected_categories": ["company.business_priorities"],
      "difficulty": "medium",
      "category_focus": "Strategic focus"
    },
    {
      "id": "priority_6",
      "conversation": "USER: What should we prioritize next quarter?\nASSISTANT: That depends on your goals. What's most important to the business?\nUSER: I'm not sure yet. Still figuring it out.",
      "expected_categories": ["none"],
      "difficulty": "medium",
      "category_focus": "Undefined priority"
    },
    {
      "id": "knowledge_1",
      "conversation": "USER: We have a 50-page brand book that every agency partner must follow.\nASSISTANT: Comprehensive brand guidelines.\nUSER: It covers everything from logo usage to photography style.",
      "expected_categories": ["company.knowledge_artifacts"],
      "difficulty": "easy",
      "category_focus": "Brand documentation"
    },
    {
      "id": "knowledge_2",
      "conversation": "USER: Our content playbook defines the exact process from ideation to publication.\nASSISTANT: A documented workflow.\nUSER: Every piece of content goes through the same 7-step process.",
      "expected_categories": ["company.knowledge_artifacts"],
      "difficulty": "medium",
      "category_focus": "Process documentation"
    },
    {
      "id": "knowledge_3",
      "conversation": "USER: We have templates for every email type - welcome, abandoned cart, win-back, you name it.\nASSISTANT: A comprehensive email template library.\nUSER: It's saved us hundreds of hours. New team members can start producing immediately.",
      "expected_categories": ["company.knowledge_artifacts"],
      "difficulty": "medium",
      "category_focus": "Template library"
    },
    {
      "id": "knowledge_4",
      "conversation": "USER: Our style guide says we never use exclamation marks in headlines.\nASSISTANT: A specific editorial rule.\nUSER: It's part of our understated brand voice.",
      "expected_categories": ["company.knowledge_artifacts", "company.brand_core"],
      "difficulty": "hard",
      "category_focus": "Style guide overlapping brand"
    }
  ]
}