Spaces:
Sleeping
Sleeping
"""
SchemaShift EA Arena — Task Templates

12 tasks across 3 difficulty tiers with schema drift events.
Each task simulates a real executive assistant workflow where
APIs, forms, and policies change mid-episode.
"""

# Each entry in TASKS is a dict with these keys:
#   id            - task identifier (snake_case string, unique within this list)
#   title         - short human-readable name
#   description   - the instruction text for the task
#   seed          - per-tool records plus a "policies" dict
#                   (presumably the initial world state — confirm against the env loader)
#   drift_at_step - integer step tied to the drift event
#                   (presumably the step at which the drift fires — confirm)
#   drift_event   - dict with "type" ("schema_change", "policy_change" or
#                   "actor_conflict"), "tool", "change", plus change-specific fields
#   target        - expected records per tool; matcher-style keys such as "contains",
#                   "title_contains" or "attendees_include" appear here
#                   (their exact semantics live in the grader, not in this file)
#   max_steps     - integer (presumably the per-episode step budget — confirm)
TASKS = [
    # ───────────────────────────────────────────────────────────
    # TIER 1: Simple (3-4 tool calls, 1 drift event)
    # ───────────────────────────────────────────────────────────
    {
        "id": "reschedule_dinner",
        "title": "Reschedule dinner due to meeting conflict",
        "description": (
            "Your VP moved the board prep meeting to 6:30 PM tonight. "
            "You have dinner with Alex at 7:00 PM at Lucia's. "
            "Reschedule dinner to 8:30 PM, update the restaurant booking, "
            "and email Alex about the change."
        ),
        "seed": {
            "calendar": [
                # Board Prep sits at the VP's new time from the description (6:30 PM);
                # that is what collides with the 19:00 dinner.
                {"id": 1, "title": "Board Prep", "time": "18:30", "attendees": ["vp@company.com"], "status": "scheduled"},
                {"id": 2, "title": "Dinner with Alex", "time": "19:00", "location": "Lucia's", "attendees": ["alex@friends.com"], "status": "scheduled"},
            ],
            "bookings": [
                {"id": 101, "restaurant": "Lucia's", "time": "19:00", "party_size": 2, "status": "confirmed"},
            ],
            "emails": [],
            "policies": {"max_booking_changes": 3},
        },
        "drift_at_step": 2,
        "drift_event": {"type": "schema_change", "tool": "bookings", "change": "time_field_renamed", "old_field": "time", "new_field": "reservation_time"},
        "target": {
            "calendar": [{"id": 2, "time": "20:30", "status": "rescheduled"}],
            "bookings": [{"id": 101, "reservation_time": "20:30", "status": "confirmed"}],
            "emails": [{"to": "alex@friends.com", "contains": "reschedule"}],
        },
        "max_steps": 10,
    },
    {
        "id": "book_travel_simple",
        "title": "Book a flight for Monday meeting",
        "description": (
            "Book a flight from SFO to LAX for Monday morning. "
            "The meeting is at 2 PM so arrive by noon. "
            "Email travel@company.com with the booking confirmation."
        ),
        "seed": {
            "calendar": [
                {"id": 1, "title": "LA Client Meeting", "time": "14:00", "date": "2026-03-09", "location": "LA Office", "attendees": ["client@partner.com"], "status": "scheduled"},
            ],
            "travel": [
                {"flight": "UA101", "from": "SFO", "to": "LAX", "depart": "07:00", "arrive": "08:30", "price": 189, "status": "available"},
                {"flight": "UA205", "from": "SFO", "to": "LAX", "depart": "09:00", "arrive": "10:30", "price": 249, "status": "available"},
            ],
            "emails": [],
            "policies": {"max_flight_cost": 300},
        },
        "drift_at_step": 2,
        "drift_event": {"type": "policy_change", "tool": "travel", "change": "cost_limit_lowered", "old_limit": 300, "new_limit": 200},
        "target": {
            # UA101 ($189) is the only seeded flight under the lowered $200 limit.
            "travel": [{"flight": "UA101", "status": "booked"}],
            "emails": [{"to": "travel@company.com", "contains": "booking"}],
        },
        "max_steps": 8,
    },
    {
        "id": "reply_email_urgent",
        "title": "Reply to urgent client email",
        "description": (
            "Client Sarah at sarah@bigcorp.com sent an urgent email asking "
            "about the Q2 proposal deadline. The deadline is March 15. "
            "Reply to her email with the deadline and CC your manager mgr@company.com."
        ),
        "seed": {
            "emails": [
                {"id": 1, "from": "sarah@bigcorp.com", "subject": "Q2 Proposal Deadline?", "body": "Hi, when is the Q2 proposal due? We need to plan resources.", "status": "unread"},
            ],
            "docs": [
                {"id": "q2-proposal", "title": "Q2 Proposal", "deadline": "2026-03-15", "status": "draft"},
            ],
            "policies": {"reply_within_hours": 2, "cc_manager_on_client": True},
        },
        "drift_at_step": 1,
        "drift_event": {"type": "schema_change", "tool": "emails", "change": "cc_field_renamed", "old_field": "cc", "new_field": "carbon_copy"},
        "target": {
            "emails": [{"to": "sarah@bigcorp.com", "contains": "March 15", "carbon_copy": "mgr@company.com"}],
        },
        "max_steps": 6,
    },
    {
        "id": "cancel_meeting_notify",
        "title": "Cancel tomorrow's standup and notify team",
        "description": (
            "Cancel tomorrow's team standup (event 1) because the CEO "
            "called an all-hands at the same time. Email the team list: "
            "dev1@company.com, dev2@company.com, dev3@company.com."
        ),
        "seed": {
            "calendar": [
                {"id": 1, "title": "Team Standup", "time": "09:00", "date": "2026-03-08", "attendees": ["dev1@company.com", "dev2@company.com", "dev3@company.com"], "status": "scheduled"},
                {"id": 2, "title": "CEO All-Hands", "time": "09:00", "date": "2026-03-08", "attendees": ["all@company.com"], "status": "scheduled"},
            ],
            "emails": [],
            "policies": {},
        },
        "drift_at_step": 2,
        "drift_event": {"type": "schema_change", "tool": "calendar", "change": "status_values_changed", "old_values": ["scheduled", "cancelled"], "new_values": ["active", "removed"]},
        "target": {
            # "removed" is the post-drift replacement for "cancelled".
            "calendar": [{"id": 1, "status": "removed"}],
            "emails": [{"to": "dev1@company.com"}, {"to": "dev2@company.com"}, {"to": "dev3@company.com"}],
        },
        "max_steps": 10,
    },
    # ───────────────────────────────────────────────────────────
    # TIER 2: Medium (5-6 tool calls, 1 drift event per task)
    # ───────────────────────────────────────────────────────────
    {
        "id": "travel_with_approval",
        "title": "Book international travel with manager approval",
        "description": (
            "Book a flight from SFO to London for the conference on March 20. "
            "Budget is $2000. Book hotel for 3 nights near the venue. "
            "Get manager approval (mgr@company.com) since international travel "
            "requires it. Email travel@company.com with full itinerary."
        ),
        "seed": {
            "travel": [
                {"flight": "BA285", "from": "SFO", "to": "LHR", "depart": "19:00", "arrive": "13:00+1", "price": 1200, "status": "available"},
                {"hotel": "Hilton Tower Bridge", "rate": 250, "nights": 3, "status": "available"},
            ],
            "calendar": [
                {"id": 1, "title": "London Conference", "date": "2026-03-20", "location": "ExCeL London", "status": "scheduled"},
            ],
            "emails": [],
            "policies": {"intl_travel_requires_approval": True, "max_hotel_rate": 300},
        },
        "drift_at_step": 3,
        "drift_event": {"type": "policy_change", "tool": "travel", "change": "approval_requires_itemized", "new_requirement": "must include flight cost, hotel cost, and total in approval request"},
        "target": {
            "travel": [{"flight": "BA285", "status": "booked"}, {"hotel": "Hilton Tower Bridge", "status": "booked"}],
            "emails": [{"to": "mgr@company.com", "contains": "approval"}, {"to": "travel@company.com", "contains": "itinerary"}],
        },
        "max_steps": 12,
    },
    {
        "id": "conflict_resolution",
        "title": "Resolve double-booked afternoon",
        "description": (
            "You have 3 meetings at 2 PM: client call (high priority), "
            "team sync (can move), and 1:1 with intern (can move). "
            "Keep the client call, move team sync to 3 PM, move 1:1 to 4 PM. "
            "Email all affected attendees about changes."
        ),
        "seed": {
            "calendar": [
                {"id": 1, "title": "Client Call", "time": "14:00", "priority": "high", "attendees": ["client@partner.com"], "status": "scheduled"},
                {"id": 2, "title": "Team Sync", "time": "14:00", "priority": "medium", "attendees": ["team@company.com"], "status": "scheduled"},
                {"id": 3, "title": "1:1 with Intern", "time": "14:00", "priority": "low", "attendees": ["intern@company.com"], "status": "scheduled"},
            ],
            "emails": [],
            "policies": {"notify_on_reschedule": True},
        },
        "drift_at_step": 3,
        "drift_event": {"type": "actor_conflict", "tool": "calendar", "change": "attendee_requests_different_time", "actor": "team@company.com", "message": "3 PM doesn't work, can we do 3:30?"},
        "target": {
            "calendar": [
                {"id": 1, "time": "14:00", "status": "scheduled"},
                # 15:30 (not the 3 PM from the description) reflects the team's drift request.
                {"id": 2, "time": "15:30", "status": "rescheduled"},
                {"id": 3, "time": "16:00", "status": "rescheduled"},
            ],
            "emails": [{"to": "team@company.com"}, {"to": "intern@company.com"}],
        },
        "max_steps": 12,
    },
    {
        "id": "expense_report",
        "title": "Submit expense report with receipt changes",
        "description": (
            "Submit expense report for last week's client dinner ($185) "
            "and taxi ($42). Attach receipts, categorize correctly, "
            "and email finance@company.com for approval."
        ),
        "seed": {
            "expenses": [
                {"id": 1, "type": "meal", "amount": 185, "description": "Client dinner at Nobu", "receipt": True, "status": "draft"},
                {"id": 2, "type": "transport", "amount": 42, "description": "Taxi to restaurant", "receipt": True, "status": "draft"},
            ],
            "emails": [],
            "policies": {"meal_limit": 200, "require_receipt_over": 25, "approval_required_over": 100},
        },
        "drift_at_step": 2,
        "drift_event": {"type": "policy_change", "tool": "expenses", "change": "meal_limit_lowered", "old_limit": 200, "new_limit": 150, "action": "meals over new limit require VP approval"},
        "target": {
            "expenses": [{"id": 1, "status": "submitted"}, {"id": 2, "status": "submitted"}],
            # The $185 meal exceeds the lowered $150 limit, hence the VP approval email.
            "emails": [{"to": "finance@company.com", "contains": "expense"}, {"to": "vp@company.com", "contains": "approval"}],
        },
        "max_steps": 10,
    },
    {
        "id": "onboard_new_hire",
        "title": "Onboard new team member",
        "description": (
            "New hire Jordan (jordan@company.com) starts Monday. "
            "Schedule a welcome meeting at 10 AM with the team, "
            "create their onboarding doc, add them to the team calendar, "
            "and email IT (it@company.com) to set up their accounts."
        ),
        "seed": {
            "calendar": [],
            "docs": [],
            "emails": [],
            "team": [
                {"name": "Jordan Lee", "email": "jordan@company.com", "role": "engineer", "start_date": "2026-03-09"},
            ],
            "policies": {"onboard_checklist": ["welcome_meeting", "onboarding_doc", "it_setup", "team_intro"]},
        },
        "drift_at_step": 3,
        "drift_event": {"type": "schema_change", "tool": "docs", "change": "template_format_changed", "old_format": "markdown", "new_format": "json"},
        "target": {
            "calendar": [{"title_contains": "Welcome", "attendees_include": "jordan@company.com"}],
            "docs": [{"title_contains": "Onboarding"}],
            # NOTE(review): the description never asks for an email to Jordan, yet the
            # target requires one — confirm the description and the grader agree.
            "emails": [{"to": "it@company.com", "contains": "account"}, {"to": "jordan@company.com", "contains": "welcome"}],
        },
        "max_steps": 12,
    },
    # ───────────────────────────────────────────────────────────
    # TIER 3: Complex (7+ tool calls, 1 drift event per task)
    # ───────────────────────────────────────────────────────────
    {
        "id": "full_day_reorg",
        "title": "Reorganize entire day after CEO emergency",
        "description": (
            "CEO called emergency board meeting at 11 AM. Reorganize the day: "
            "move the 11 AM team review to 2 PM, cancel the noon lunch with vendor "
            "(email vendor@partner.com to apologize), keep the 3 PM client call, "
            "book a conference room for the board meeting, and email all attendees "
            "about every change."
        ),
        "seed": {
            "calendar": [
                {"id": 1, "title": "Team Review", "time": "11:00", "attendees": ["team@company.com"], "status": "scheduled"},
                {"id": 2, "title": "Lunch with Vendor", "time": "12:00", "attendees": ["vendor@partner.com"], "status": "scheduled"},
                {"id": 3, "title": "Client Call", "time": "15:00", "attendees": ["client@bigcorp.com"], "status": "scheduled"},
            ],
            "rooms": [
                {"id": "conf-a", "name": "Board Room", "capacity": 20, "available": True},
                {"id": "conf-b", "name": "Small Meeting", "capacity": 6, "available": True},
            ],
            "emails": [],
            "policies": {"board_meeting_room_min_capacity": 15},
        },
        "drift_at_step": 3,
        "drift_event": {"type": "schema_change", "tool": "rooms", "change": "booking_requires_purpose", "new_required_field": "purpose"},
        "target": {
            "calendar": [
                {"id": 1, "time": "14:00", "status": "rescheduled"},
                {"id": 2, "status": "cancelled"},
            ],
            # NOTE(review): seed rooms expose "available" but the target checks "status" —
            # confirm the rooms tool writes "status" when a room is booked.
            "rooms": [{"id": "conf-a", "status": "booked", "purpose": "CEO Board Meeting"}],
            "emails": [{"to": "vendor@partner.com", "contains": "cancel"}, {"to": "team@company.com", "contains": "moved"}],
        },
        "max_steps": 15,
    },
    {
        "id": "multi_actor_conflict",
        "title": "Handle conflicting requests from VP and client",
        "description": (
            "VP wants you to schedule a strategy session Thursday 2-4 PM. "
            "Client just emailed requesting a demo at the same time. "
            "The client is higher priority. Schedule the demo for Thursday 2-3 PM, "
            "move VP strategy to Friday 2-4 PM, and email both explaining."
        ),
        "seed": {
            "calendar": [],
            "emails": [
                {"id": 1, "from": "vp@company.com", "subject": "Strategy Session", "body": "Block Thursday 2-4 PM for strategy planning.", "status": "unread"},
                {"id": 2, "from": "client@bigcorp.com", "subject": "Demo Request", "body": "Can we see the product demo Thursday 2 PM?", "status": "unread"},
            ],
            "policies": {"client_priority_over_internal": True},
        },
        "drift_at_step": 4,
        "drift_event": {"type": "actor_conflict", "tool": "emails", "change": "vp_insists", "actor": "vp@company.com", "message": "Friday doesn't work. Can we do Thursday morning instead?"},
        "target": {
            "calendar": [
                {"title_contains": "Demo", "time": "14:00", "day": "Thursday"},
                # NOTE(review): the VP's drift message only says "Thursday morning"; the
                # target pins 10:00 — confirm how the grader matches "time" here.
                {"title_contains": "Strategy", "time": "10:00", "day": "Thursday"},
            ],
            "emails": [{"to": "vp@company.com", "contains": "Thursday morning"}, {"to": "client@bigcorp.com", "contains": "demo confirmed"}],
        },
        "max_steps": 15,
    },
    {
        "id": "trip_planning_drift",
        "title": "Plan team offsite with multiple schema changes",
        "description": (
            "Plan a 2-day team offsite for 8 people in Napa Valley. "
            "Book hotel, restaurant for team dinner, and transportation. "
            "Budget: $5000 total. Email team@company.com with the itinerary "
            "and finance@company.com for pre-approval."
        ),
        "seed": {
            "travel": [
                {"hotel": "Napa Inn", "rate": 180, "rooms": 4, "nights": 2, "status": "available"},
                {"transport": "Van rental", "cost": 200, "capacity": 10, "status": "available"},
            ],
            "bookings": [
                {"restaurant": "Bistro Don Giovanni", "party_size": 8, "time": "19:00", "cost_per_person": 65, "status": "available"},
            ],
            "emails": [],
            "policies": {"offsite_requires_preapproval": True, "max_offsite_budget": 5000},
        },
        "drift_at_step": 3,
        "drift_event": {"type": "policy_change", "tool": "travel", "change": "budget_cut", "old_budget": 5000, "new_budget": 4000},
        "target": {
            "travel": [{"hotel": "Napa Inn", "status": "booked"}, {"transport": "Van rental", "status": "booked"}],
            "bookings": [{"restaurant": "Bistro Don Giovanni", "status": "booked"}],
            "emails": [{"to": "team@company.com", "contains": "itinerary"}, {"to": "finance@company.com", "contains": "approval"}],
        },
        "max_steps": 15,
    },
    {
        "id": "crisis_management",
        "title": "Handle server outage during client demo",
        "description": (
            "The production server went down during a client demo. "
            "Email the client (client@bigcorp.com) apologizing and offering "
            "to reschedule. Escalate to engineering (eng@company.com) with urgency. "
            "Cancel the next 2 non-critical meetings to free up time. "
            "Schedule a post-mortem for tomorrow at 10 AM. "
            "Email your VP (vp@company.com) with a status update."
        ),
        "seed": {
            "calendar": [
                {"id": 1, "title": "Client Demo", "time": "14:00", "status": "in_progress", "attendees": ["client@bigcorp.com"]},
                {"id": 2, "title": "Team Sync", "time": "15:00", "priority": "low", "status": "scheduled"},
                {"id": 3, "title": "1:1 with PM", "time": "16:00", "priority": "low", "status": "scheduled"},
                {"id": 4, "title": "Board Prep", "time": "17:00", "priority": "high", "status": "scheduled"},
            ],
            "emails": [],
            "incidents": [{"id": "INC-001", "severity": "P1", "status": "active", "service": "production-api"}],
            "policies": {"p1_notify_vp": True, "p1_cancel_nonessential": True},
        },
        "drift_at_step": 4,
        "drift_event": {"type": "schema_change", "tool": "calendar", "change": "cancel_requires_reason", "new_required_field": "cancellation_reason"},
        "target": {
            "calendar": [
                {"id": 2, "status": "cancelled", "cancellation_reason_contains": "outage"},
                {"id": 3, "status": "cancelled", "cancellation_reason_contains": "outage"},
                {"title_contains": "Post-mortem", "time": "10:00"},
            ],
            "emails": [
                {"to": "client@bigcorp.com", "contains": "apologize"},
                {"to": "eng@company.com", "contains": "escalat"},
                {"to": "vp@company.com", "contains": "status"},
            ],
        },
        "max_steps": 18,
    },
]