schemashift / tasks.py
SidraMiconi's picture
deploy SchemaShift
a17a9f5
"""
SchemaShift EA Arena — Task Templates
12 tasks across 3 difficulty tiers with schema drift events.
Each task simulates a real executive assistant workflow where
APIs, forms, and policies change mid-episode.
"""
# Task catalogue for the SchemaShift EA Arena.
#
# Every entry is a plain dict with the same eight keys:
#   id            - unique slug for the task
#   title         - short human-readable name
#   description   - instruction text for the episode
#   seed          - initial records grouped by tool name (calendar, bookings,
#                   emails, ...) plus a "policies" dict
#   drift_at_step - integer paired with drift_event (presumably the step at
#                   which the drift fires; confirm against the environment)
#   drift_event   - ONE dict describing the mid-episode change; its "type" is
#                   "schema_change", "policy_change" or "actor_conflict"
#   target        - expected end-state grouped by tool name; keys such as
#                   "contains" / "title_contains" look like matcher hints for
#                   the grader (TODO confirm: the grader is not in this file)
#   max_steps     - integer step budget (per its name)
TASKS = [
    # -----------------------------------------------------------------
    # TIER 1: Simple (3-4 tool calls, 1 drift event)
    # -----------------------------------------------------------------
    {
        "id": "reschedule_dinner",
        "title": "Reschedule dinner due to meeting conflict",
        "description": (
            "Your VP moved the board prep meeting to 6:30 PM tonight. "
            "You have dinner with Alex at 7:00 PM at Lucia's. "
            "Reschedule dinner to 8:30 PM, update the restaurant booking, "
            "and email Alex about the change."
        ),
        "seed": {
            "calendar": [
                # Board Prep is seeded at 18:30 so the calendar shows the
                # conflict the description states; it was "15:00", which does
                # not clash with the 19:00 dinner.
                {"id": 1, "title": "Board Prep", "time": "18:30", "attendees": ["vp@company.com"], "status": "scheduled"},
                {"id": 2, "title": "Dinner with Alex", "time": "19:00", "location": "Lucia's", "attendees": ["alex@friends.com"], "status": "scheduled"},
            ],
            "bookings": [
                {"id": 101, "restaurant": "Lucia's", "time": "19:00", "party_size": 2, "status": "confirmed"},
            ],
            "emails": [],
            "policies": {"max_booking_changes": 3},
        },
        "drift_at_step": 2,
        "drift_event": {
            "type": "schema_change",
            "tool": "bookings",
            "change": "time_field_renamed",
            "old_field": "time",
            "new_field": "reservation_time",
        },
        "target": {
            "calendar": [{"id": 2, "time": "20:30", "status": "rescheduled"}],
            # Uses the post-drift field name "reservation_time".
            "bookings": [{"id": 101, "reservation_time": "20:30", "status": "confirmed"}],
            "emails": [{"to": "alex@friends.com", "contains": "reschedule"}],
        },
        "max_steps": 10,
    },
    {
        "id": "book_travel_simple",
        "title": "Book a flight for Monday meeting",
        "description": (
            "Book a flight from SFO to LAX for Monday morning. "
            "The meeting is at 2 PM so arrive by noon. "
            "Email travel@company.com with the booking confirmation."
        ),
        "seed": {
            "calendar": [
                {"id": 1, "title": "LA Client Meeting", "time": "14:00", "date": "2026-03-09", "location": "LA Office", "attendees": ["client@partner.com"], "status": "scheduled"},
            ],
            "travel": [
                {"flight": "UA101", "from": "SFO", "to": "LAX", "depart": "07:00", "arrive": "08:30", "price": 189, "status": "available"},
                {"flight": "UA205", "from": "SFO", "to": "LAX", "depart": "09:00", "arrive": "10:30", "price": 249, "status": "available"},
            ],
            "emails": [],
            "policies": {"max_flight_cost": 300},
        },
        "drift_at_step": 2,
        "drift_event": {
            "type": "policy_change",
            "tool": "travel",
            "change": "cost_limit_lowered",
            "old_limit": 300,
            "new_limit": 200,
        },
        "target": {
            # UA101 (189) is the only seeded flight under the lowered 200 limit.
            "travel": [{"flight": "UA101", "status": "booked"}],
            "emails": [{"to": "travel@company.com", "contains": "booking"}],
        },
        "max_steps": 8,
    },
    {
        "id": "reply_email_urgent",
        "title": "Reply to urgent client email",
        "description": (
            "Client Sarah at sarah@bigcorp.com sent an urgent email asking "
            "about the Q2 proposal deadline. The deadline is March 15. "
            "Reply to her email with the deadline and CC your manager mgr@company.com."
        ),
        "seed": {
            "emails": [
                {"id": 1, "from": "sarah@bigcorp.com", "subject": "Q2 Proposal Deadline?", "body": "Hi, when is the Q2 proposal due? We need to plan resources.", "status": "unread"},
            ],
            "docs": [
                {"id": "q2-proposal", "title": "Q2 Proposal", "deadline": "2026-03-15", "status": "draft"},
            ],
            "policies": {"reply_within_hours": 2, "cc_manager_on_client": True},
        },
        "drift_at_step": 1,
        "drift_event": {
            "type": "schema_change",
            "tool": "emails",
            "change": "cc_field_renamed",
            "old_field": "cc",
            "new_field": "carbon_copy",
        },
        "target": {
            # Uses the post-drift field name "carbon_copy".
            "emails": [{"to": "sarah@bigcorp.com", "contains": "March 15", "carbon_copy": "mgr@company.com"}],
        },
        "max_steps": 6,
    },
    {
        "id": "cancel_meeting_notify",
        "title": "Cancel tomorrow's standup and notify team",
        "description": (
            "Cancel tomorrow's team standup (event 1) because the CEO "
            "called an all-hands at the same time. Email the team list: "
            "dev1@company.com, dev2@company.com, dev3@company.com."
        ),
        "seed": {
            "calendar": [
                {"id": 1, "title": "Team Standup", "time": "09:00", "date": "2026-03-08", "attendees": ["dev1@company.com", "dev2@company.com", "dev3@company.com"], "status": "scheduled"},
                {"id": 2, "title": "CEO All-Hands", "time": "09:00", "date": "2026-03-08", "attendees": ["all@company.com"], "status": "scheduled"},
            ],
            "emails": [],
            "policies": {},
        },
        "drift_at_step": 2,
        "drift_event": {
            "type": "schema_change",
            "tool": "calendar",
            "change": "status_values_changed",
            "old_values": ["scheduled", "cancelled"],
            "new_values": ["active", "removed"],
        },
        "target": {
            # "removed" is the post-drift replacement for "cancelled".
            "calendar": [{"id": 1, "status": "removed"}],
            "emails": [{"to": "dev1@company.com"}, {"to": "dev2@company.com"}, {"to": "dev3@company.com"}],
        },
        "max_steps": 10,
    },
    # -----------------------------------------------------------------
    # TIER 2: Medium (5-6 tool calls; each task below defines 1 drift event)
    # -----------------------------------------------------------------
    {
        "id": "travel_with_approval",
        "title": "Book international travel with manager approval",
        "description": (
            "Book a flight from SFO to London for the conference on March 20. "
            "Budget is $2000. Book hotel for 3 nights near the venue. "
            "Get manager approval (mgr@company.com) since international travel "
            "requires it. Email travel@company.com with full itinerary."
        ),
        "seed": {
            "travel": [
                {"flight": "BA285", "from": "SFO", "to": "LHR", "depart": "19:00", "arrive": "13:00+1", "price": 1200, "status": "available"},
                {"hotel": "Hilton Tower Bridge", "rate": 250, "nights": 3, "status": "available"},
            ],
            "calendar": [
                {"id": 1, "title": "London Conference", "date": "2026-03-20", "location": "ExCeL London", "status": "scheduled"},
            ],
            "emails": [],
            "policies": {"intl_travel_requires_approval": True, "max_hotel_rate": 300},
        },
        "drift_at_step": 3,
        "drift_event": {
            "type": "policy_change",
            "tool": "travel",
            "change": "approval_requires_itemized",
            "new_requirement": "must include flight cost, hotel cost, and total in approval request",
        },
        "target": {
            "travel": [{"flight": "BA285", "status": "booked"}, {"hotel": "Hilton Tower Bridge", "status": "booked"}],
            "emails": [{"to": "mgr@company.com", "contains": "approval"}, {"to": "travel@company.com", "contains": "itinerary"}],
        },
        "max_steps": 12,
    },
    {
        "id": "conflict_resolution",
        "title": "Resolve double-booked afternoon",
        "description": (
            "You have 3 meetings at 2 PM: client call (high priority), "
            "team sync (can move), and 1:1 with intern (can move). "
            "Keep the client call, move team sync to 3 PM, move 1:1 to 4 PM. "
            "Email all affected attendees about changes."
        ),
        "seed": {
            "calendar": [
                {"id": 1, "title": "Client Call", "time": "14:00", "priority": "high", "attendees": ["client@partner.com"], "status": "scheduled"},
                {"id": 2, "title": "Team Sync", "time": "14:00", "priority": "medium", "attendees": ["team@company.com"], "status": "scheduled"},
                {"id": 3, "title": "1:1 with Intern", "time": "14:00", "priority": "low", "attendees": ["intern@company.com"], "status": "scheduled"},
            ],
            "emails": [],
            "policies": {"notify_on_reschedule": True},
        },
        "drift_at_step": 3,
        "drift_event": {
            "type": "actor_conflict",
            "tool": "calendar",
            "change": "attendee_requests_different_time",
            "actor": "team@company.com",
            "message": "3 PM doesn't work, can we do 3:30?",
        },
        "target": {
            "calendar": [
                {"id": 1, "time": "14:00", "status": "scheduled"},
                # 15:30 follows the attendee's counter-proposal in the drift
                # message, not the 3 PM from the description.
                {"id": 2, "time": "15:30", "status": "rescheduled"},
                {"id": 3, "time": "16:00", "status": "rescheduled"},
            ],
            "emails": [{"to": "team@company.com"}, {"to": "intern@company.com"}],
        },
        "max_steps": 12,
    },
    {
        "id": "expense_report",
        "title": "Submit expense report with receipt changes",
        "description": (
            "Submit expense report for last week's client dinner ($185) "
            "and taxi ($42). Attach receipts, categorize correctly, "
            "and email finance@company.com for approval."
        ),
        "seed": {
            "expenses": [
                {"id": 1, "type": "meal", "amount": 185, "description": "Client dinner at Nobu", "receipt": True, "status": "draft"},
                {"id": 2, "type": "transport", "amount": 42, "description": "Taxi to restaurant", "receipt": True, "status": "draft"},
            ],
            "emails": [],
            "policies": {"meal_limit": 200, "require_receipt_over": 25, "approval_required_over": 100},
        },
        "drift_at_step": 2,
        "drift_event": {
            "type": "policy_change",
            "tool": "expenses",
            "change": "meal_limit_lowered",
            "old_limit": 200,
            "new_limit": 150,
            "action": "meals over new limit require VP approval",
        },
        "target": {
            "expenses": [{"id": 1, "status": "submitted"}, {"id": 2, "status": "submitted"}],
            # The VP email is expected because the 185 meal exceeds the
            # lowered 150 limit.
            "emails": [{"to": "finance@company.com", "contains": "expense"}, {"to": "vp@company.com", "contains": "approval"}],
        },
        "max_steps": 10,
    },
    {
        "id": "onboard_new_hire",
        "title": "Onboard new team member",
        "description": (
            "New hire Jordan (jordan@company.com) starts Monday. "
            "Schedule a welcome meeting at 10 AM with the team, "
            "create their onboarding doc, add them to the team calendar, "
            "and email IT (it@company.com) to set up their accounts."
        ),
        "seed": {
            "calendar": [],
            "docs": [],
            "emails": [],
            "team": [
                {"name": "Jordan Lee", "email": "jordan@company.com", "role": "engineer", "start_date": "2026-03-09"},
            ],
            "policies": {"onboard_checklist": ["welcome_meeting", "onboarding_doc", "it_setup", "team_intro"]},
        },
        "drift_at_step": 3,
        "drift_event": {
            "type": "schema_change",
            "tool": "docs",
            "change": "template_format_changed",
            "old_format": "markdown",
            "new_format": "json",
        },
        "target": {
            "calendar": [{"title_contains": "Welcome", "attendees_include": "jordan@company.com"}],
            "docs": [{"title_contains": "Onboarding"}],
            "emails": [{"to": "it@company.com", "contains": "account"}, {"to": "jordan@company.com", "contains": "welcome"}],
        },
        "max_steps": 12,
    },
    # -----------------------------------------------------------------
    # TIER 3: Complex (7+ tool calls; each task below defines 1 drift event)
    # -----------------------------------------------------------------
    {
        "id": "full_day_reorg",
        "title": "Reorganize entire day after CEO emergency",
        "description": (
            "CEO called emergency board meeting at 11 AM. Reorganize the day: "
            "move the 11 AM team review to 2 PM, cancel the noon lunch with vendor "
            "(email vendor@partner.com to apologize), keep the 3 PM client call, "
            "book a conference room for the board meeting, and email all attendees "
            "about every change."
        ),
        "seed": {
            "calendar": [
                {"id": 1, "title": "Team Review", "time": "11:00", "attendees": ["team@company.com"], "status": "scheduled"},
                {"id": 2, "title": "Lunch with Vendor", "time": "12:00", "attendees": ["vendor@partner.com"], "status": "scheduled"},
                {"id": 3, "title": "Client Call", "time": "15:00", "attendees": ["client@bigcorp.com"], "status": "scheduled"},
            ],
            "rooms": [
                {"id": "conf-a", "name": "Board Room", "capacity": 20, "available": True},
                {"id": "conf-b", "name": "Small Meeting", "capacity": 6, "available": True},
            ],
            "emails": [],
            "policies": {"board_meeting_room_min_capacity": 15},
        },
        "drift_at_step": 3,
        "drift_event": {
            "type": "schema_change",
            "tool": "rooms",
            "change": "booking_requires_purpose",
            "new_required_field": "purpose",
        },
        "target": {
            "calendar": [
                {"id": 1, "time": "14:00", "status": "rescheduled"},
                {"id": 2, "status": "cancelled"},
            ],
            # conf-a is the only seeded room meeting the 15-seat minimum;
            # "purpose" is the field the drift makes mandatory.
            "rooms": [{"id": "conf-a", "status": "booked", "purpose": "CEO Board Meeting"}],
            "emails": [{"to": "vendor@partner.com", "contains": "cancel"}, {"to": "team@company.com", "contains": "moved"}],
        },
        "max_steps": 15,
    },
    {
        "id": "multi_actor_conflict",
        "title": "Handle conflicting requests from VP and client",
        "description": (
            "VP wants you to schedule a strategy session Thursday 2-4 PM. "
            "Client just emailed requesting a demo at the same time. "
            "The client is higher priority. Schedule the demo for Thursday 2-3 PM, "
            "move VP strategy to Friday 2-4 PM, and email both explaining."
        ),
        "seed": {
            "calendar": [],
            "emails": [
                {"id": 1, "from": "vp@company.com", "subject": "Strategy Session", "body": "Block Thursday 2-4 PM for strategy planning.", "status": "unread"},
                {"id": 2, "from": "client@bigcorp.com", "subject": "Demo Request", "body": "Can we see the product demo Thursday 2 PM?", "status": "unread"},
            ],
            "policies": {"client_priority_over_internal": True},
        },
        "drift_at_step": 4,
        "drift_event": {
            "type": "actor_conflict",
            "tool": "emails",
            "change": "vp_insists",
            "actor": "vp@company.com",
            "message": "Friday doesn't work. Can we do Thursday morning instead?",
        },
        "target": {
            "calendar": [
                {"title_contains": "Demo", "time": "14:00", "day": "Thursday"},
                # Thursday 10:00 reflects the VP's counter-proposal from the
                # drift message rather than the Friday slot in the description.
                {"title_contains": "Strategy", "time": "10:00", "day": "Thursday"},
            ],
            "emails": [{"to": "vp@company.com", "contains": "Thursday morning"}, {"to": "client@bigcorp.com", "contains": "demo confirmed"}],
        },
        "max_steps": 15,
    },
    {
        "id": "trip_planning_drift",
        "title": "Plan team offsite with multiple schema changes",
        "description": (
            "Plan a 2-day team offsite for 8 people in Napa Valley. "
            "Book hotel, restaurant for team dinner, and transportation. "
            "Budget: $5000 total. Email team@company.com with the itinerary "
            "and finance@company.com for pre-approval."
        ),
        "seed": {
            "travel": [
                {"hotel": "Napa Inn", "rate": 180, "rooms": 4, "nights": 2, "status": "available"},
                {"transport": "Van rental", "cost": 200, "capacity": 10, "status": "available"},
            ],
            "bookings": [
                {"restaurant": "Bistro Don Giovanni", "party_size": 8, "time": "19:00", "cost_per_person": 65, "status": "available"},
            ],
            "emails": [],
            "policies": {"offsite_requires_preapproval": True, "max_offsite_budget": 5000},
        },
        "drift_at_step": 3,
        # NOTE(review): the title mentions "multiple schema changes", but the
        # only drift defined here is a single policy change.
        "drift_event": {
            "type": "policy_change",
            "tool": "travel",
            "change": "budget_cut",
            "old_budget": 5000,
            "new_budget": 4000,
        },
        "target": {
            "travel": [{"hotel": "Napa Inn", "status": "booked"}, {"transport": "Van rental", "status": "booked"}],
            "bookings": [{"restaurant": "Bistro Don Giovanni", "status": "booked"}],
            "emails": [{"to": "team@company.com", "contains": "itinerary"}, {"to": "finance@company.com", "contains": "approval"}],
        },
        "max_steps": 15,
    },
    {
        "id": "crisis_management",
        "title": "Handle server outage during client demo",
        "description": (
            "The production server went down during a client demo. "
            "Email the client (client@bigcorp.com) apologizing and offering "
            "to reschedule. Escalate to engineering (eng@company.com) with urgency. "
            "Cancel the next 2 non-critical meetings to free up time. "
            "Schedule a post-mortem for tomorrow at 10 AM. "
            "Email your VP (vp@company.com) with a status update."
        ),
        "seed": {
            "calendar": [
                {"id": 1, "title": "Client Demo", "time": "14:00", "status": "in_progress", "attendees": ["client@bigcorp.com"]},
                {"id": 2, "title": "Team Sync", "time": "15:00", "priority": "low", "status": "scheduled"},
                {"id": 3, "title": "1:1 with PM", "time": "16:00", "priority": "low", "status": "scheduled"},
                {"id": 4, "title": "Board Prep", "time": "17:00", "priority": "high", "status": "scheduled"},
            ],
            "emails": [],
            "incidents": [{"id": "INC-001", "severity": "P1", "status": "active", "service": "production-api"}],
            "policies": {"p1_notify_vp": True, "p1_cancel_nonessential": True},
        },
        "drift_at_step": 4,
        "drift_event": {
            "type": "schema_change",
            "tool": "calendar",
            "change": "cancel_requires_reason",
            "new_required_field": "cancellation_reason",
        },
        "target": {
            "calendar": [
                # Events 2 and 3 are the two low-priority meetings in the seed.
                {"id": 2, "status": "cancelled", "cancellation_reason_contains": "outage"},
                {"id": 3, "status": "cancelled", "cancellation_reason_contains": "outage"},
                {"title_contains": "Post-mortem", "time": "10:00"},
            ],
            "emails": [
                {"to": "client@bigcorp.com", "contains": "apologize"},
                {"to": "eng@company.com", "contains": "escalat"},
                {"to": "vp@company.com", "contains": "status"},
            ],
        },
        "max_steps": 18,
    },
]