---
# Environment manifest for the email triage assistant.
# Sections: top-level metadata, the task catalogue, the HTTP endpoint paths,
# and the docker build/resource settings.

# NOTE(review): unquoted 1.0 loads as a float, not a string. Quote it
# ("1.0") if the consumer compares it as a version string -- confirm.
openenv_v: 1.0
name: email_triage_assistant
description: "A real-world simulation of email triage and scheduling. Not a toy environment."
category: "Productivity"
tags: ["Agentic", "Email", "Scheduling", "Triage"]

# Task catalogue: three levels, listed from easy to hard. Every task declares
# the same reward_range of [0.0, 1.0].
tasks:
  # Level 1: move a single obvious spam message to the Spam folder.
  - id: 1
    name: "Spam Guard (Level 1)"
    description: "Identify and archive a clear spam email ($1M claims) to the Spam folder."
    motivation: "Reduces inbox clutter and enhances cybersecurity posture by removing phishing threats."
    difficulty: "easy"
    reward_range: [0.0, 1.0]
    expected_behavior: "Agent identifies the sender 'spam@bott.io' and correctly applies the MOVE action to the 'Spam' target folder."
  # Level 2: sort several Inbox emails into the 'Work' and 'Archive' folders.
  - id: 2
    name: "Organization Workflow (Level 2)"
    description: "Categorize multi-topic emails from Inbox into 'Work' and 'Archive' folders."
    motivation: "Standard professional office workflow to maintain a clear organizational structure."
    difficulty: "medium"
    reward_range: [0.0, 1.0]
    expected_behavior: "Agent sorts 'Urgent: Project Update' and 'Daily Briefing' email IDs correctly in a single trajectory."
  # Level 3: schedule a meeting at a time that does not clash with the calendar.
  - id: 3
    name: "Calendar Coordinator (Level 3)"
    description: "Schedule a meeting reply while avoiding conflicts (Busy 10 AM, Free 2 PM)."
    motivation: "Requires high-level logical reasoning and information extraction from the 'Calendar' observation field."
    difficulty: "hard"
    reward_range: [0.0, 1.0]
    expected_behavior: "Agent extracts busy times from the calendar and generates a SCHEDULE action at a non-conflicting time (2 PM)."

# URL paths for the environment's reset, step and state operations.
endpoints:
  reset: /reset
  step: /step
  state: /state

# Container settings: Dockerfile location plus memory and vCPU values.
# NOTE(review): presumably memory/vcpu are resource limits or requests for the
# running container -- confirm against the consuming tool's schema.
docker:
  build: ./Dockerfile
  memory: 8gb
  vcpu: 2