| from uuid import uuid4 |
|
|
| from openenv.core.env_server.interfaces import Environment |
|
|
| try: |
| from my_env.models import MyAction, MyObservation, MyState |
| except ImportError: |
| from models import MyAction, MyObservation, MyState |
|
|
|
|
class MyEnvironment(Environment):
    """
    World Modeling environment: a personal assistant that must
    handle scheduling conflicts, email replies, and task delegation.

    Every episode presents the same fixed scenario: a 7pm dinner that
    collides with a mandatory 6:30pm meeting. The agent acts by naming one
    of the tools in ``_AVAILABLE_TOOLS``; each call yields a per-step reward.
    An episode ends when the meeting is rescheduled (task completed) or when
    ``_MAX_STEPS`` steps have been taken (task not completed).
    """

    # NOTE(review): presumably read by the OpenEnv server to decide whether
    # sessions may run concurrently; confirm against the framework docs.
    SUPPORTS_CONCURRENT_SESSIONS: bool = True

    # Tool names advertised to the agent in every observation.
    _AVAILABLE_TOOLS = (
        "check_calendar",
        "send_email",
        "reschedule_meeting",
        "send_message",
    )

    # Hard cap on steps per episode; reaching it ends the episode.
    _MAX_STEPS = 10

    def __init__(self):
        """Create the environment with a fresh episode id and zero steps."""
        super().__init__()
        self._state = MyState(episode_id=str(uuid4()), step_count=0)

    def reset(self, seed=None, episode_id=None, **kwargs) -> MyObservation:
        """Initialize a new episode with a task scenario.

        Args:
            seed: Accepted for interface compatibility; the scenario is
                fixed, so it is not used.
            episode_id: Identifier for the new episode. A random UUID4
                string is generated when this is falsy.
            **kwargs: Ignored.

        Returns:
            The initial observation, whose ``result`` is the task description.
        """
        self._state = MyState(
            episode_id=episode_id or str(uuid4()),
            step_count=0,
            task_description=(
                "You have a dinner at 7pm but your boss just scheduled "
                "a mandatory meeting at 6:30pm. Handle the conflict."
            ),
            history=[],
        )
        return MyObservation(
            result=self._state.task_description,
            available_tools=list(self._AVAILABLE_TOOLS),
            task_completed=False,
            done=False,
            reward=0.0,
        )

    def step(self, action: MyAction, timeout_s=None, **kwargs) -> MyObservation:
        """Execute an action and return observation.

        Increments the step counter, records the dumped action in the
        episode history, and dispatches on ``action.tool_name``.

        Rewards:
            check_calendar      0.1
            send_email          0.5 (boss + "reschedule"), 0.3 (dinner), else 0.1
            reschedule_meeting  1.0 and the task is completed
            send_message        0.2
            anything else      -0.1

        Args:
            action: The tool invocation; ``tool_name`` selects the branch and
                ``tool_args`` supplies ``to``/``body`` where relevant.
            timeout_s: Accepted for interface compatibility; not used.
            **kwargs: Ignored.

        Returns:
            An observation with the tool result, the step reward, ``done``
            (task completed or step limit reached) and ``task_completed``
            (True only when the meeting was actually rescheduled).
        """
        self._state.step_count += 1
        self._state.history.append(action.model_dump())

        task_completed = False
        tool = action.tool_name

        if tool == "check_calendar":
            result = "Calendar: Dinner 7pm, Meeting 6:30-7:30pm (mandatory)"
            reward = 0.1
        elif tool == "send_email":
            result, reward = self._send_email(action.tool_args or {})
        elif tool == "reschedule_meeting":
            result = "Meeting rescheduled to 5:30pm. Conflict resolved!"
            reward = 1.0
            task_completed = True
        elif tool == "send_message":
            args = action.tool_args or {}
            result = f"Message sent: {args.get('body', '')}"
            reward = 0.2
        else:
            result = f"Unknown tool: {tool}"
            reward = -0.1

        # Running out of steps ends the episode but does NOT mean the task
        # was solved, so completion is tracked separately from termination.
        done = task_completed or self._state.step_count >= self._MAX_STEPS

        return MyObservation(
            result=result,
            available_tools=list(self._AVAILABLE_TOOLS),
            task_completed=task_completed,
            done=done,
            reward=reward,
        )

    @staticmethod
    def _send_email(args):
        """Score a ``send_email`` call and return ``(result, reward)``.

        Missing or ``None`` ``to``/``body`` values are treated as empty
        strings so a malformed action is scored rather than raising.
        """
        to = str(args.get("to") or "")
        body = str(args.get("body") or "")
        recipient = to.lower()

        if "boss" in recipient and "reschedule" in body.lower():
            return "Email sent to boss requesting meeting reschedule.", 0.5
        if "dinner" in recipient:
            return "Message sent about running late to dinner.", 0.3
        return f"Email sent to {to}.", 0.1

    @property
    def state(self) -> MyState:
        """Return the current episode state object."""
        return self._state
|
|