"""
HR Onboarding/Offboarding Environment Implementation.

An OpenEnv environment that simulates enterprise HR workflows.
The agent calls tools (hr_create_employee, it_assign_asset, etc.)
to complete onboarding/offboarding tasks. Reward is computed via rubrics.
"""
| |
|
| | import json |
| | import random |
| | from typing import Any, Dict, List, Optional |
| | from uuid import uuid4 |
| |
|
| | from openenv.core.env_server.interfaces import Environment |
| | from openenv.core.env_server.types import State |
| |
|
| | from models import HROnboardingAction, HROnboardingObservation |
| |
|
| | try: |
| | from .world import WorldState |
| | from .tools import ToolRegistry, TOOL_DEFINITIONS |
| | from .tasks import TaskGenerator |
| | from .rubrics import RubricEvaluator |
| | except ImportError: |
| | from world import WorldState |
| | from tools import ToolRegistry, TOOL_DEFINITIONS |
| | from tasks import TaskGenerator |
| | from rubrics import RubricEvaluator |
| |
|
| |
|
class HROnboardingEnvironment(Environment):
    """
    HR Onboarding/Offboarding environment.

    Simulates an enterprise HR system with 200+ employees, 8 departments,
    RBAC, approval chains, and IT provisioning. The agent calls one of 25
    tools per step to complete onboarding/offboarding tasks.

    An episode ends only when the step counter reaches ``max_steps``; the
    rubric score is computed on that final step and every earlier step
    reports a reward of 0.0.

    Example:
        >>> env = HROnboardingEnvironment()
        >>> obs = env.reset()
        >>> print(obs.instruction)  # "Onboard Priya Sharma to Engineering..."
        >>>
        >>> obs = env.step(HROnboardingAction(
        ...     tool_name="hr_create_employee",
        ...     arguments={"name": "Priya Sharma", "department": "Engineering",
        ...                "level": "L2", "role": "Software Engineer"}
        ... ))
        >>> print(obs.tool_result)  # {"success": true, "employee": {...}}
        >>> print(obs.reward)  # 0.0 (intermediate) or 0.85 (final)
    """

    SUPPORTS_CONCURRENT_SESSIONS: bool = True

    def __init__(self, seed: int = 42, max_steps: int = 15):
        """Initialize the HR environment.

        Args:
            seed: Seed passed to the task generator and used for the
                environment's own ``random.Random`` instance.
            max_steps: Number of tool calls after which an episode is
                marked done and scored.
        """
        # Let the base class set up whatever attributes it defines before
        # this subclass adds its own.
        super().__init__()

        self._seed = seed
        self._max_steps = max_steps
        self._rng = random.Random(seed)

        # Simulated world plus the tool and scoring layers built on top of it.
        self.world = WorldState()
        self.tool_registry = ToolRegistry(self.world)
        self.evaluator = RubricEvaluator()

        # All tasks are generated up front; reset() cycles through them.
        self._task_gen = TaskGenerator(self.world, seed=seed)
        self._tasks = self._task_gen.generate_all_tasks()
        self._task_idx = 0
        self._current_task: Optional[Any] = None

        # Per-episode bookkeeping.
        self._state = State(episode_id=str(uuid4()), step_count=0)
        self._done = False
        self._tool_names = [t["name"] for t in TOOL_DEFINITIONS]

    def reset(self) -> HROnboardingObservation:
        """
        Reset the environment for a new episode.

        Resets the world state, selects the next task (wrapping around to
        the first task after the last one), runs the task's setup hook if it
        has one, and starts a fresh ``State`` with a new episode id.

        Returns:
            The initial observation carrying the task instruction, the list
            of available tool names, and task metadata (difficulty,
            category, context).

        Raises:
            RuntimeError: If the task generator produced no tasks.
        """
        # Fail with a clear message instead of a ZeroDivisionError from the
        # modulo below when there is nothing to cycle through.
        if not self._tasks:
            raise RuntimeError(
                "No tasks available: the task generator returned an empty task list"
            )

        self.world.reset()
        self._done = False

        # Round-robin task selection.
        self._current_task = self._tasks[self._task_idx % len(self._tasks)]
        self._task_idx += 1
        task = self._current_task

        # Optional per-task world preparation.
        if task.setup_fn:
            task.setup_fn(self.world)

        self._state = State(episode_id=str(uuid4()), step_count=0)

        return HROnboardingObservation(
            task_id=task.task_id,
            instruction=task.instruction,
            tool_name="",
            tool_result={},
            step=0,
            max_steps=self._max_steps,
            available_tools=self._tool_names,
            done=False,
            reward=0.0,
            metadata={
                "difficulty": task.difficulty,
                "category": task.category,
                "context": task.context,
            },
        )

    def step(self, action: HROnboardingAction) -> HROnboardingObservation:
        """
        Execute one step: call the specified tool and return the result.

        Args:
            action: HROnboardingAction with tool_name and arguments.

        Returns:
            HROnboardingObservation with the tool result, the step counter,
            and the done flag. The reward is 0.0 until the step that reaches
            ``max_steps``; on that step it is the evaluator's ``"score"``
            and the full evaluation result is attached under
            ``metadata["evaluation"]``. Calls made after the episode has
            finished execute no tool and return an error result.
        """
        task = self._current_task
        task_id = task.task_id if task else ""

        # Episode already over: report the error without running the tool
        # or advancing the step counter.
        if self._done:
            return HROnboardingObservation(
                task_id=task_id,
                instruction="",
                tool_name=action.tool_name,
                tool_result={"error": "Episode already finished"},
                step=self._state.step_count,
                max_steps=self._max_steps,
                available_tools=self._tool_names,
                done=True,
                reward=0.0,
            )

        self._state.step_count += 1

        result = self.tool_registry.execute(action.tool_name, action.arguments)

        # The step budget is the only termination condition.
        done = self._state.step_count >= self._max_steps
        self._done = done

        # Score only on the terminal step, and only if a task is active.
        reward = 0.0
        eval_info: Dict[str, Any] = {}
        if done and task:
            eval_info = self.evaluator.evaluate(task, self.world.action_log)
            reward = eval_info["score"]

        metadata: Dict[str, Any] = {"step": self._state.step_count}
        if eval_info:
            metadata["evaluation"] = eval_info

        return HROnboardingObservation(
            task_id=task_id,
            instruction=task.instruction if task else "",
            tool_name=action.tool_name,
            tool_result=result,
            step=self._state.step_count,
            max_steps=self._max_steps,
            available_tools=self._tool_names,
            done=done,
            reward=reward,
            metadata=metadata,
        )

    @property
    def state(self) -> State:
        """Get the current environment state."""
        return self._state
| |
|