rl_hack / server /hr_onboarding_environment.py
devxpy's picture
Upload folder using huggingface_hub
e181764 verified
"""
HR Onboarding/Offboarding Environment Implementation.
An OpenEnv environment that simulates enterprise HR workflows.
The agent calls tools (hr_create_employee, it_assign_asset, etc.)
to complete onboarding/offboarding tasks. Reward is computed via rubrics.
"""
import json
import random
from typing import Any, Dict, List, Optional
from uuid import uuid4
from openenv.core.env_server.interfaces import Environment
from openenv.core.env_server.types import State
from models import HROnboardingAction, HROnboardingObservation
try:
from .world import WorldState
from .tools import ToolRegistry, TOOL_DEFINITIONS
from .tasks import TaskGenerator
from .rubrics import RubricEvaluator
except ImportError:
from world import WorldState
from tools import ToolRegistry, TOOL_DEFINITIONS
from tasks import TaskGenerator
from rubrics import RubricEvaluator
class HROnboardingEnvironment(Environment):
    """
    HR Onboarding/Offboarding environment.

    Simulates an enterprise HR system with 200+ employees, 8 departments,
    RBAC, approval chains, and IT provisioning. The agent calls one of 25
    tools per step to complete onboarding/offboarding tasks.

    An episode ends only when ``max_steps`` tool calls have been made; the
    rubric score is computed on that final step and every earlier step
    reports a reward of 0.0.

    Example:
        >>> env = HROnboardingEnvironment()
        >>> obs = env.reset()
        >>> print(obs.instruction)  # "Onboard Priya Sharma to Engineering..."
        >>>
        >>> obs = env.step(HROnboardingAction(
        ...     tool_name="hr_create_employee",
        ...     arguments={"name": "Priya Sharma", "department": "Engineering",
        ...                "level": "L2", "role": "Software Engineer"}
        ... ))
        >>> print(obs.tool_result)  # {"success": true, "employee": {...}}
        >>> print(obs.reward)  # 0.0 (intermediate) or 0.85 (final)
    """

    SUPPORTS_CONCURRENT_SESSIONS: bool = True

    def __init__(self, seed: int = 42, max_steps: int = 15):
        """
        Initialize the HR environment.

        Args:
            seed: Seed for the local RNG and for the task generator.
            max_steps: Number of tool calls after which an episode ends.
        """
        self._seed = seed
        self._max_steps = max_steps
        self._rng = random.Random(seed)

        # World state + tools
        self.world = WorldState()
        self.tool_registry = ToolRegistry(self.world)
        self.evaluator = RubricEvaluator()

        # Tasks: generated once up front, then cycled through by reset().
        self._task_gen = TaskGenerator(self.world, seed=seed)
        self._tasks = self._task_gen.generate_all_tasks()
        self._task_idx = 0
        self._current_task: Optional[Any] = None  # set by reset()

        # Episode state
        self._state = State(episode_id=str(uuid4()), step_count=0)
        self._done = False
        self._tool_names = [t["name"] for t in TOOL_DEFINITIONS]

    def reset(self) -> HROnboardingObservation:
        """
        Reset the environment for a new episode.

        Picks the next task (cycling through the generated task list),
        resets the world state, applies the task's setup function if it has
        one, and returns the initial observation with the task instruction
        and the available tool names.

        Returns:
            HROnboardingObservation for step 0 of the new episode.

        Raises:
            RuntimeError: If the task generator produced no tasks.
        """
        # Fail fast with a clear message, before mutating any state, instead
        # of the ZeroDivisionError the modulo below would otherwise raise.
        task_count = len(self._tasks)
        if task_count == 0:
            raise RuntimeError(
                "HROnboardingEnvironment has no tasks to run: "
                "the task generator returned an empty task list"
            )

        self.world.reset()
        self._done = False

        # Pick next task (cycle through)
        task = self._tasks[self._task_idx % task_count]
        self._current_task = task
        self._task_idx += 1

        # Apply task setup if any (runs against the freshly reset world)
        if task.setup_fn:
            task.setup_fn(self.world)

        self._state = State(episode_id=str(uuid4()), step_count=0)

        return HROnboardingObservation(
            task_id=task.task_id,
            instruction=task.instruction,
            tool_name="",
            tool_result={},
            step=0,
            max_steps=self._max_steps,
            available_tools=self._tool_names,
            done=False,
            reward=0.0,
            metadata={
                "difficulty": task.difficulty,
                "category": task.category,
                "context": task.context,
            },
        )

    def step(self, action: HROnboardingAction) -> HROnboardingObservation:  # type: ignore[override]
        """
        Execute one step: call the specified tool and return the result.

        Args:
            action: HROnboardingAction with tool_name and arguments.

        Returns:
            HROnboardingObservation with the tool result, the done flag, and
            the reward. The reward is the rubric score on the step that
            reaches ``max_steps`` and 0.0 otherwise. If the episode has
            already finished, the tool is not executed and an error
            observation is returned instead.
        """
        task = self._current_task
        task_id = task.task_id if task else ""

        if self._done:
            # Episode is over: do not touch the world or the step counter.
            return HROnboardingObservation(
                task_id=task_id,
                instruction="",
                tool_name=action.tool_name,
                tool_result={"error": "Episode already finished"},
                step=self._state.step_count,
                max_steps=self._max_steps,
                available_tools=self._tool_names,
                done=True,
                reward=0.0,
            )

        self._state.step_count += 1

        # Execute the tool
        result = self.tool_registry.execute(action.tool_name, action.arguments)

        # The episode ends only when the step budget is exhausted.
        done = self._state.step_count >= self._max_steps
        self._done = done

        # Compute reward on final step, from the world's recorded action log.
        reward = 0.0
        eval_info: Dict[str, Any] = {}
        if done and task:
            eval_result = self.evaluator.evaluate(task, self.world.action_log)
            reward = eval_result["score"]
            eval_info = eval_result

        # Attach the full evaluation only when there is one to report.
        metadata: Dict[str, Any] = {"step": self._state.step_count}
        if eval_info:
            metadata["evaluation"] = eval_info

        return HROnboardingObservation(
            task_id=task_id,
            instruction=task.instruction if task else "",
            tool_name=action.tool_name,
            tool_result=result,
            step=self._state.step_count,
            max_steps=self._max_steps,
            available_tools=self._tool_names,
            done=done,
            reward=reward,
            metadata=metadata,
        )

    @property
    def state(self) -> State:
        """Get the current environment state (episode id and step count)."""
        return self._state