# Spaces: devxpy / rl_hack (Running)
# File size: 6,003 Bytes
# e181764

"""
HR Onboarding/Offboarding Environment Implementation.

An OpenEnv environment that simulates enterprise HR workflows.
The agent calls tools (hr_create_employee, it_assign_asset, etc.)
to complete onboarding/offboarding tasks. Reward is computed via rubrics.
"""

import json
import random
from typing import Any, Dict, List, Optional
from uuid import uuid4

from openenv.core.env_server.interfaces import Environment
from openenv.core.env_server.types import State

from models import HROnboardingAction, HROnboardingObservation

try:
    from .world import WorldState
    from .tools import ToolRegistry, TOOL_DEFINITIONS
    from .tasks import TaskGenerator
    from .rubrics import RubricEvaluator
except ImportError:
    from world import WorldState
    from tools import ToolRegistry, TOOL_DEFINITIONS
    from tasks import TaskGenerator
    from rubrics import RubricEvaluator


class HROnboardingEnvironment(Environment):
    """
    HR Onboarding/Offboarding environment.

    Simulates an enterprise HR system with 200+ employees, 8 departments,
    RBAC, approval chains, and IT provisioning. The agent calls one of 25
    tools per step to complete onboarding/offboarding tasks.

    Episode lifecycle as implemented here:
        * ``reset()`` resets the world, selects the next task (tasks are
          cycled in generation order) and returns the initial observation.
        * ``step()`` executes exactly one tool call. The episode ends only
          when ``max_steps`` steps have been taken; the rubric evaluation
          runs on that final step and its ``"score"`` becomes the reward.
          Every earlier step reports a reward of ``0.0``.

    Example:
        >>> env = HROnboardingEnvironment()
        >>> obs = env.reset()
        >>> print(obs.instruction)  # "Onboard Priya Sharma to Engineering..."
        >>>
        >>> obs = env.step(HROnboardingAction(
        ...     tool_name="hr_create_employee",
        ...     arguments={"name": "Priya Sharma", "department": "Engineering",
        ...                "level": "L2", "role": "Software Engineer"}
        ... ))
        >>> print(obs.tool_result)  # {"success": true, "employee": {...}}
        >>> print(obs.reward)       # 0.0 (intermediate) or 0.85 (final)
    """

    SUPPORTS_CONCURRENT_SESSIONS: bool = True

    def __init__(self, seed: int = 42, max_steps: int = 15):
        """
        Initialize the HR environment.

        Args:
            seed: Seed handed to the task generator and to the instance RNG.
            max_steps: Number of tool calls after which an episode ends.
        """
        # Run the base-class initializer first so that any attributes it
        # defines exist on this instance before we add our own.
        super().__init__()

        self._seed = seed
        self._max_steps = max_steps
        # NOTE: this RNG is not read anywhere else in this class.
        self._rng = random.Random(seed)

        # World state + tools
        self.world = WorldState()
        self.tool_registry = ToolRegistry(self.world)
        self.evaluator = RubricEvaluator()

        # Tasks
        self._task_gen = TaskGenerator(self.world, seed=seed)
        self._tasks = self._task_gen.generate_all_tasks()
        self._task_idx = 0
        # None until the first reset() selects a task.
        self._current_task: Optional[Any] = None

        # Episode state
        self._state = State(episode_id=str(uuid4()), step_count=0)
        self._done = False
        self._tool_names = [t["name"] for t in TOOL_DEFINITIONS]

    def reset(self) -> HROnboardingObservation:
        """
        Reset the environment for a new episode.

        Picks the next task, resets world state, returns initial observation
        with the task instruction and available tools.

        Returns:
            HROnboardingObservation for step 0 with an empty tool result,
            ``done=False`` and ``reward=0.0``; its metadata carries the
            task's difficulty, category and context.

        Raises:
            RuntimeError: If the task generator produced no tasks.
        """
        self.world.reset()
        self._done = False

        # Pick next task (cycle through). An empty task collection would
        # otherwise surface as an opaque ZeroDivisionError from the modulo.
        if not self._tasks:
            raise RuntimeError(
                "Cannot reset HROnboardingEnvironment: no tasks were generated"
            )
        self._current_task = self._tasks[self._task_idx % len(self._tasks)]
        self._task_idx += 1

        # Apply task setup if any (runs against the freshly reset world)
        if self._current_task.setup_fn:
            self._current_task.setup_fn(self.world)

        # Each episode gets its own id and a zeroed step counter.
        self._state = State(episode_id=str(uuid4()), step_count=0)

        return HROnboardingObservation(
            task_id=self._current_task.task_id,
            instruction=self._current_task.instruction,
            tool_name="",
            tool_result={},
            step=0,
            max_steps=self._max_steps,
            available_tools=self._tool_names,
            done=False,
            reward=0.0,
            metadata={
                "difficulty": self._current_task.difficulty,
                "category": self._current_task.category,
                "context": self._current_task.context,
            },
        )

    def step(self, action: HROnboardingAction) -> HROnboardingObservation:  # type: ignore[override]
        """
        Execute one step: call the specified tool and return the result.

        If the episode has already finished, the tool is NOT executed and an
        error observation (``done=True``, ``reward=0.0``) is returned without
        advancing the step counter.

        Args:
            action: HROnboardingAction with tool_name and arguments.

        Returns:
            HROnboardingObservation with tool result, reward (on final step), and done flag.
        """
        if self._done:
            return HROnboardingObservation(
                task_id=self._current_task.task_id if self._current_task else "",
                instruction="",
                tool_name=action.tool_name,
                tool_result={"error": "Episode already finished"},
                step=self._state.step_count,
                max_steps=self._max_steps,
                available_tools=self._tool_names,
                done=True,
                reward=0.0,
            )

        self._state.step_count += 1

        # Execute the tool
        result = self.tool_registry.execute(action.tool_name, action.arguments)

        # The only termination condition is exhausting the step budget.
        done = self._state.step_count >= self._max_steps
        self._done = done

        # Compute reward on final step; skipped when no task has been
        # selected yet (step() called before any reset()).
        reward = 0.0
        eval_info = {}
        if done and self._current_task:
            eval_result = self.evaluator.evaluate(self._current_task, self.world.action_log)
            reward = eval_result["score"]
            eval_info = eval_result

        return HROnboardingObservation(
            task_id=self._current_task.task_id if self._current_task else "",
            instruction=self._current_task.instruction if self._current_task else "",
            tool_name=action.tool_name,
            tool_result=result,
            step=self._state.step_count,
            max_steps=self._max_steps,
            available_tools=self._tool_names,
            done=done,
            reward=reward,
            metadata={
                "step": self._state.step_count,
                # The "evaluation" key is present only when an evaluation
                # ran and returned a non-empty result.
                **({"evaluation": eval_info} if eval_info else {}),
            },
        )

    @property
    def state(self) -> State:
        """Get the current environment state (episode id and step count)."""
        return self._state