# Hugging Face Space: SidraMiconi/opsgate (status: Sleeping)
# File size: 5,951 bytes

"""OpsGate Environment — the core reset/step/state loop."""
import uuid
import sys
import os

sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))

from openenv.core.env_server.interfaces import Environment
from models import ToolCall, ToolResult, EpisodeState, AuditEvent
from tools.crm import CRMTool
from tools.billing import BillingTool
from tools.calendar import CalendarTool
from tools.email import EmailTool
from verifier import verify_episode
from tasks import TASKS
from hyperparameters import (
    MAX_STEPS_PER_EPISODE, TOOL_CALL_PENALTY, INVALID_TOOL_PENALTY,
)


class OpsGateEnvironment(Environment):
    """Episode loop that routes agent tool calls to CRM, billing, calendar and email tools.

    Each ``reset()`` selects the next entry of ``TASKS`` (round-robin), rebuilds
    the tools, seeds them from the task and starts a new ``EpisodeState``.
    Each ``step()`` executes one tool call. The episode ends when the action
    name is ``"submit"`` or when the step budget is used up, at which point the
    tool snapshots are passed to ``verify_episode`` and the verdict is recorded.
    """

    def __init__(self):
        super().__init__()
        self._build_tools()
        self._state = None
        self._current_task = None
        self._task_index = 0

    def _build_tools(self):
        """Replace all four tools with newly constructed instances."""
        self.crm = CRMTool()
        self.billing = BillingTool()
        self.calendar = CalendarTool()
        self.email = EmailTool()

    def _log_audit(self, event_type, action, title, detail="", severity="info"):
        """Append an ``AuditEvent`` to the current episode's audit trail.

        Does nothing when no episode is active or the state has no audit trail.
        The event's ``step`` is the number of tool calls made so far.
        """
        state = self._state
        if not state or state.audit_trail is None:
            return
        event = AuditEvent(
            event_type=event_type, action=action, title=title,
            detail=detail, severity=severity,
            step=state.tool_calls_made,
        )
        state.audit_trail.append(event.__dict__)

    @staticmethod
    def _action_field(action, name, default):
        """Read ``name`` from an action given either as an object or as a mapping."""
        if hasattr(action, name):
            return getattr(action, name)
        return action.get(name, default)

    def _finalize_episode(self):
        """Snapshot every tool, run the verifier and record the verdict on the state.

        Returns:
            Tuple ``(reward, verdict)`` taken from ``verify_episode``.
        """
        state = self._state
        task = self._current_task
        snapshots = {
            "crm": self.crm.snapshot(), "billing": self.billing.snapshot(),
            "calendar": self.calendar.snapshot(), "email": self.email.snapshot(),
        }
        reward, _violations, verdict = verify_episode(
            target=task["target"], snapshots=snapshots,
            policy_violations=state.policy_violations,
            invalid_calls=state.invalid_calls,
            tool_calls_made=state.tool_calls_made,
        )
        state.completed = True
        state.current_db_snapshot = snapshots
        state.verdict = verdict

        decision = verdict["decision"]
        # Any decision other than PASS or HOLD is logged as critical.
        severity = {"PASS": "success", "HOLD": "warning"}.get(decision, "critical")
        self._log_audit(
            "verdict", decision,
            f"Safety gate {decision}: {task['id']}",
            f"Score: {verdict['score']}/100 (Grade {verdict['grade']})",
            severity,
        )
        return reward, verdict

    def reset(self):
        """Start a new episode on the next task and return the initial observation.

        Tasks are taken from ``TASKS`` in order, wrapping around at the end.
        Tools are rebuilt and then seeded from the task's ``seed`` entries
        (``users``, ``invoices``, ``events``) when those are present and non-empty.

        Returns:
            A ``ToolResult`` carrying the task description, ``step_count=0``
            and ``done=False``.
        """
        task = TASKS[self._task_index % len(TASKS)]
        self._task_index += 1
        self._current_task = task

        self._build_tools()

        seed = task["seed"]
        if seed.get("users"):
            self.crm.seed(seed["users"])
        if seed.get("invoices"):
            self.billing.seed(seed["invoices"])
        if seed.get("events"):
            self.calendar.seed(seed["events"])

        self._state = EpisodeState(
            task_id=task["id"],
            task_description=task["description"],
            target_state=task["target"],
            current_db_snapshot={},
            tool_calls_made=0,
            invalid_calls=0,
            policy_violations=0,
            completed=False,
            verdict={},
            audit_trail=[],
        )

        self._log_audit("episode", "started", f"Episode started: {task['id']}", task["description"][:100])

        return ToolResult(
            success=True,
            data={"message": "Environment ready. Complete the following task."},
            task_description=task["description"],
            step_count=0,
            done=False,
        )

    def step(self, action):
        """Execute one tool call and return its result.

        Args:
            action: Either an object with ``tool``, ``action`` and ``parameters``
                attributes, or a mapping with those keys.

        Returns:
            A ``ToolResult``. Before ``reset()`` has been called it is a failure
            with ``reward=-1.0`` and ``done=True``. For a non-terminal step the
            reward is ``INVALID_TOOL_PENALTY`` when the tool name is unknown and
            ``TOOL_CALL_PENALTY`` otherwise. On the terminal step (action name
            ``"submit"`` or step budget reached) the reward comes from
            ``verify_episode`` and the verdict is attached under
            ``data["_verification"]``.
        """
        if self._state is None:
            return ToolResult(success=False, error="Call reset() first", reward=-1.0, done=True)

        state = self._state
        task = self._current_task
        state.tool_calls_made += 1

        max_steps = task.get("max_steps", MAX_STEPS_PER_EPISODE)
        budget_exhausted = state.tool_calls_made >= max_steps

        tool_map = {"crm": self.crm, "billing": self.billing, "calendar": self.calendar, "email": self.email}

        tool_name = self._action_field(action, "tool", "")
        action_name = self._action_field(action, "action", "")
        params = self._action_field(action, "parameters", {})
        if params is None:
            # Tools receive a mapping even when the caller left parameters unset.
            params = {}

        tool = tool_map.get(tool_name)
        if tool is None:
            state.invalid_calls += 1
            error = f"Unknown tool: {tool_name}"
            self._log_audit("tool_call", "invalid_tool", error, severity="warning")
            if budget_exhausted:
                # Invalid calls consume the step budget too; without this check
                # an agent that only sends unknown tools would never terminate.
                reward, verdict = self._finalize_episode()
                return ToolResult(
                    success=False, error=error,
                    data={"error": error, "_verification": verdict},
                    reward=reward, done=True,
                    step_count=state.tool_calls_made,
                    task_description=task["description"],
                )
            return ToolResult(
                success=False, error=error,
                reward=INVALID_TOOL_PENALTY, done=False,
                step_count=state.tool_calls_made,
                task_description=task["description"],
            )

        call_label = f"{tool_name}.{action_name}"
        self._log_audit("tool_call", call_label, f"Tool call: {call_label}", str(params)[:200])

        result = tool.execute(action_name, params)

        failed = "error" in result
        if failed:
            violated = result.get("policy_violated")
            # str() guards against tools that report a non-string error payload.
            self._log_audit(
                "tool_result", "error", f"Tool error: {call_label}",
                str(result["error"])[:200], "critical" if violated else "warning",
            )
            if violated:
                state.policy_violations += 1
        else:
            self._log_audit("tool_result", "success", f"Tool success: {call_label}", str(result)[:200], "success")

        reward = TOOL_CALL_PENALTY
        done = (action_name == "submit") or budget_exhausted

        if done:
            reward, verdict = self._finalize_episode()
            result["_verification"] = verdict

        return ToolResult(
            success=not failed, data=result, reward=reward, done=done,
            step_count=state.tool_calls_made,
            task_description=task["description"],
        )

    @property
    def state(self):
        """The current ``EpisodeState``, or ``None`` before the first ``reset()``."""
        return self._state