"""Request models and a thin async adapter exposing ``EthicsGuardEnv``."""

from __future__ import annotations

from typing import Literal

from pydantic import BaseModel, ConfigDict

from ethicsguard.env import EthicsGuardEnv
from ethicsguard.generator import DIFFICULTY_CONFIG
from ethicsguard.models import (
    ActionType,
    EnvironmentState,
    EthicsGuardAction,
    StepResult,
    TaskInfo,
    TaskListResponse,
)


class ResetRequest(BaseModel):
    """Payload for starting a new episode.

    Unknown fields are rejected (``extra="forbid"``). Both fields are
    optional: ``task`` defaults to ``"easy"`` and ``seed`` to ``0``.
    """

    model_config = ConfigDict(extra="forbid")

    task: Literal["easy", "medium", "hard"] = "easy"
    seed: int = 0


class StepRequest(BaseModel):
    """Payload for applying one action to one item.

    Unknown fields are rejected (``extra="forbid"``).
    """

    model_config = ConfigDict(extra="forbid")

    item_id: str
    action_type: ActionType


class OpenEnvAdapter:
    """Own at most one ``EthicsGuardEnv`` and forward requests to it.

    The environment is created lazily: ``step`` and ``state`` build and reset
    a default environment when none exists yet, while ``reset`` always builds
    a fresh one from the request.
    """

    def __init__(self) -> None:
        self._env: EthicsGuardEnv | None = None

    async def _ensure_env(self) -> EthicsGuardEnv:
        """Return the current environment, creating and resetting a default one if needed."""
        if self._env is None:
            self._env = EthicsGuardEnv()
            await self._env.reset()
        return self._env

    async def reset(self, request: ResetRequest | None = None) -> StepResult:
        """Start a new episode and return the environment's reset result.

        Args:
            request: Difficulty and seed for the new episode. When omitted,
                the ``ResetRequest`` defaults are used.

        Returns:
            Whatever ``EthicsGuardEnv.reset()`` returns for the new environment.
        """
        request = request or ResetRequest()
        # Close the environment being replaced so it is not silently dropped
        # while still open.
        await self.close()
        self._env = EthicsGuardEnv(difficulty=request.task, seed=request.seed)
        return await self._env.reset()

    async def step(self, request: StepRequest) -> StepResult:
        """Apply the requested action to the (lazily created) environment.

        Args:
            request: The item to act on and the action to take.

        Returns:
            The result of ``EthicsGuardEnv.step()`` for the built action.
        """
        env = await self._ensure_env()
        action = EthicsGuardAction(
            item_id=request.item_id,
            action_type=request.action_type,
        )
        return await env.step(action)

    async def state(self) -> EnvironmentState:
        """Return the current state of the (lazily created) environment."""
        env = await self._ensure_env()
        return await env.state()

    async def tasks(self) -> TaskListResponse:
        """List one task per entry in ``DIFFICULTY_CONFIG``.

        Raises:
            KeyError: If ``DIFFICULTY_CONFIG`` contains a difficulty name that
                has no description below.
        """
        task_descriptions = {
            "easy": "Clear violations, low noise, and stronger signals.",
            "medium": "Mixed cues, moderate noise, and more ambiguous policy calls.",
            "hard": "High noise, subtle violations, and aggregate 30 percent null hints.",
        }
        return TaskListResponse(
            tasks=[
                TaskInfo(
                    name=name,
                    difficulty=name,
                    description=task_descriptions[name],
                    queue_size=int(config["queue_size"]),
                    max_steps=EthicsGuardEnv.MAX_STEPS,
                )
                for name, config in DIFFICULTY_CONFIG.items()
            ]
        )

    async def close(self) -> None:
        """Close the current environment, if any, and forget it.

        The reference is cleared before closing, so a repeated ``close`` is a
        no-op and a later ``step``/``state`` builds a fresh environment
        instead of reusing a closed one.
        """
        env, self._env = self._env, None
        if env is not None:
            await env.close()