import json
import logging
import math
import os
import random
from uuid import uuid4

import requests
from openenv.core.env_server.interfaces import Environment
from openenv.core.env_server.types import State

try:
    from ..models import AiServerAdminAction, AiServerAdminObservation
except ImportError:
    # The relative import fails when this module is not loaded as part of
    # its package; fall back to a top-level ``models`` module.
    from models import AiServerAdminAction, AiServerAdminObservation

logger = logging.getLogger(__name__)


class AiServerAdminEnvironment(Environment):
    """Single-step environment whose answers are scored by an LLM judge.

    ``reset`` picks a random task from ``tasks.json`` and returns its prompt.
    ``step`` sends the agent's answer, the task prompt and the task rubric to
    the OpenAI chat-completions endpoint and uses the returned number as the
    reward; every episode ends after that one step.
    """

    SUPPORTS_CONCURRENT_SESSIONS: bool = True

    # Upper bound (seconds) on the judge HTTP call so a stalled connection
    # cannot block ``step`` indefinitely.
    JUDGE_TIMEOUT_SECONDS: float = 30.0

    def __init__(self):
        """Create a fresh state and load the task list from ``tasks.json``.

        The file is looked up one directory above this module first; if that
        cannot be opened or parsed, ``tasks.json`` in the current working
        directory is used instead.

        Raises:
            OSError: If the fallback ``tasks.json`` cannot be opened either.
            ValueError: If the fallback file is not valid JSON.
        """
        self._state = State(episode_id=str(uuid4()), step_count=0)
        self.current_task = None
        tasks_path = os.path.join(os.path.dirname(__file__), "..", "tasks.json")
        try:
            with open(tasks_path, "r", encoding="utf-8") as f:
                self.tasks = json.load(f)
        except (OSError, ValueError):
            # OSError: missing/unreadable file. ValueError: malformed JSON
            # or undecodable bytes. Either way, try the working directory.
            with open("tasks.json", "r", encoding="utf-8") as f:
                self.tasks = json.load(f)

    def reset(self) -> AiServerAdminObservation:
        """Start a new episode with a randomly chosen task.

        Returns:
            An observation whose ``echoed_message`` carries the task prompt,
            with ``done=False`` and ``reward=0.0``.
        """
        self._state = State(episode_id=str(uuid4()), step_count=0)
        self.current_task = random.choice(self.tasks)
        return AiServerAdminObservation(
            echoed_message=f"[NEW TASK]: {self.current_task['prompt']}",
            message_length=0,
            done=False,
            reward=0.0,
        )

    def step(self, action: AiServerAdminAction) -> AiServerAdminObservation:
        """Score the agent's answer and finish the episode.

        Args:
            action: The agent's action; ``action.message`` is the answer that
                gets judged.

        Returns:
            A terminal observation (``done=True``) carrying the judge's
            reward and the length of the submitted answer.
        """
        self._state.step_count += 1
        agent_answer = action.message
        reward = self._judge_code(agent_answer)
        return AiServerAdminObservation(
            echoed_message="Evaluation Complete.",
            message_length=len(agent_answer),
            done=True,
            reward=reward,
        )

    def _judge_code(self, agent_answer: str) -> float:
        """Ask the LLM judge to grade ``agent_answer`` for the current task.

        Args:
            agent_answer: The text submitted by the agent.

        Returns:
            A score clamped to ``[0.0, 1.0]``. Returns ``0.5`` when
            ``OPENAI_API_KEY`` is unset or empty, and ``0.0`` when no task is
            active, the request fails, or the reply is not a usable number.
        """
        openai_key = os.environ.get("OPENAI_API_KEY", "")
        if not openai_key:
            # No judge available: hand back a neutral score.
            return 0.5

        task = self.current_task
        if task is None:
            # step() was called before reset(); there is nothing to grade.
            logger.warning("No active task to judge; call reset() before step().")
            return 0.0

        headers = {
            "Authorization": f"Bearer {openai_key}",
            "Content-Type": "application/json",
        }
        payload = {
            "model": "gpt-4o-mini",
            "messages": [
                {
                    "role": "system",
                    "content": (
                        "You are a strict AI Judge. Evaluate the answer based "
                        "on the rubric. Output ONLY a single float number "
                        "between 0.0 and 1.0. No extra text."
                    ),
                },
                {
                    "role": "user",
                    "content": (
                        f"Task: {task['prompt']}\n"
                        f"Rubric: {task['rubric']}\n"
                        f"Agent Answer: {agent_answer}"
                    ),
                },
            ],
        }

        try:
            resp = requests.post(
                "https://api.openai.com/v1/chat/completions",
                json=payload,
                headers=headers,
                timeout=self.JUDGE_TIMEOUT_SECONDS,
            )
            resp.raise_for_status()
            score_str = resp.json()["choices"][0]["message"]["content"].strip()
            score = float(score_str)
        except Exception:
            # Best-effort grading: network errors, HTTP errors and malformed
            # replies all score 0.0, but are logged rather than hidden.
            logger.exception("Judge request failed; scoring the answer 0.0.")
            return 0.0

        if math.isnan(score):
            # min/max would propagate NaN straight into the reward.
            return 0.0
        return min(max(score, 0.0), 1.0)

    @property
    def state(self) -> State:
        """Return the current episode state (episode id and step count)."""
        return self._state