import json
import logging
import math
import os
import random
from uuid import uuid4

import requests
from openenv.core.env_server.interfaces import Environment
from openenv.core.env_server.types import State

try:
    from ..models import AiServerAdminAction, AiServerAdminObservation
except ImportError:
    from models import AiServerAdminAction, AiServerAdminObservation


_logger = logging.getLogger(__name__)


class AiServerAdminEnvironment(Environment):
    """Single-step environment that grades an agent's answer with an LLM judge.

    ``reset`` picks a random task from ``tasks.json`` and returns its prompt.
    ``step`` sends the agent's message, together with the task's prompt and
    rubric, to an OpenAI chat model and returns the resulting score as the
    reward; every step ends the episode (``done=True``).

    Each task is indexed with the keys ``"prompt"`` and ``"rubric"``.
    """

    # Flag consumed by the hosting framework; it is not read inside this class.
    SUPPORTS_CONCURRENT_SESSIONS: bool = True

    _JUDGE_URL = "https://api.openai.com/v1/chat/completions"
    _JUDGE_MODEL = "gpt-4o-mini"
    # Upper bound (seconds) on the judge HTTP call. Without a timeout,
    # requests waits indefinitely and a stalled API would hang step().
    _JUDGE_TIMEOUT_SECONDS = 30.0

    def __init__(self):
        """Create a fresh episode state and load the task list."""
        self._state = State(episode_id=str(uuid4()), step_count=0)
        self.current_task = None
        self.tasks = self._load_tasks()

    @staticmethod
    def _load_tasks():
        """Load tasks from ``../tasks.json`` relative to this module.

        Falls back to ``tasks.json`` in the current working directory when
        the first location cannot be opened.

        Raises:
            OSError: if neither file can be opened.
            json.JSONDecodeError: if the file that was opened is not valid
                JSON.
        """
        packaged_path = os.path.join(os.path.dirname(__file__), "..", "tasks.json")
        try:
            with open(packaged_path, "r", encoding="utf-8") as f:
                return json.load(f)
        except OSError:
            # Fall back only when the packaged file is missing or unreadable.
            # A malformed packaged file must surface as an error rather than
            # be silently replaced by whatever tasks.json is in the CWD.
            with open("tasks.json", "r", encoding="utf-8") as f:
                return json.load(f)

    def reset(self) -> AiServerAdminObservation:
        """Start a new episode on a randomly chosen task.

        Returns:
            An observation whose ``echoed_message`` carries the task prompt,
            with ``done=False`` and ``reward=0.0``.
        """
        self._state = State(episode_id=str(uuid4()), step_count=0)
        self.current_task = random.choice(self.tasks)
        return AiServerAdminObservation(
            echoed_message=f"[NEW TASK]: {self.current_task['prompt']}",
            message_length=0,
            done=False,
            reward=0.0,
        )

    def step(self, action: AiServerAdminAction) -> AiServerAdminObservation:
        """Grade the agent's answer and finish the episode.

        Args:
            action: the agent action; its ``message`` is the answer to grade.

        Returns:
            A terminal observation (``done=True``) whose ``reward`` is the
            judge score and whose ``message_length`` is ``len(action.message)``.
        """
        self._state.step_count += 1
        agent_answer = action.message
        reward = self._judge_code(agent_answer)
        return AiServerAdminObservation(
            echoed_message="Evaluation Complete.",
            message_length=len(agent_answer),
            done=True,
            reward=reward,
        )

    def _judge_code(self, agent_answer: str) -> float:
        """Score ``agent_answer`` against the current task with the LLM judge.

        Returns:
            ``0.5`` when ``OPENAI_API_KEY`` is unset or empty; otherwise the
            judge's score clamped to ``[0.0, 1.0]``; ``0.0`` when the request
            fails or the reply is not a usable number.

        Raises:
            RuntimeError: if an API key is set but no task is active, i.e.
                ``reset()`` has not been called yet.
        """
        openai_key = os.environ.get("OPENAI_API_KEY", "")
        if not openai_key:
            _logger.warning("OPENAI_API_KEY is not set; returning neutral reward 0.5")
            return 0.5

        task = self.current_task
        if task is None:
            # Previously this surfaced as an opaque TypeError on None['prompt'].
            raise RuntimeError("step() was called before reset(): no task is active")

        headers = {"Authorization": f"Bearer {openai_key}", "Content-Type": "application/json"}
        # NOTE(review): agent_answer is interpolated verbatim into the judge
        # prompt, so an answer can try to instruct the judge directly;
        # consider delimiting it if rewards must resist manipulation.
        payload = {
            "model": self._JUDGE_MODEL,
            "messages": [
                {"role": "system", "content": "You are a strict AI Judge. Evaluate the answer based on the rubric. Output ONLY a single float number between 0.0 and 1.0. No extra text."},
                {"role": "user", "content": f"Task: {task['prompt']}\nRubric: {task['rubric']}\nAgent Answer: {agent_answer}"},
            ],
        }

        try:
            resp = requests.post(
                self._JUDGE_URL,
                json=payload,
                headers=headers,
                timeout=self._JUDGE_TIMEOUT_SECONDS,
            )
            resp.raise_for_status()
            score_str = resp.json()["choices"][0]["message"]["content"].strip()
            score = float(score_str)
        except Exception:
            # Deliberately broad: any transport, HTTP, response-shape or parse
            # failure degrades to a zero reward instead of crashing step().
            # The failure is logged so it is not mistaken for a real 0.0.
            _logger.warning("Judge call failed; returning reward 0.0", exc_info=True)
            return 0.0

        if math.isnan(score):
            # float("nan") parses successfully and passes through min/max
            # unchanged, which would leak NaN into the reward.
            _logger.warning("Judge returned NaN; returning reward 0.0")
            return 0.0
        return min(max(score, 0.0), 1.0)

    @property
    def state(self) -> State:
        """Return the current episode state (episode id and step count)."""
        return self._state