File size: 2,638 Bytes
625b444
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import os, json, random, requests
from uuid import uuid4
from openenv.core.env_server.interfaces import Environment
from openenv.core.env_server.types import State

try:
    from ..models import AiServerAdminAction, AiServerAdminObservation
except ImportError:
    from models import AiServerAdminAction, AiServerAdminObservation

class AiServerAdminEnvironment(Environment):
    """Single-step environment that poses AI-server-admin tasks.

    ``reset`` draws a random task from ``tasks.json`` and returns its prompt;
    ``step`` submits the agent's free-text answer to an LLM judge
    (``gpt-4o-mini``) which scores it against the task's rubric. When no
    ``OPENAI_API_KEY`` is configured, a neutral reward of 0.5 is returned
    instead of calling the API.
    """

    SUPPORTS_CONCURRENT_SESSIONS: bool = True

    # Hard cap on the judge HTTP call so a stalled API cannot hang an episode.
    _JUDGE_TIMEOUT: float = 30.0

    def __init__(self):
        self._state = State(episode_id=str(uuid4()), step_count=0)
        self.current_task = None  # set by reset(); None until then
        # tasks.json normally sits one level above this module; fall back to
        # the working directory (e.g. when the module is run as a flat script).
        tasks_path = os.path.join(os.path.dirname(__file__), "..", "tasks.json")
        try:
            with open(tasks_path, "r", encoding="utf-8") as f:
                self.tasks = json.load(f)
        except (OSError, ValueError):
            # json.JSONDecodeError subclasses ValueError, so a missing OR
            # corrupt primary file triggers the fallback, as before.
            with open("tasks.json", "r", encoding="utf-8") as f:
                self.tasks = json.load(f)

    def reset(self) -> AiServerAdminObservation:
        """Start a fresh episode with a newly sampled task."""
        self._state = State(episode_id=str(uuid4()), step_count=0)
        self.current_task = random.choice(self.tasks)
        return AiServerAdminObservation(
            echoed_message=f"[NEW TASK]: {self.current_task['prompt']}",
            message_length=0, done=False, reward=0.0
        )

    def step(self, action: AiServerAdminAction) -> AiServerAdminObservation:
        """Grade the agent's answer; every episode ends after one step."""
        if self.current_task is None:
            # Fail loudly here instead of the opaque TypeError the judge's
            # prompt formatting would otherwise raise on a None task.
            raise RuntimeError("step() called before reset()")
        self._state.step_count += 1
        agent_answer = action.message
        reward = self._judge_code(agent_answer)
        return AiServerAdminObservation(
            echoed_message="Evaluation Complete.",
            message_length=len(agent_answer), done=True, reward=reward,
        )

    def _judge_code(self, agent_answer: str) -> float:
        """Score *agent_answer* in [0.0, 1.0] via an LLM judge.

        Returns 0.5 (neutral) when no API key is configured, and 0.0 when the
        API call or score parsing fails — judging stays best-effort.
        """
        openai_key = os.environ.get("OPENAI_API_KEY", "")
        if not openai_key:
            return 0.5

        headers = {"Authorization": f"Bearer {openai_key}", "Content-Type": "application/json"}
        payload = {
            "model": "gpt-4o-mini",
            "messages": [
                {"role": "system", "content": "You are a strict AI Judge. Evaluate the answer based on the rubric. Output ONLY a single float number between 0.0 and 1.0. No extra text."},
                {"role": "user", "content": f"Task: {self.current_task['prompt']}\nRubric: {self.current_task['rubric']}\nAgent Answer: {agent_answer}"}
            ]
        }

        try:
            resp = requests.post(
                "https://api.openai.com/v1/chat/completions",
                json=payload, headers=headers, timeout=self._JUDGE_TIMEOUT,
            )
            resp.raise_for_status()
            score_str = resp.json()["choices"][0]["message"]["content"].strip()
            # Clamp so an out-of-range judge reply cannot escape [0, 1].
            return min(max(float(score_str), 0.0), 1.0)
        except (requests.RequestException, KeyError, IndexError, TypeError, ValueError):
            # Network failure, unexpected response shape, or a non-numeric
            # judge reply: treat the answer as unscored rather than crash.
            return 0.0

    @property
    def state(self) -> State:
        """Current episode state (id and step count)."""
        return self._state