File size: 2,218 Bytes
fd78303
 
 
 
 
 
0ef4206
 
 
fd78303
0ef4206
fd78303
0ef4206
fd78303
 
 
0ef4206
 
fd78303
 
 
0ef4206
fd78303
 
 
 
 
 
 
 
 
 
 
 
0ef4206
 
fd78303
 
 
 
 
 
0ef4206
fd78303
 
 
 
 
 
 
 
 
 
 
0ef4206
fd78303
0ef4206
 
fd78303
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
from uuid import uuid4
from typing import Optional
from openenv.core.env_server.interfaces import Environment
from openenv.core.env_server.types import State
from environment.models import CodeReviewAction, CodeReviewObservation
from environment.tasks import get_task
from environment.graders import grade


class CodeReviewEnv(Environment):
    """Single-task code-review environment.

    Each episode presents the task returned by ``get_task`` for the default
    task id. Every call to :meth:`step` is scored by ``grade`` against the
    task's ``ground_truth`` entry, and the resulting reward and feedback are
    returned in a ``CodeReviewObservation``.
    """

    SUPPORTS_CONCURRENT_SESSIONS: bool = False

    # Task loaded at construction time and on every reset.
    _DEFAULT_TASK_ID = "easy_syntax"

    def __init__(self):
        """Create the environment and start a first episode."""
        # Run the base-class initialiser so that whatever state it sets up
        # is present on this instance.
        super().__init__()
        self._start_episode()

    def _start_episode(self) -> None:
        """Load the default task and install a fresh episode state."""
        self._task = get_task(self._DEFAULT_TASK_ID)
        self._state = State(episode_id=str(uuid4()), step_count=0)
        self._last_feedback: Optional[str] = None

    def _observe(
        self,
        *,
        step_number: int,
        previous_feedback: Optional[str],
        done,
        reward: float,
    ) -> CodeReviewObservation:
        """Build an observation from the current task plus per-step fields."""
        task = self._task
        return CodeReviewObservation(
            task_id=task["task_id"],
            task_name=task["task_name"],
            difficulty=task["difficulty"],
            language=task["language"],
            code_snippet=task["code_snippet"],
            context=task["context"],
            step_number=step_number,
            max_steps=task["max_steps"],
            previous_feedback=previous_feedback,
            done=done,
            reward=reward,
        )

    def reset(self) -> CodeReviewObservation:
        """Start a new episode and return its initial observation.

        Returns:
            An observation at step 0 with no feedback, ``done=False`` and a
            reward of ``0.0``.
        """
        self._start_episode()
        return self._observe(
            step_number=0,
            previous_feedback=None,
            done=False,
            reward=0.0,
        )

    def step(self, action: CodeReviewAction) -> CodeReviewObservation:
        """Grade ``action`` and advance the episode by one step.

        Args:
            action: The review submitted by the agent. Its ``submit``
                attribute, when truthy, ends the episode.

        Returns:
            An observation carrying the reward and feedback produced by
            ``grade`` and the updated step counter.
        """
        self._state.step_count += 1
        reward, feedback = grade(
            self._task["task_id"], action, self._task["ground_truth"]
        )
        self._last_feedback = feedback
        # The episode ends on an explicit submit, a reward of at least 0.95,
        # or once the task's step budget is used up.
        done = (
            action.submit
            or reward >= 0.95
            or self._state.step_count >= self._task["max_steps"]
        )
        return self._observe(
            step_number=self._state.step_count,
            previous_feedback=feedback,
            done=done,
            reward=reward,
        )

    @property
    def state(self) -> State:
        """The current episode state (episode id and step count)."""
        return self._state