Spaces:
Sleeping
Sleeping
File size: 2,218 Bytes
from uuid import uuid4
from typing import Optional
from openenv.core.env_server.interfaces import Environment
from openenv.core.env_server.types import State
from environment.models import CodeReviewAction, CodeReviewObservation
from environment.tasks import get_task
from environment.graders import grade
class CodeReviewEnv(Environment):
    """Code-review environment that serves the ``"easy_syntax"`` task.

    Every episode loads its task through ``get_task("easy_syntax")``. Each
    action passed to :meth:`step` is scored by ``grade`` against the task's
    ``"ground_truth"`` entry. An episode is reported as done once the action's
    ``submit`` attribute is truthy, the reward reaches 0.95, or the step
    counter reaches the task's ``"max_steps"`` value.
    """

    # Class-level flag, presumably read by the hosting server; left disabled.
    SUPPORTS_CONCURRENT_SESSIONS: bool = False

    def __init__(self):
        self._start_episode()

    def _start_episode(self) -> None:
        """Load the task, install a fresh ``State`` and clear stored feedback."""
        self._task = get_task("easy_syntax")
        # A new random episode id is generated for every episode.
        self._state = State(episode_id=str(uuid4()), step_count=0)
        self._last_feedback: Optional[str] = None

    def _make_observation(
        self, *, step_number, previous_feedback, done, reward
    ) -> CodeReviewObservation:
        """Combine the current task's static fields with per-step values."""
        task = self._task
        return CodeReviewObservation(
            task_id=task["task_id"],
            task_name=task["task_name"],
            difficulty=task["difficulty"],
            language=task["language"],
            code_snippet=task["code_snippet"],
            context=task["context"],
            step_number=step_number,
            max_steps=task["max_steps"],
            previous_feedback=previous_feedback,
            done=done,
            reward=reward,
        )

    def reset(self) -> CodeReviewObservation:
        """Begin a new episode and return its initial observation.

        The initial observation has step number 0, no previous feedback,
        ``done=False`` and a reward of 0.0.
        """
        self._start_episode()
        return self._make_observation(
            step_number=0,
            previous_feedback=None,
            done=False,
            reward=0.0,
        )

    def step(self, action: CodeReviewAction) -> CodeReviewObservation:
        """Grade ``action`` and return the resulting observation.

        The step counter is incremented before grading, and the feedback
        returned by ``grade`` is stored on the instance as well as being
        placed in the returned observation.
        """
        self._state.step_count += 1

        current_task = self._task
        outcome = grade(
            current_task["task_id"], action, current_task["ground_truth"]
        )
        reward, feedback = outcome
        self._last_feedback = feedback

        step_limit = current_task["max_steps"]
        # Episode ends on explicit submit, a reward of at least 0.95, or
        # when the step budget is exhausted.
        finished = (
            action.submit
            or reward >= 0.95
            or self._state.step_count >= step_limit
        )
        return self._make_observation(
            step_number=self._state.step_count,
            previous_feedback=feedback,
            done=finished,
            reward=reward,
        )

    @property
    def state(self) -> State:
        """Return the current episode ``State`` object."""
        return self._state