code-review-env / server /code_review_env_environment.py
Dolphin-Syndrom's picture
environment and dependencies
92e5c18
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
"""Code Review environment implementation for OpenEnv."""
from uuid import uuid4
from openenv.core.env_server.interfaces import Environment
try:
from ..models import ReviewAction, ReviewObservation, ReviewState
from .graders import grade_review_with_breakdown
from .tasks import get_task
except ImportError:
from models import ReviewAction, ReviewObservation, ReviewState
from server.graders import grade_review_with_breakdown
from server.tasks import get_task
# Step budget per episode: recorded in ReviewState.max_steps, and an episode
# is marked done once its step_count reaches this value.
MAX_STEPS = 3
class CodeReviewEnvironment(Environment):
    """Environment where an agent reviews code and tags planted issues.

    Each episode is bound to a single task. Every ``step`` grades the
    submitted review against that task; the episode is flagged done when
    the score reaches ``SUCCESS_THRESHOLD`` or the step budget stored in
    the episode state is used up.
    """

    SUPPORTS_CONCURRENT_SESSIONS: bool = True

    # Score at or above which a review ends the episode early.
    SUCCESS_THRESHOLD: float = 0.95

    def __init__(self):
        """Create the environment with a fresh episode on ``task_easy``."""
        # Run the base-class initialiser so that any attributes Environment
        # sets up exist on this instance as well.
        super().__init__()
        self._load_task("task_easy")

    def _load_task(self, task_id: str):
        """Bind ``task_id`` as the current task and start a new episode state.

        Returns:
            The task object returned by ``get_task``.
        """
        task = get_task(task_id)
        self._current_task = task
        self._state = ReviewState(
            episode_id=str(uuid4()),
            step_count=0,
            current_task_id=task.task_id,
            max_steps=MAX_STEPS,
        )
        return task

    def _observe(
        self, *, feedback: str, reward: float, done: bool, metadata: dict
    ) -> ReviewObservation:
        """Build an observation for the current task at the current step."""
        task = self._current_task
        return ReviewObservation(
            task_id=task.task_id,
            file_name=task.file_name,
            task_description=task.description,
            code_snippet=task.code,
            feedback=feedback,
            step_number=self._state.step_count,
            reward=reward,
            done=done,
            metadata=metadata,
        )

    def reset(self, task_id: str = "task_easy", **kwargs) -> ReviewObservation:
        """Start a new episode on the selected task.

        Args:
            task_id: Identifier handed to ``get_task``; resolving it
                (including any handling of unknown ids) is delegated to
                that function.
            **kwargs: Extra keyword arguments are accepted but ignored.

        Returns:
            The initial observation: step 0, zero reward, not done, with
            the task's difficulty and number of planted issues in
            ``metadata``.
        """
        del kwargs  # accepted but deliberately unused
        task = self._load_task(task_id)
        return self._observe(
            feedback="Environment reset. Submit issues_found and review_comment.",
            reward=0.0,
            done=False,
            metadata={
                "difficulty": task.difficulty,
                "planted_issue_count": len(task.planted_issues),
            },
        )

    def step(self, action: ReviewAction) -> ReviewObservation:  # type: ignore[override]
        """Grade one review action and return the updated observation.

        Args:
            action: The submitted review; ``issues_found`` and
                ``review_comment`` are graded, ``severity`` is echoed
                back in the observation metadata.

        Returns:
            An observation whose ``reward`` is the grader's score and
            whose ``metadata`` lists the correctly found, missed and
            false-positive issues in sorted order.
        """
        self._state.step_count += 1
        breakdown = grade_review_with_breakdown(
            action_issues=action.issues_found,
            action_comment=action.review_comment,
            task=self._current_task,
        )
        score = breakdown.score

        # Terminate on the budget recorded in the episode state so the
        # limit reported through `state` and the one enforced here agree.
        budget_spent = self._state.step_count >= self._state.max_steps
        done = score >= self.SUCCESS_THRESHOLD or budget_spent

        correctly_found = sorted(breakdown.correctly_found)
        missed = sorted(breakdown.missed)
        false_positives = sorted(breakdown.false_positives)
        feedback = (
            f"Score: {score:.3f} | Found: {correctly_found} | "
            f"Missed: {len(missed)} remaining | False positives: {len(false_positives)}"
        )
        return self._observe(
            feedback=feedback,
            reward=score,
            done=done,
            metadata={
                "correctly_found": correctly_found,
                "missed": missed,
                "false_positives": false_positives,
                "submitted_severity": action.severity,
            },
        )

    @property
    def state(self) -> ReviewState:
        """Return current episode state."""
        return self._state