File size: 4,502 Bytes
1588266
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# openenv.yaml – Environment metadata for OpenEnv
# Identifying metadata for this environment definition.
name: CodeReview-Professional-Workflow
# Plain scalar with two dots: YAML loads this as the string "1.0.0", not as a
# number.
version: 1.0.0
# Literal block scalar (`|`): line breaks between the three lines below are
# preserved and the value ends with a single trailing newline. Do not place
# comments inside the indented body; they would become part of the text.
description: |
  Multi‑turn code review environment for professional tasks.
  Agent must inspect, test, lint, query docs, and negotiate with a simulated author
  to fix injected bugs. Supports DPO training on full trajectories.
author: yuvraj gupta
license: MIT

# ----------------------------------------------------------------------
# Tasks (difficulty progression)
# ----------------------------------------------------------------------
# Task catalogue: five entries, each a mapping with a string `id` and a
# one-line `description` of the change the agent is expected to make.
# NOTE(review): the ids read as ordered from easiest to hardest; how a task is
# selected at runtime is not visible in this file.
tasks:
  # Null handling, loop style and an arithmetic edge case.
  - id: easy
    description: "Fix missing null check in a dictionary lookup"
  - id: medium
    description: "Improve loop efficiency (replace range(len) with direct iteration)"
  - id: hard
    description: "Handle division by zero in average calculation"
  # Locking-related tasks: a race condition and a lock-ordering deadlock.
  - id: harder
    description: "Fix race condition by adding a lock"
  - id: hardest
    description: "Resolve potential deadlock by standardising lock order"

# ----------------------------------------------------------------------
# Observation space (complete Markov state – agent sees everything)
# ----------------------------------------------------------------------
# Schema of the observation: a single object whose fields are listed under
# `properties`. Each field declares a `type` and a human-readable
# `description`; no `required` list or numeric bounds are declared, so any
# ranges mentioned below exist only in the description text.
observation_space:
  type: object
  properties:
    # --- Code under review and the latest textual feedback ---
    code_snippet:
      type: string
      description: "Current code snippet (may contain injected bug)"
    last_tool_output:
      type: string
      description: "Raw output from last tool (test runner, linter, etc.)"
    author_response:
      type: string
      description: "Latest feedback from the simulated human developer"
    # --- Current scores ---
    current_test_score:
      type: number
      description: "Proportion of tests passed (0.0–1.0)"
    current_lint_score:
      type: number
      description: "Normalised pylint score (0.0–1.0)"
    # Unlike the two scores above, no range is stated for this value.
    negotiation_score:
      type: number
      description: "Author's confidence minus pushback penalty"
    # --- Scores as they were before the last action ---
    # NOTE(review): presumably paired with the current_* scores so a per-step
    # change can be derived; confirm against the environment code.
    previous_test_score:
      type: number
      description: "Test score before the last action"
    previous_lint_score:
      type: number
      description: "Lint score before the last action"
    # --- Simulated author state ---
    # NOTE(review): these expose the author's internal belief and threshold
    # directly in the observation; confirm that this is intended.
    author_confidence:
      type: number
      description: "Internal belief of the author (0.0–1.0)"
    author_threshold:
      type: number
      description: "Confidence threshold for this personality"
    # --- Episode progress ---
    step:
      type: integer
      description: "Current step number"
    max_steps:
      type: integer
      description: "Maximum steps allowed in the episode"
    progress_ratio:
      type: number
      description: "step / max_steps"
    # --- Flags recording which tools have been used so far ---
    tests_run:
      type: boolean
      description: "Whether the agent has run tests at least once"
    linter_run:
      type: boolean
      description: "Whether the agent has run the linter at least once"
    docs_queried:
      type: boolean
      description: "Whether the agent has queried documentation"
    # --- Recent actions ---
    last_action_type:
      type: string
      description: "String name of the last executed action"
    # Array of strings; the five-item limit is stated only in the description
    # (no `maxItems` is declared).
    action_history:
      type: array
      items:
        type: string
      description: "Last 5 action types"
    # --- Episode status and task context ---
    done:
      type: boolean
      description: "Whether the episode has finished"
    bug_description:
      type: string
      description: "Short description of the injected bug"
    comments_count:
      type: integer
      description: "Number of comments exchanged so far"

# ----------------------------------------------------------------------
# Action space (short names as produced by the agent)
# ----------------------------------------------------------------------
# Schema of an action: an object with one discriminator field (`action_type`)
# and four optional string payload fields.
# No `required` list is declared, so the "Required for ..." rules below are
# stated only in the descriptions.
# NOTE(review): presumably the environment code enforces them; verify.
action_space:
  type: object
  properties:
    # The ten accepted action names. All entries are plain scalars that load
    # as strings (none of them is a YAML boolean or null keyword).
    action_type:
      type: string
      enum:
        - comment
        - skip
        - done
        - question
        - fix
        - execute
        - inspect
        - run_linter
        - run_tests
        - query_docs
    # Payload fields. Only `comment`, `question`, `fix` and `query_docs` have
    # a matching payload declared here; the other six action types have none.
    comment_text:
      type: string
      description: "Required for comment"
    # This property shares its name with the `question` enum value above.
    question:
      type: string
      description: "Required for question"
    fix_code:
      type: string
      description: "Required for fix"
    query_topic:
      type: string
      description: "Required for query_docs"

# ----------------------------------------------------------------------
# (Optional) Server configuration – used by openenv serve
# ----------------------------------------------------------------------
# Settings for serving the environment.
server:
  # NOTE(review): looks like a `module.path:attribute` reference, i.e. the
  # object `app` inside module `server.app`; confirm against the serving tool.
  app: server.app:app
  # Loads as the integer 7860.
  port: 7860