Update openenv.yaml
Browse files- openenv.yaml +62 -9
openenv.yaml
CHANGED
|
@@ -24,7 +24,7 @@ tasks:
|
|
| 24 |
description: "Resolve potential deadlock by standardising lock order"
|
| 25 |
|
| 26 |
# ----------------------------------------------------------------------
|
| 27 |
-
# Observation space (
|
| 28 |
# ----------------------------------------------------------------------
|
| 29 |
observation_space:
|
| 30 |
type: object
|
|
@@ -34,16 +34,69 @@ observation_space:
|
|
| 34 |
description: "Current code snippet (may contain injected bug)"
|
| 35 |
last_tool_output:
|
| 36 |
type: string
|
| 37 |
-
description: "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
step:
|
| 39 |
type: integer
|
| 40 |
description: "Current step number"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
done:
|
| 42 |
type: boolean
|
| 43 |
description: "Whether the episode has finished"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
|
| 45 |
# ----------------------------------------------------------------------
|
| 46 |
-
# Action space (
|
| 47 |
# ----------------------------------------------------------------------
|
| 48 |
action_space:
|
| 49 |
type: object
|
|
@@ -51,11 +104,11 @@ action_space:
|
|
| 51 |
action_type:
|
| 52 |
type: string
|
| 53 |
enum:
|
| 54 |
-
-
|
| 55 |
- skip
|
| 56 |
- done
|
| 57 |
-
-
|
| 58 |
-
-
|
| 59 |
- execute
|
| 60 |
- inspect
|
| 61 |
- run_linter
|
|
@@ -63,13 +116,13 @@ action_space:
|
|
| 63 |
- query_docs
|
| 64 |
comment_text:
|
| 65 |
type: string
|
| 66 |
-
description: "Required for
|
| 67 |
question:
|
| 68 |
type: string
|
| 69 |
-
description: "Required for
|
| 70 |
fix_code:
|
| 71 |
type: string
|
| 72 |
-
description: "Required for
|
| 73 |
query_topic:
|
| 74 |
type: string
|
| 75 |
description: "Required for query_docs"
|
|
|
|
| 24 |
description: "Resolve potential deadlock by standardising lock order"
|
| 25 |
|
| 26 |
# ----------------------------------------------------------------------
|
| 27 |
+
# Observation space (complete Markov state – agent sees everything)
|
| 28 |
# ----------------------------------------------------------------------
|
| 29 |
observation_space:
|
| 30 |
type: object
|
|
|
|
| 34 |
description: "Current code snippet (may contain injected bug)"
|
| 35 |
last_tool_output:
|
| 36 |
type: string
|
| 37 |
+
description: "Raw output from last tool (test runner, linter, etc.)"
|
| 38 |
+
author_response:
|
| 39 |
+
type: string
|
| 40 |
+
description: "Latest feedback from the simulated human developer"
|
| 41 |
+
current_test_score:
|
| 42 |
+
type: number
|
| 43 |
+
description: "Proportion of tests passed (0.0–1.0)"
|
| 44 |
+
current_lint_score:
|
| 45 |
+
type: number
|
| 46 |
+
description: "Normalised pylint score (0.0–1.0)"
|
| 47 |
+
negotiation_score:
|
| 48 |
+
type: number
|
| 49 |
+
description: "Author's confidence minus pushback penalty"
|
| 50 |
+
previous_test_score:
|
| 51 |
+
type: number
|
| 52 |
+
description: "Test score before the last action"
|
| 53 |
+
previous_lint_score:
|
| 54 |
+
type: number
|
| 55 |
+
description: "Lint score before the last action"
|
| 56 |
+
author_confidence:
|
| 57 |
+
type: number
|
| 58 |
+
description: "Internal belief of the author (0.0–1.0)"
|
| 59 |
+
author_threshold:
|
| 60 |
+
type: number
|
| 61 |
+
description: "Confidence threshold for this personality"
|
| 62 |
step:
|
| 63 |
type: integer
|
| 64 |
description: "Current step number"
|
| 65 |
+
max_steps:
|
| 66 |
+
type: integer
|
| 67 |
+
description: "Maximum steps allowed in the episode"
|
| 68 |
+
progress_ratio:
|
| 69 |
+
type: number
|
| 70 |
+
description: "step / max_steps"
|
| 71 |
+
tests_run:
|
| 72 |
+
type: boolean
|
| 73 |
+
description: "Whether the agent has run tests at least once"
|
| 74 |
+
linter_run:
|
| 75 |
+
type: boolean
|
| 76 |
+
description: "Whether the agent has run the linter at least once"
|
| 77 |
+
docs_queried:
|
| 78 |
+
type: boolean
|
| 79 |
+
description: "Whether the agent has queried documentation"
|
| 80 |
+
last_action_type:
|
| 81 |
+
type: string
|
| 82 |
+
description: "String name of the last executed action"
|
| 83 |
+
action_history:
|
| 84 |
+
type: array
|
| 85 |
+
items:
|
| 86 |
+
type: string
|
| 87 |
+
description: "Last 5 action types"
|
| 88 |
done:
|
| 89 |
type: boolean
|
| 90 |
description: "Whether the episode has finished"
|
| 91 |
+
bug_description:
|
| 92 |
+
type: string
|
| 93 |
+
description: "Short description of the injected bug"
|
| 94 |
+
comments_count:
|
| 95 |
+
type: integer
|
| 96 |
+
description: "Number of comments exchanged so far"
|
| 97 |
|
| 98 |
# ----------------------------------------------------------------------
|
| 99 |
+
# Action space (short names as produced by the agent)
|
| 100 |
# ----------------------------------------------------------------------
|
| 101 |
action_space:
|
| 102 |
type: object
|
|
|
|
| 104 |
action_type:
|
| 105 |
type: string
|
| 106 |
enum:
|
| 107 |
+
- comment
|
| 108 |
- skip
|
| 109 |
- done
|
| 110 |
+
- question
|
| 111 |
+
- fix
|
| 112 |
- execute
|
| 113 |
- inspect
|
| 114 |
- run_linter
|
|
|
|
| 116 |
- query_docs
|
| 117 |
comment_text:
|
| 118 |
type: string
|
| 119 |
+
description: "Required for comment"
|
| 120 |
question:
|
| 121 |
type: string
|
| 122 |
+
description: "Required for question"
|
| 123 |
fix_code:
|
| 124 |
type: string
|
| 125 |
+
description: "Required for fix"
|
| 126 |
query_topic:
|
| 127 |
type: string
|
| 128 |
description: "Required for query_docs"
|