Update environment.py
Browse files · environment.py (+5 −16)
environment.py
CHANGED
|
@@ -32,49 +32,38 @@ from rubrics import (
|
|
| 32 |
# ======================================================================
|
| 33 |
@dataclass
|
| 34 |
class EnhancedObservation:
|
| 35 |
-
"""
|
| 36 |
-
Complete Markov state - agent has ALL information needed for optimal decisions.
|
| 37 |
-
Reward function depends ONLY on (state, action), not hidden variables.
|
| 38 |
-
"""
|
| 39 |
-
# Code state
|
| 40 |
code_snippet: str
|
| 41 |
last_tool_output: str
|
| 42 |
-
author_response: str = "" # ← ADDED
|
| 43 |
|
| 44 |
-
# Current metrics
|
| 45 |
current_test_score: float
|
| 46 |
current_lint_score: float
|
| 47 |
negotiation_score: float
|
| 48 |
|
| 49 |
-
# CRITICAL: Previous metrics (for understanding deltas)
|
| 50 |
previous_test_score: float
|
| 51 |
previous_lint_score: float
|
| 52 |
|
| 53 |
-
# CRITICAL: Author internal state (affects reward gating)
|
| 54 |
author_confidence: float
|
| 55 |
-
author_threshold: float
|
| 56 |
|
| 57 |
-
# Progress tracking
|
| 58 |
step: int
|
| 59 |
max_steps: int
|
| 60 |
progress_ratio: float
|
| 61 |
|
| 62 |
-
# Tool usage flags
|
| 63 |
tests_run: bool
|
| 64 |
linter_run: bool
|
| 65 |
docs_queried: bool
|
| 66 |
|
| 67 |
-
# Action history (with outcomes)
|
| 68 |
last_action_type: str
|
| 69 |
-
action_history: List[str]
|
| 70 |
|
| 71 |
-
# Terminal flag
|
| 72 |
done: bool
|
| 73 |
|
| 74 |
-
# Additional context
|
| 75 |
bug_description: str
|
| 76 |
comments_count: int
|
| 77 |
|
|
|
|
|
|
|
|
|
|
| 78 |
|
| 79 |
# ======================================================================
|
| 80 |
# HELPER FUNCTIONS
|
|
|
|
| 32 |
# ======================================================================
|
| 33 |
@dataclass
|
| 34 |
class EnhancedObservation:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
code_snippet: str
|
| 36 |
last_tool_output: str
|
|
|
|
| 37 |
|
|
|
|
| 38 |
current_test_score: float
|
| 39 |
current_lint_score: float
|
| 40 |
negotiation_score: float
|
| 41 |
|
|
|
|
| 42 |
previous_test_score: float
|
| 43 |
previous_lint_score: float
|
| 44 |
|
|
|
|
| 45 |
author_confidence: float
|
| 46 |
+
author_threshold: float
|
| 47 |
|
|
|
|
| 48 |
step: int
|
| 49 |
max_steps: int
|
| 50 |
progress_ratio: float
|
| 51 |
|
|
|
|
| 52 |
tests_run: bool
|
| 53 |
linter_run: bool
|
| 54 |
docs_queried: bool
|
| 55 |
|
|
|
|
| 56 |
last_action_type: str
|
| 57 |
+
action_history: List[str]
|
| 58 |
|
|
|
|
| 59 |
done: bool
|
| 60 |
|
|
|
|
| 61 |
bug_description: str
|
| 62 |
comments_count: int
|
| 63 |
|
| 64 |
+
# Fields with default values must be declared last: a dataclass raises TypeError if a field without a default follows one with a default.
|
| 65 |
+
author_response: str = ""
|
| 66 |
+
|
| 67 |
|
| 68 |
# ======================================================================
|
| 69 |
# HELPER FUNCTIONS
|