100XZX001 committed on
Commit
a9cad0e
·
verified ·
1 Parent(s): 0903f4c

Update environment.py

Browse files
Files changed (1) hide show
  1. environment.py +5 -16
environment.py CHANGED
@@ -32,49 +32,38 @@ from rubrics import (
32
  # ======================================================================
33
  @dataclass
34
  class EnhancedObservation:
35
- """
36
- Complete Markov state - agent has ALL information needed for optimal decisions.
37
- Reward function depends ONLY on (state, action), not hidden variables.
38
- """
39
- # Code state
40
  code_snippet: str
41
  last_tool_output: str
42
- author_response: str = "" # ← ADDED
43
 
44
- # Current metrics
45
  current_test_score: float
46
  current_lint_score: float
47
  negotiation_score: float
48
 
49
- # CRITICAL: Previous metrics (for understanding deltas)
50
  previous_test_score: float
51
  previous_lint_score: float
52
 
53
- # CRITICAL: Author internal state (affects reward gating)
54
  author_confidence: float
55
- author_threshold: float # When author accepts
56
 
57
- # Progress tracking
58
  step: int
59
  max_steps: int
60
  progress_ratio: float
61
 
62
- # Tool usage flags
63
  tests_run: bool
64
  linter_run: bool
65
  docs_queried: bool
66
 
67
- # Action history (with outcomes)
68
  last_action_type: str
69
- action_history: List[str] # Last 5 actions
70
 
71
- # Terminal flag
72
  done: bool
73
 
74
- # Additional context
75
  bug_description: str
76
  comments_count: int
77
 
 
 
 
78
 
79
  # ======================================================================
80
  # HELPER FUNCTIONS
 
32
  # ======================================================================
33
  @dataclass
34
  class EnhancedObservation:
 
 
 
 
 
35
  code_snippet: str
36
  last_tool_output: str
 
37
 
 
38
  current_test_score: float
39
  current_lint_score: float
40
  negotiation_score: float
41
 
 
42
  previous_test_score: float
43
  previous_lint_score: float
44
 
 
45
  author_confidence: float
46
+ author_threshold: float
47
 
 
48
  step: int
49
  max_steps: int
50
  progress_ratio: float
51
 
 
52
  tests_run: bool
53
  linter_run: bool
54
  docs_queried: bool
55
 
 
56
  last_action_type: str
57
+ action_history: List[str]
58
 
 
59
  done: bool
60
 
 
61
  bug_description: str
62
  comments_count: int
63
 
64
+ # default fields must be at the very end
65
+ author_response: str = ""
66
+
67
 
68
  # ======================================================================
69
  # HELPER FUNCTIONS