Your Name committed on
Commit
368d425
·
1 Parent(s): 0786522

fix(OpenEnv): final scour of zero/one values from rewards and state to ensure absolute compliance

Browse files
Files changed (2) hide show
  1. environment.py +2 -2
  2. reward.py +1 -1
environment.py CHANGED
@@ -64,7 +64,7 @@ class TeamForgeEnv:
64
 
65
  # Episode state
66
  self._step_number = 0
67
- self._cumulative_reward = 0.0
68
  self._plan: List[PlanStep] = []
69
  self._reviews: List[ReviewArtifact] = []
70
  self._reflections: List[ReflectionArtifact] = []
@@ -106,7 +106,7 @@ class TeamForgeEnv:
106
 
107
  # Reset episode state
108
  self._step_number = 0
109
- self._cumulative_reward = 0.0
110
  self._plan = []
111
  self._reviews = []
112
  self._reflections = []
 
64
 
65
  # Episode state
66
  self._step_number = 0
67
+ self._cumulative_reward = 0.01
68
  self._plan: List[PlanStep] = []
69
  self._reviews: List[ReviewArtifact] = []
70
  self._reflections: List[ReflectionArtifact] = []
 
106
 
107
  # Reset episode state
108
  self._step_number = 0
109
+ self._cumulative_reward = 0.01
110
  self._plan = []
111
  self._reviews = []
112
  self._reflections = []
reward.py CHANGED
@@ -95,7 +95,7 @@ class RewardCalculator:
95
  "run_tests": 0.02,
96
  "run_lint": 0.02,
97
  "request_iteration": 0.02,
98
- }.get(action_type, 0.0)
99
 
100
  # ── Test progress bonus ──
101
  if tests_passed is not None:
 
95
  "run_tests": 0.02,
96
  "run_lint": 0.02,
97
  "request_iteration": 0.02,
98
+ }.get(action_type, 0.01)
99
 
100
  # ── Test progress bonus ──
101
  if tests_passed is not None: