Spaces:
Sleeping
Sleeping
Your Name committed on
Commit ·
368d425
1
Parent(s): 0786522
fix(OpenEnv): final scour of zero/one values from rewards and state to ensure absolute compliance
Browse files
- environment.py +2 -2
- reward.py +1 -1
environment.py
CHANGED
|
@@ -64,7 +64,7 @@ class TeamForgeEnv:
|
|
| 64 |
|
| 65 |
# Episode state
|
| 66 |
self._step_number = 0
|
| 67 |
-
self._cumulative_reward = 0.
|
| 68 |
self._plan: List[PlanStep] = []
|
| 69 |
self._reviews: List[ReviewArtifact] = []
|
| 70 |
self._reflections: List[ReflectionArtifact] = []
|
|
@@ -106,7 +106,7 @@ class TeamForgeEnv:
|
|
| 106 |
|
| 107 |
# Reset episode state
|
| 108 |
self._step_number = 0
|
| 109 |
-
self._cumulative_reward = 0.
|
| 110 |
self._plan = []
|
| 111 |
self._reviews = []
|
| 112 |
self._reflections = []
|
|
|
|
| 64 |
|
| 65 |
# Episode state
|
| 66 |
self._step_number = 0
|
| 67 |
+
self._cumulative_reward = 0.01
|
| 68 |
self._plan: List[PlanStep] = []
|
| 69 |
self._reviews: List[ReviewArtifact] = []
|
| 70 |
self._reflections: List[ReflectionArtifact] = []
|
|
|
|
| 106 |
|
| 107 |
# Reset episode state
|
| 108 |
self._step_number = 0
|
| 109 |
+
self._cumulative_reward = 0.01
|
| 110 |
self._plan = []
|
| 111 |
self._reviews = []
|
| 112 |
self._reflections = []
|
reward.py
CHANGED
|
@@ -95,7 +95,7 @@ class RewardCalculator:
|
|
| 95 |
"run_tests": 0.02,
|
| 96 |
"run_lint": 0.02,
|
| 97 |
"request_iteration": 0.02,
|
| 98 |
-
}.get(action_type, 0.0)
|
| 99 |
|
| 100 |
# ── Test progress bonus ──
|
| 101 |
if tests_passed is not None:
|
|
|
|
| 95 |
"run_tests": 0.02,
|
| 96 |
"run_lint": 0.02,
|
| 97 |
"request_iteration": 0.02,
|
| 98 |
+
}.get(action_type, 0.01)
|
| 99 |
|
| 100 |
# ── Test progress bonus ──
|
| 101 |
if tests_passed is not None:
|