vikashsaravanann committed on
Commit ·
8fbdaab
1
Parent(s): ad96a6e
fix: enforce strict (0, 1) range for task scores and rewards
Browse files
- app/environment.py +1 -1
- inference.py +5 -5
app/environment.py
CHANGED
|
@@ -48,7 +48,7 @@ class SupportTriageEnv:
|
|
| 48 |
step=0,
|
| 49 |
current_ticket=ticket,
|
| 50 |
actions_taken=[],
|
| 51 |
-
cumulative_reward=0.
|
| 52 |
done=False,
|
| 53 |
)
|
| 54 |
return self._make_observation()
|
|
|
|
| 48 |
step=0,
|
| 49 |
current_ticket=ticket,
|
| 50 |
actions_taken=[],
|
| 51 |
+
cumulative_reward=0.01,
|
| 52 |
done=False,
|
| 53 |
)
|
| 54 |
return self._make_observation()
|
inference.py
CHANGED
|
@@ -115,9 +115,9 @@ def run_episode(task_id, max_steps):
|
|
| 115 |
|
| 116 |
except Exception as e:
|
| 117 |
error_msg = str(e).replace('\n', ' ')
|
| 118 |
-
print(f"[STEP] step={step_n} action={action_str} reward=0.
|
| 119 |
done = True
|
| 120 |
-
rewards.append("0.
|
| 121 |
break
|
| 122 |
|
| 123 |
final = float(cumulative)
|
|
@@ -128,12 +128,12 @@ def run_episode(task_id, max_steps):
|
|
| 128 |
|
| 129 |
success = final >= 0.5
|
| 130 |
rewards_joined = ",".join(rewards)
|
| 131 |
-
print(f"[END] success={str(success).lower()} steps={len(rewards)} rewards={rewards_joined}", flush=True)
|
| 132 |
return round(final, 4)
|
| 133 |
except Exception as e:
|
| 134 |
error_msg = str(e).replace('\n', ' ')
|
| 135 |
-
print(f"[STEP] step=1 action={{}} reward=0.
|
| 136 |
-
print(f"[END] success=false steps=1 rewards=0.
|
| 137 |
return 0.01
|
| 138 |
|
| 139 |
def main():
|
|
|
|
| 115 |
|
| 116 |
except Exception as e:
|
| 117 |
error_msg = str(e).replace('\n', ' ')
|
| 118 |
+
print(f"[STEP] step={step_n} action={action_str} reward=0.01 done=true error={error_msg}", flush=True)
|
| 119 |
done = True
|
| 120 |
+
rewards.append("0.01")
|
| 121 |
break
|
| 122 |
|
| 123 |
final = float(cumulative)
|
|
|
|
| 128 |
|
| 129 |
success = final >= 0.5
|
| 130 |
rewards_joined = ",".join(rewards)
|
| 131 |
+
print(f"[END] success={str(success).lower()} steps={len(rewards)} score={final:.2f} rewards={rewards_joined}", flush=True)
|
| 132 |
return round(final, 4)
|
| 133 |
except Exception as e:
|
| 134 |
error_msg = str(e).replace('\n', ' ')
|
| 135 |
+
print(f"[STEP] step=1 action={{}} reward=0.01 done=true error={error_msg}", flush=True)
|
| 136 |
+
print(f"[END] success=false steps=1 score=0.01 rewards=0.01", flush=True)
|
| 137 |
return 0.01
|
| 138 |
|
| 139 |
def main():
|