vikashsaravanann committed on
Commit
8fbdaab
·
1 Parent(s): ad96a6e

fix: enforce strict (0, 1) range for task scores and rewards

Browse files
Files changed (2) hide show
  1. app/environment.py +1 -1
  2. inference.py +5 -5
app/environment.py CHANGED
@@ -48,7 +48,7 @@ class SupportTriageEnv:
48
  step=0,
49
  current_ticket=ticket,
50
  actions_taken=[],
51
- cumulative_reward=0.0,
52
  done=False,
53
  )
54
  return self._make_observation()
 
48
  step=0,
49
  current_ticket=ticket,
50
  actions_taken=[],
51
+ cumulative_reward=0.01,
52
  done=False,
53
  )
54
  return self._make_observation()
inference.py CHANGED
@@ -115,9 +115,9 @@ def run_episode(task_id, max_steps):
115
 
116
  except Exception as e:
117
  error_msg = str(e).replace('\n', ' ')
118
- print(f"[STEP] step={step_n} action={action_str} reward=0.00 done=true error={error_msg}", flush=True)
119
  done = True
120
- rewards.append("0.00")
121
  break
122
 
123
  final = float(cumulative)
@@ -128,12 +128,12 @@ def run_episode(task_id, max_steps):
128
 
129
  success = final >= 0.5
130
  rewards_joined = ",".join(rewards)
131
- print(f"[END] success={str(success).lower()} steps={len(rewards)} rewards={rewards_joined}", flush=True)
132
  return round(final, 4)
133
  except Exception as e:
134
  error_msg = str(e).replace('\n', ' ')
135
- print(f"[STEP] step=1 action={{}} reward=0.00 done=true error={error_msg}", flush=True)
136
- print(f"[END] success=false steps=1 rewards=0.00", flush=True)
137
  return 0.01
138
 
139
  def main():
 
115
 
116
  except Exception as e:
117
  error_msg = str(e).replace('\n', ' ')
118
+ print(f"[STEP] step={step_n} action={action_str} reward=0.01 done=true error={error_msg}", flush=True)
119
  done = True
120
+ rewards.append("0.01")
121
  break
122
 
123
  final = float(cumulative)
 
128
 
129
  success = final >= 0.5
130
  rewards_joined = ",".join(rewards)
131
+ print(f"[END] success={str(success).lower()} steps={len(rewards)} score={final:.2f} rewards={rewards_joined}", flush=True)
132
  return round(final, 4)
133
  except Exception as e:
134
  error_msg = str(e).replace('\n', ' ')
135
+ print(f"[STEP] step=1 action={{}} reward=0.01 done=true error={error_msg}", flush=True)
136
+ print(f"[END] success=false steps=1 score=0.01 rewards=0.01", flush=True)
137
  return 0.01
138
 
139
  def main():