Somuai12 committed on
Commit
8085f66
·
1 Parent(s): a5522cd

fix: ensure reward evolution chart has (0,0) baseline for judge visibility

Browse files
Files changed (1) hide show
  1. server/app.py +5 -5
server/app.py CHANGED
@@ -155,12 +155,11 @@ def build_custom_ui():
155
 
156
  # 3. Simple Stats & Reward History
157
  history = obs.get("info", {}).get("rewards_history", [])
 
158
  df_reward = pd.DataFrame({
159
- "Step": [i + 1 for i in range(len(history))],
160
- "Reward": history
161
  })
162
- if df_reward.empty:
163
- df_reward = pd.DataFrame({"Step": [0], "Reward": [0.0]})
164
 
165
  best_score = obs.get("info", {}).get("best_score", 0.0)
166
  steps_left = obs.get("info", {}).get("steps_remaining", 5)
@@ -201,7 +200,8 @@ def build_custom_ui():
201
 
202
  return df, pol, score, steps, ep, stat, df_hist, reward_msg, json.dumps(obs, indent=2)
203
  except Exception as e:
204
- return pd.DataFrame(), f"### Execution Error\n{str(e)}", 0, 0, "ERROR", "### ERROR", pd.DataFrame(), f"Traceback:\n{traceback.format_exc()}", "{}"
 
205
 
206
  with gr.Blocks(
207
  title="PolicyEvolver Judge Console",
 
155
 
156
  # 3. Simple Stats & Reward History
157
  history = obs.get("info", {}).get("rewards_history", [])
158
+ # Always include point (0,0) to ensure a visible line from episode start
159
  df_reward = pd.DataFrame({
160
+ "Step": [0] + [i + 1 for i in range(len(history))],
161
+ "Reward": [0.0] + [float(r) for r in history]
162
  })
 
 
163
 
164
  best_score = obs.get("info", {}).get("best_score", 0.0)
165
  steps_left = obs.get("info", {}).get("steps_remaining", 5)
 
200
 
201
  return df, pol, score, steps, ep, stat, df_hist, reward_msg, json.dumps(obs, indent=2)
202
  except Exception as e:
203
+ err_df = pd.DataFrame({"Step": [0], "Reward": [0.0]})
204
+ return pd.DataFrame(), f"### Execution Error\n{str(e)}", 0, 0, "ERROR", "### ERROR", err_df, f"Traceback:\n{traceback.format_exc()}", "{}"
205
 
206
  with gr.Blocks(
207
  title="PolicyEvolver Judge Console",