fix: ensure reward evolution chart has (0,0) baseline for judge visibility
Browse files
- server/app.py +5 -5
server/app.py
CHANGED
|
@@ -155,12 +155,11 @@ def build_custom_ui():
|
|
| 155 |
|
| 156 |
# 3. Simple Stats & Reward History
|
| 157 |
history = obs.get("info", {}).get("rewards_history", [])
|
|
|
|
| 158 |
df_reward = pd.DataFrame({
|
| 159 |
-
"Step": [i + 1 for i in range(len(history))],
|
| 160 |
-
"Reward": history
|
| 161 |
})
|
| 162 |
-
if df_reward.empty:
|
| 163 |
-
df_reward = pd.DataFrame({"Step": [0], "Reward": [0.0]})
|
| 164 |
|
| 165 |
best_score = obs.get("info", {}).get("best_score", 0.0)
|
| 166 |
steps_left = obs.get("info", {}).get("steps_remaining", 5)
|
|
@@ -201,7 +200,8 @@ def build_custom_ui():
|
|
| 201 |
|
| 202 |
return df, pol, score, steps, ep, stat, df_hist, reward_msg, json.dumps(obs, indent=2)
|
| 203 |
except Exception as e:
|
| 204 |
-
|
|
|
|
| 205 |
|
| 206 |
with gr.Blocks(
|
| 207 |
title="PolicyEvolver Judge Console",
|
|
|
|
| 155 |
|
| 156 |
# 3. Simple Stats & Reward History
|
| 157 |
history = obs.get("info", {}).get("rewards_history", [])
|
| 158 |
+
# Always include point (0,0) to ensure a visible line from episode start
|
| 159 |
df_reward = pd.DataFrame({
|
| 160 |
+
"Step": [0] + [i + 1 for i in range(len(history))],
|
| 161 |
+
"Reward": [0.0] + [float(r) for r in history]
|
| 162 |
})
|
|
|
|
|
|
|
| 163 |
|
| 164 |
best_score = obs.get("info", {}).get("best_score", 0.0)
|
| 165 |
steps_left = obs.get("info", {}).get("steps_remaining", 5)
|
|
|
|
| 200 |
|
| 201 |
return df, pol, score, steps, ep, stat, df_hist, reward_msg, json.dumps(obs, indent=2)
|
| 202 |
except Exception as e:
|
| 203 |
+
err_df = pd.DataFrame({"Step": [0], "Reward": [0.0]})
|
| 204 |
+
return pd.DataFrame(), f"### Execution Error\n{str(e)}", 0, 0, "ERROR", "### ERROR", err_df, f"Traceback:\n{traceback.format_exc()}", "{}"
|
| 205 |
|
| 206 |
with gr.Blocks(
|
| 207 |
title="PolicyEvolver Judge Console",
|