Spaces:
Sleeping
Sleeping
def clamp_unit_interval(x: float) -> float:
    """Clamp a value to the closed interval [0.0, 1.0] (Phase 1 / rubric).

    Args:
        x: Any value accepted by ``float()``.

    Returns:
        ``float(x)`` limited to [0.0, 1.0]. NaN is mapped to 0.0.

    Raises:
        TypeError, ValueError: If ``float(x)`` cannot convert the input.
    """
    import math  # local import so the block does not rely on file-level imports

    value = float(x)
    # NaN compares False against everything, so min(1.0, nan) keeps 1.0 and
    # the plain min/max clamp would report NaN as a perfect score. Treat an
    # undefined score as the lowest one instead.
    if math.isnan(value):
        return 0.0
    return max(0.0, min(1.0, value))
def compute_step_reward(action_type, is_stale):
    """Return the reward for a single step.

    Reward table (stale / not stale):
        "invalidate":  1.0 / -0.5
        "keep":       -0.6 /  0.8
        "refresh":     0.6 /  0.2

    Args:
        action_type: Name of the action taken in this step.
        is_stale: Truthy when the entry acted upon was stale.

    Returns:
        The reward from the table above, or 0 for any other action name.
    """
    if action_type == "invalidate":
        if is_stale:
            return 1.0
        return -0.5

    if action_type == "keep":
        if is_stale:
            return -0.6
        return 0.8

    if action_type == "refresh":
        if is_stale:
            return 0.6
        return 0.2

    # Unrecognised actions are neither rewarded nor penalised.
    return 0
def normalize_episode_score(total_reward, max_steps=10):
    """Normalise an episode's total reward to a score in [0.0, 1.0].

    The score is ``total_reward / max_steps`` passed through
    ``clamp_unit_interval``.

    Args:
        total_reward: Sum of the step rewards for the episode.
        max_steps: Step budget used as the divisor. Defaults to 10.

    Returns:
        The clamped score, or 0.0 when ``max_steps`` is not positive.
    """
    # A non-positive step budget has no meaningful per-step average: zero
    # would raise ZeroDivisionError and a negative value would flip the sign
    # of the score. Report the lowest score instead.
    if max_steps <= 0:
        return 0.0
    return clamp_unit_interval(total_reward / max_steps)
def evaluate_episode(history):
    """Score an episode from its step history.

    Three ratios are computed over the number of steps and blended:

    * freshness  - share of steps with a correct decision ("invalidate" or
      "refresh" on a stale entry, "keep" on a non-stale one);
    * efficiency - 1 minus the share of "invalidate" steps on non-stale
      entries;
    * stability  - 1 minus the share of steps whose action differs from the
      previous step's action.

    Args:
        history: Sized iterable of step dicts, each of the form
            ``{"action": str, "is_stale": bool}``.

    Returns:
        ``0.5 * freshness + 0.3 * efficiency + 0.2 * stability`` clamped to
        [0.0, 1.0] via ``clamp_unit_interval``; 0.0 for an empty history.

    Raises:
        KeyError: If a step lacks the "action" or "is_stale" key.
    """
    total_steps = len(history)
    if total_steps == 0:
        # Nothing to evaluate; 0.0 is already inside the unit interval.
        return 0.0

    correct_decisions = 0
    unnecessary_invalidations = 0
    oscillations = 0
    last_action = None

    for step in history:
        action = step["action"]
        is_stale = step["is_stale"]

        if (is_stale and action in ("invalidate", "refresh")) or (
            not is_stale and action == "keep"
        ):
            correct_decisions += 1

        if action == "invalidate" and not is_stale:
            unnecessary_invalidations += 1

        # Compare against None explicitly: a truthiness test would treat a
        # falsy previous action (e.g. "") as "no previous action" and miss
        # the transition.
        if last_action is not None and last_action != action:
            oscillations += 1
        last_action = action

    freshness = correct_decisions / total_steps
    efficiency = 1 - (unnecessary_invalidations / total_steps)
    # At most total_steps - 1 transitions exist, so stability stays above 0.
    stability = 1 - (oscillations / total_steps)

    score = 0.5 * freshness + 0.3 * efficiency + 0.2 * stability
    return clamp_unit_interval(score)