Spaces:
Sleeping
Sleeping
File size: 1,652 Bytes
def clamp_unit_interval(x: float) -> float:
    """Clamp *x* to the closed interval [0.0, 1.0] (Phase 1 / rubric).

    The argument is first converted with ``float()``, so ints and numeric
    strings are accepted; anything ``float()`` rejects raises its usual
    ``TypeError`` / ``ValueError``.

    NaN is mapped to 0.0. A ``max(0.0, min(1.0, value))`` formulation would
    turn NaN into 1.0, because every ordering comparison against NaN is
    false; an undefined score must not be reported as a perfect one.

    >>> clamp_unit_interval(1.7)
    1.0
    >>> clamp_unit_interval(-2)
    0.0
    >>> clamp_unit_interval(float("nan"))
    0.0
    """
    value = float(x)
    if value >= 1.0:
        return 1.0
    if value > 0.0:
        return value
    # Reached for value <= 0.0 (including -0.0) and for NaN, which fails
    # both comparisons above.
    return 0.0
def compute_step_reward(action_type, is_stale):
    """Return the reward for one step.

    Reward table (stale / not stale):
        "invalidate":  1.0 / -0.5
        "keep":       -0.6 /  0.8
        "refresh":     0.6 /  0.2

    ``is_stale`` is evaluated for truthiness. Any other ``action_type``
    yields 0.
    """
    # (action name, reward when stale, reward when not stale)
    reward_table = (
        ("invalidate", 1.0, -0.5),
        ("keep", -0.6, 0.8),
        ("refresh", 0.6, 0.2),
    )
    for known_action, stale_reward, fresh_reward in reward_table:
        if action_type == known_action:
            return stale_reward if is_stale else fresh_reward
    return 0
def normalize_episode_score(total_reward, max_steps=10):
    """Return ``total_reward / max_steps`` clamped to [0.0, 1.0].

    Args:
        total_reward: Sum of the rewards collected over the episode.
        max_steps: Number of steps the total is averaged over (default 10).

    Returns:
        The per-step average reward, passed through ``clamp_unit_interval``.
        A non-positive ``max_steps`` yields 0.0: zero would otherwise raise
        ``ZeroDivisionError`` and a negative value would flip the sign of
        the score. This mirrors how ``evaluate_episode`` scores an episode
        with no steps.
    """
    if max_steps <= 0:
        return clamp_unit_interval(0.0)
    return clamp_unit_interval(total_reward / max_steps)
def evaluate_episode(history):
    """Score a whole episode on a 0.0-1.0 scale.

    Args:
        history: Sized sequence of step dicts, each shaped like::

            {
                "action": str,
                "is_stale": bool
            }

    Returns:
        ``0.5 * freshness + 0.3 * efficiency + 0.2 * stability``, passed
        through ``clamp_unit_interval``, where

        * freshness  = fraction of steps with a correct decision: a stale
          entry was invalidated or refreshed, or a non-stale entry was kept;
        * efficiency = 1 - fraction of steps that invalidated a non-stale
          entry;
        * stability  = 1 - fraction of steps whose action differs from the
          previous step's action.

        An empty history scores 0.0.

    Raises:
        KeyError: If a step lacks the "action" or "is_stale" key.
    """
    total_steps = len(history)
    if total_steps == 0:
        return clamp_unit_interval(0.0)

    correct_decisions = 0
    unnecessary_invalidations = 0
    oscillations = 0
    previous_action = None

    for index, step in enumerate(history):
        action = step["action"]
        is_stale = step["is_stale"]

        if is_stale:
            if action in ("invalidate", "refresh"):
                correct_decisions += 1
        else:
            if action == "keep":
                correct_decisions += 1
            if action == "invalidate":
                unnecessary_invalidations += 1

        # "Is there a previous step?" is decided by position, not by the
        # truthiness of the previous action, so a switch away from a falsy
        # action value (e.g. "") is still counted as an oscillation.
        if index > 0 and previous_action != action:
            oscillations += 1
        previous_action = action

    freshness = correct_decisions / total_steps
    efficiency = 1 - (unnecessary_invalidations / total_steps)
    stability = 1 - (oscillations / total_steps)

    score = 0.5 * freshness + 0.3 * efficiency + 0.2 * stability
    return clamp_unit_interval(score)
|