Spaces:

parvpareek
/

cache-env

Sleeping

Parv Pareek

done

e75c8ce about 1 month ago

1.65 kB

	def clamp_unit_interval(x: float) -> float:
	    """Clamp a score to the closed interval [0.0, 1.0] (Phase 1 / rubric).

	    Args:
	        x: Any value accepted by ``float()``.

	    Returns:
	        ``0.0`` for values below zero (including ``-inf``) and for NaN,
	        ``1.0`` for values above one (including ``+inf``), otherwise the
	        value itself converted to ``float``.

	    Raises:
	        TypeError, ValueError: If ``float(x)`` cannot convert the input.
	    """
	    value = float(x)
	    # NaN is the only float that is not equal to itself. Without this guard
	    # ``min(1.0, nan)`` evaluates to 1.0, so an undefined score would be
	    # reported as a perfect one.
	    if value != value:
	        return 0.0
	    return max(0.0, min(1.0, value))


	def compute_step_reward(action_type, is_stale):
	    """Return the reward earned by a single cache action.

	    Reward table (stale entry / fresh entry):
	        "invalidate":  1.0 / -0.5
	        "keep":       -0.6 /  0.8
	        "refresh":     0.6 /  0.2

	    ``is_stale`` is evaluated by truthiness. Any other ``action_type``
	    earns ``0``.
	    """
	    if action_type == "invalidate":
	        if is_stale:
	            return 1.0
	        return -0.5

	    if action_type == "keep":
	        if is_stale:
	            return -0.6
	        return 0.8

	    if action_type == "refresh":
	        if is_stale:
	            return 0.6
	        return 0.2

	    # Unrecognised actions are neither rewarded nor penalised.
	    return 0


	def normalize_episode_score(total_reward, max_steps=10):
	    """Scale an episode's summed reward to a per-step score in [0.0, 1.0].

	    Args:
	        total_reward: Sum of the step rewards collected in the episode.
	        max_steps: Step count to divide by. Defaults to 10.

	    Returns:
	        ``total_reward / max_steps`` passed through ``clamp_unit_interval``.
	        A non-positive ``max_steps`` yields the clamped zero score, the same
	        result ``evaluate_episode`` gives for an empty history.
	    """
	    # Dividing by zero would raise, and a negative divisor would flip the
	    # sign of the reward, so a non-positive step count scores as zero.
	    if max_steps <= 0:
	        return clamp_unit_interval(0.0)
	    return clamp_unit_interval(total_reward / max_steps)


	def evaluate_episode(history):
	    """Grade a finished episode on freshness, efficiency and stability.

	    Args:
	        history: Sized sequence of step records, each a mapping of the form
	            ``{"action": str, "is_stale": bool}``.

	    Returns:
	        ``0.5 * freshness + 0.3 * efficiency + 0.2 * stability`` passed
	        through ``clamp_unit_interval``, where

	        * freshness is the share of steps that invalidated or refreshed a
	          stale entry, or kept a fresh one;
	        * efficiency is one minus the share of steps that invalidated a
	          fresh entry;
	        * stability is one minus the number of action changes between
	          consecutive steps divided by the step count.

	        An empty history scores the clamped zero.

	    Raises:
	        KeyError: If a step record lacks "action" or "is_stale".
	    """
	    total_steps = len(history)
	    if total_steps == 0:
	        return clamp_unit_interval(0.0)

	    correct_decisions = 0
	    unnecessary_invalidations = 0
	    oscillations = 0
	    previous_action = None

	    for index, step in enumerate(history):
	        action = step["action"]
	        is_stale = step["is_stale"]

	        if is_stale:
	            # A stale entry is handled correctly by dropping or refreshing it.
	            if action in ("invalidate", "refresh"):
	                correct_decisions += 1
	        elif action == "keep":
	            correct_decisions += 1
	        elif action == "invalidate":
	            # Throwing away a fresh entry is wasted work.
	            unnecessary_invalidations += 1

	        # Decide "is there a previous step" by position rather than by the
	        # truthiness of the previous action, so a falsy action value (such
	        # as "") on the previous step still registers as a change.
	        if index > 0 and previous_action != action:
	            oscillations += 1
	        previous_action = action

	    freshness = correct_decisions / total_steps
	    efficiency = 1 - (unnecessary_invalidations / total_steps)
	    stability = 1 - (oscillations / total_steps)

	    score = 0.5 * freshness + 0.3 * efficiency + 0.2 * stability

	    return clamp_unit_interval(score)