Spaces:

Mist-ic
/

sevzero

Sleeping

App Files Files Community

sevzero / tests /test_grader.py

Mist-ic

Add tests and fix premature termination bug

5f8bd3c about 2 months ago

raw

history blame contribute delete

2.86 kB

	"""Tests for the deterministic grader."""

	import sys
	import os

	# Put the parent of this file's directory at the front of sys.path so the
	# `server.grader` import below is looked up there first (presumably the
	# repository root, given this file lives under tests/ — confirm).
	sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))

	from server.grader import grade_episode


	class TestGraderBounds:
	    """Checks that grade_episode scores stay in [0.0, 1.0] and order sensibly."""

	    def test_perfect_score(self):
	        """A resolved episode with full SLO after 3 of 10 steps should exceed 0.8."""
	        history = [
	            {"tick": 0, "action": "inspect_logs", "target": "svc", "success": True},
	            {"tick": 1, "action": "restart_service", "target": "svc", "success": True},
	        ]
	        outcome = grade_episode(
	            final_slo_score=1.0,
	            steps_taken=3,
	            max_steps=10,
	            actions_taken=history,
	            terminated=True,
	            termination_reason="resolved",
	        )
	        score = outcome.score
	        assert 0.0 <= score <= 1.0
	        assert score > 0.8  # Resolved quickly = high score

	    def test_zero_score(self):
	        """Zero SLO, no actions, and a timeout at the step limit should score 0.0."""
	        no_actions = []
	        outcome = grade_episode(
	            final_slo_score=0.0,
	            steps_taken=10,
	            max_steps=10,
	            actions_taken=no_actions,
	            terminated=True,
	            termination_reason="timeout",
	        )
	        assert outcome.score == 0.0

	    def test_partial_credit(self):
	        """Half SLO after ten no-op steps should land strictly between 0 and 1."""
	        idle_history = []
	        for tick in range(10):
	            idle_history.append({"tick": tick, "action": "noop", "success": True})
	        outcome = grade_episode(
	            final_slo_score=0.5,
	            steps_taken=10,
	            max_steps=10,
	            actions_taken=idle_history,
	            terminated=True,
	            termination_reason="timeout",
	        )
	        assert 0.0 < outcome.score < 1.0

	    def test_determinism(self):
	        """Grading the same episode arguments twice should give the same score."""
	        # The same kwargs dict (and the same actions list) is passed to both calls.
	        episode = {
	            "final_slo_score": 0.7,
	            "steps_taken": 5,
	            "max_steps": 20,
	            "actions_taken": [
	                {"tick": 0, "action": "inspect_logs", "target": "svc", "success": True},
	                {"tick": 1, "action": "restart_service", "target": "svc", "success": True},
	            ],
	            "terminated": True,
	            "termination_reason": "timeout",
	        }
	        first = grade_episode(**episode)
	        second = grade_episode(**episode)
	        assert first.score == second.score

	    def test_resolved_bonus(self):
	        """Resolved episodes should score higher than timed-out ones at same SLO."""
	        restart_history = [
	            {"tick": tick, "action": "restart_service", "target": "svc", "success": True}
	            for tick in range(5)
	        ]
	        resolved_outcome = grade_episode(
	            final_slo_score=1.0,
	            steps_taken=5,
	            max_steps=10,
	            actions_taken=restart_history,
	            terminated=True,
	            termination_reason="resolved",
	        )

	        idle_history = [
	            {"tick": tick, "action": "noop", "success": True}
	            for tick in range(10)
	        ]
	        timeout_outcome = grade_episode(
	            final_slo_score=1.0,
	            steps_taken=10,
	            max_steps=10,
	            actions_taken=idle_history,
	            terminated=True,
	            termination_reason="timeout",
	        )

	        assert resolved_outcome.score > timeout_outcome.score