ajaxwin
refactor: Update grading logic and submission handling across tasks for improved accuracy and consistency
cfae7a7
raw
history blame
1.26 kB
"""
grader.py (Task 2 – Property Discovery)
-----------------------------------------
Deterministic scorer for natural-language property submissions.
One submission attempt per episode.
Grade range: strictly inside (0, 1); the matchscore output is clamped to [0.001, 0.999].
"""
from typing import Tuple
from utils import SemanticMatcher
# Grades must lie strictly inside (0, 1), so the closed interval
# [_SCORE_MIN, _SCORE_MAX] is used as the effective range.
_SCORE_MIN = 0.001
_SCORE_MAX = 0.999


def _clamp(v: float) -> float:
    """Restrict *v* to the closed interval [_SCORE_MIN, _SCORE_MAX].

    Parameters
    ----------
    v : raw score to bound

    Returns
    -------
    ``_SCORE_MIN`` when *v* is below the range or is NaN, ``_SCORE_MAX``
    when *v* is above the range, otherwise *v* unchanged.
    """
    # NaN is the only value that is not equal to itself. Every ordering
    # comparison with NaN is False, so min()/max() alone would let a NaN
    # score come out as _SCORE_MAX (the best grade). Treat an undefined
    # score as the worst grade instead.
    if v != v:
        return _SCORE_MIN
    return max(_SCORE_MIN, min(_SCORE_MAX, v))
class Task2Grader:
    """
    Scores one Task 2 property submission against a reference property.

    Parameters
    ----------
    function_name : name of the target function
    property      : the 'property' field from the target function's data
    """

    def __init__(self, function_name: str, property: str) -> None:
        """Remember the target function name and its reference property text."""
        self.property = property
        self.function_name = function_name

    def grade(self, submitted: str) -> Tuple[float, str]:
        """Return ``(grade, label)`` for *submitted*; the grade is passed through ``_clamp``.

        A missing, empty, or whitespace-only submission is graded as
        ``_clamp(0.0)`` with the label ``"no_match"``. Any other submission
        is compared against ``self.property`` by a fresh ``SemanticMatcher``;
        the label is whatever ``matcher.confidence()`` reports afterwards.
        """
        if submitted and submitted.strip():
            semantic = SemanticMatcher()
            # The original note says matchscore is already clamped by
            # SemanticMatcher; _clamp is applied regardless so the result
            # stays within this module's bounds.
            raw = semantic.matchscore(self.property, submitted)
            return _clamp(raw), semantic.confidence()

        # Nothing usable was submitted → 0.001 (the lower bound).
        return _clamp(0.0), "no_match"