Spaces:
Sleeping
Sleeping
ajaxwin
refactor: Update grading logic and submission handling across tasks for improved accuracy and consistency
cfae7a7 | """ | |
| grader.py (Task 2 — Property Discovery) | |
| ----------------------------------------- | |
| Deterministic scorer for natural-language property submissions. | |
| One submission attempt per episode. | |
| Grade range: 0.001 – 0.999 (matchscore output, already normalised, then clamped strictly inside (0, 1)). | |
| """ | |
| from typing import Tuple | |
| from utils import SemanticMatcher | |
| _SCORE_MIN = 0.001 # grades are strictly (0, 1) | |
| _SCORE_MAX = 0.999 | |
| def _clamp(v: float) -> float: | |
| return max(_SCORE_MIN, min(_SCORE_MAX, v)) | |
class Task2Grader:
    """
    Scores a single Task 2 property submission against a reference property.

    Parameters
    ----------
    function_name : name of the target function
    property      : the 'property' field from the target function's data
    """

    def __init__(self, function_name: str, property: str) -> None:
        # Both values are stored as given; only `property` is read by grade().
        self.property = property
        self.function_name = function_name

    def grade(self, submitted: str) -> Tuple[float, str]:
        """Return ``(score, label)`` for *submitted*; the score is clamped via ``_clamp``.

        A missing, empty, or whitespace-only submission short-circuits to the
        clamped value of 0.0 with the label ``"no_match"``. Otherwise a fresh
        ``SemanticMatcher`` compares the reference property with the
        submission, and the label is whatever ``matcher.confidence()`` returns.
        """
        if submitted and submitted.strip():
            semantic_matcher = SemanticMatcher()
            # The matcher is expected to hand back a normalised score already;
            # clamping again keeps the result inside the grader's own bounds.
            raw_score = semantic_matcher.matchscore(self.property, submitted)
            return _clamp(raw_score), semantic_matcher.confidence()

        # Nothing usable was submitted: lowest possible grade.
        return _clamp(0.0), "no_match"