File size: 2,195 Bytes
7203787
 
 
 
 
cfae7a7
7203787
48661cd
 
 
7203787
 
 
1b91307
48661cd
1b91307
7203787
 
 
 
 
 
 
cfae7a7
 
7203787
cfae7a7
48661cd
 
 
7203787
48661cd
cfae7a7
1b91307
48661cd
 
41a051f
 
 
7203787
48661cd
cfae7a7
48661cd
7203787
48661cd
1b91307
48661cd
 
 
 
 
41a051f
7203787
1b91307
7203787
 
cfae7a7
 
1b91307
cfae7a7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
"""
grader.py  (Task 3 – Rule Checker)
------------------------------------
Deterministic grader for function-identification submissions.

Grade table
───────────
  1        → submitted function is the exact target (case-insensitive)
  0.50     → submitted function is a direct internal subfunction of the target
  0.001    → anything else

"""

import json
from math import exp
from typing import Dict, Any

class Task3Grader:
    """
    Grades a Task 3 submit_function submission.

    Parameters
    ----------
    target_function          : dict with at least 'name' and 'code' keys
    property_specification   : the property the target function violates
    """
    
    REWARD_CORRECT = 1
    REWARD_PARTIAL = 0.5
    REWARD_WRONG   = 0.001

    def __init__(self, target_function: Dict[str, Any], property_specification: Dict | str, max_steps: int) -> None:
        self.target_function        = target_function
        self.property_specification = property_specification
        self.max_steps              = max_steps
        self._decay                 = 0.01
    
    def _clamp(self, reward: float) -> float:
        return max(0.001, min(0.999, reward))

    def grade(self, submitted_function: str, steps: int, cummulative_cost: int) -> float:
        """Returns deterministic grade strictly in (0, 1)."""

        norm = submitted_function.strip().lower()
        reward = self.REWARD_WRONG
        if norm == self.target_function["name"].strip().lower():
            reward = self.REWARD_CORRECT
        elif norm in self.target_function.get("code", "").strip().lower():
            reward = self.REWARD_PARTIAL
        
        penalty = self._decay ** (-(steps * cummulative_cost) / self.max_steps)
        return self._clamp(reward * penalty)

    def get_canonical_answer(self) -> Dict[str, Dict | str]:
        """For debugging / logging only β€” do not expose to the agent."""
        return {
            "target_function":        self.target_function,
            "property_specification": json.dumps(self.property_specification)
                if isinstance(self.property_specification, dict) else self.property_specification,
        }