File size: 1,624 Bytes
9fdf681
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
499458e
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
class TaskDef:
    """Definition of a single task together with its expected outcome.

    Attributes:
        name: Identifier of the task.
        initial_msg: Initial message text associated with the task.
        expected_route: Route value a correct final state must carry.
        required_info: Names of the info items that must be collected.
            Always a list owned by this instance (never ``None``).
        needs_refund: Whether a processed refund is part of the expected
            outcome.
    """

    def __init__(self, name: str, initial_msg: str, expected_route: str, required_info: "list | None" = None, needs_refund: bool = False):
        self.name = name
        self.initial_msg = initial_msg
        self.expected_route = expected_route
        # Copy instead of aliasing the caller's list, so that mutating the
        # argument after construction cannot change this task's requirements.
        # Any falsy value (None, empty sequence) still yields a fresh [].
        self.required_info = list(required_info) if required_info else []
        self.needs_refund = needs_refund

    def __repr__(self) -> str:
        return (
            f"{type(self).__name__}("
            f"name={self.name!r}, "
            f"initial_msg={self.initial_msg!r}, "
            f"expected_route={self.expected_route!r}, "
            f"required_info={self.required_info!r}, "
            f"needs_refund={self.needs_refund!r})"
        )

# The three task definitions, ordered easy -> medium -> hard.
TASKS = [
    # Easy: only the route is specified.
    TaskDef(
        name="easy_password_reset",
        initial_msg="I forgot my password and cannot log in.",
        expected_route="IT_SUPPORT",
        needs_refund=False,
    ),
    # Medium: the route plus one required piece of information.
    TaskDef(
        name="medium_hardware_issue",
        initial_msg="My laptop won't turn on.",
        expected_route="HARDWARE_SUPPORT",
        required_info=["serial_number"],
        needs_refund=False,
    ),
    # Hard: the route, two required pieces of information, and a refund.
    TaskDef(
        name="hard_refund_processing",
        initial_msg="I want a refund for my recent purchase, it arrived broken.",
        expected_route="BILLING",
        required_info=["order_id", "photo_evidence"],
        needs_refund=True,
    ),
]

def grader(task: "TaskDef", final_state: dict) -> float:
    """Deterministically score a final state against a task definition.

    One check is made for the route, one for each entry of
    ``task.required_info``, and one more for the refund when
    ``task.needs_refund`` is set. The score is the fraction of checks
    passed, clamped to the closed interval [0.01, 0.99], so the function
    never returns exactly 0.0 or 1.0.

    Args:
        task: Task definition providing ``expected_route``,
            ``required_info`` and ``needs_refund``.
        final_state: Mapping read via ``.get`` for the keys ``"route"``,
            ``"collected_info"`` and ``"refund_processed"``. Missing keys
            count as failed checks.

    Returns:
        A float between 0.01 and 0.99 inclusive.
    """
    # Look the collection up once; a missing key or an explicit None both
    # mean "nothing collected" rather than raising on the membership test.
    collected = final_state.get("collected_info") or []

    passed = 0
    if final_state.get("route") == task.expected_route:
        passed += 1

    passed += sum(1 for info in task.required_info if info in collected)

    if task.needs_refund and final_state.get("refund_processed", False):
        passed += 1

    # Always >= 1 because the route check is unconditional, so the division
    # below cannot divide by zero.
    total_checks = 1 + len(task.required_info) + (1 if task.needs_refund else 0)

    return max(0.01, min(0.99, passed / total_checks))