File size: 4,668 Bytes
78940a4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
from .models import Action, Email, State
from .reward import compute_dense_reward

def grade_easy(action: Action, email: Email) -> float:
    """Score a single action taken on one of the easy-tier emails.

    Each known email accepts exactly one action type: ``"reply"`` for
    ``e1`` and ``"mark_spam"`` for ``e2``.

    Args:
        action: The agent's action; only ``action_type`` is read.
        email: The email being handled; only ``id`` is read.

    Returns:
        ``compute_dense_reward(True)`` when the action type matches the one
        expected for the email, otherwise ``compute_dense_reward(False)``
        (which also covers email ids not listed here).
    """
    email_id = email.id
    if email_id == "e1":
        expected_type = "reply"
    elif email_id == "e2":
        expected_type = "mark_spam"
    else:
        # Unknown email: nothing can be correct, so the action is not inspected.
        return compute_dense_reward(False)

    if action.action_type == expected_type:
        return compute_dense_reward(True)
    return compute_dense_reward(False)

def grade_medium(action: Action, email: Email) -> float:
    """Score a single action taken on one of the medium-tier emails.

    Grading rules per email id:

    * ``m1`` (request is missing an order ID): ``"request_info"`` earns the
      full reward. A ``"reply"`` with text earns the full reward when the
      lower-cased text contains both ``"order"`` and ``"id"``, otherwise
      half. A ``"forward"`` whose ``forward_to`` contains ``"support"``
      earns 0.3 of the full reward.
    * ``m2``: ``"mark_spam"`` or ``"archive"`` earns the full reward.
    * ``m3``: a ``"reply"`` whose text is longer than 5 characters earns the
      full reward; any other ``"reply"`` earns half.

    Args:
        action: The agent's action. Reads ``action_type`` and, depending on
            the branch, ``response_text`` and ``forward_to``.
        email: The email being handled; only ``id`` is read.

    Returns:
        ``compute_dense_reward(True)``, optionally scaled by a partial-credit
        factor, or ``compute_dense_reward(False)`` when nothing matches
        (including unknown email ids).
    """
    email_id = email.id
    if email_id not in ("m1", "m2", "m3"):
        # Unknown email: the action is never inspected.
        return compute_dense_reward(False)

    kind = action.action_type

    if email_id == "m2":
        if kind in ["mark_spam", "archive"]:
            return compute_dense_reward(True)
        return compute_dense_reward(False)

    if email_id == "m3":
        if kind != "reply":
            return compute_dense_reward(False)
        body = action.response_text
        if body and len(body) > 5:
            return compute_dense_reward(True)
        # A reply was sent, but it is empty or too short for full credit.
        return compute_dense_reward(True) * 0.5

    # Remaining case: m1, where the preferred action is asking for the
    # missing information.
    if kind == "request_info":
        return compute_dense_reward(True)

    if kind == "reply":
        body = action.response_text
        if not body:
            # A reply without any text earns nothing.
            return compute_dense_reward(False)
        lowered = body.lower()
        # NOTE(review): these are plain substring checks, so "id" also
        # matches inside words such as "provide" -- confirm that is intended.
        if "order" in lowered and "id" in lowered:
            return compute_dense_reward(True)
        return compute_dense_reward(True) * 0.5

    if kind == "forward":
        recipient = action.forward_to
        if recipient and "support" in recipient.lower():
            # Escalating an incomplete ticket instead of asking for the
            # missing details only earns partial credit.
            return compute_dense_reward(True) * 0.3

    return compute_dense_reward(False)

def grade_hard(action: Action, email: Email) -> float:
    """Score a single action taken on one of the hard-tier emails.

    Grading rules per email id (factors multiply
    ``compute_dense_reward(True)``):

    * ``h1`` (VIP issue): ``"reply"`` -> 1.0 if the lower-cased text contains
      ``"investigat"``, else 0.5; ``"escalate"`` -> 1.0 when
      ``priority_level`` is ``"urgent"``, else 0.7; anything else -> 0.0.
    * ``h2`` (alert): ``"escalate"`` -> full when urgent, else 0.7;
      ``"forward"`` to an address containing ``"eng"`` -> full when urgent,
      else 0.5.
    * ``h3`` (compliance): ``"reply"`` -> full if the text contains
      ``"acknowledge"``, else 0.4; ``"forward"`` to an address containing
      ``"legal"`` -> full.
    * ``h4`` (CEO phishing attack): ``"mark_spam"`` -> full; ``"archive"`` or
      ``"escalate"`` -> 0.5.
    * ``h5`` (intern project): ``"reply"`` -> full.

    Args:
        action: The agent's action. Reads ``action_type`` and, depending on
            the branch, ``response_text``, ``forward_to`` and
            ``priority_level``.
        email: The email being handled; only ``id`` is read.

    Returns:
        The (possibly scaled) positive reward, or
        ``compute_dense_reward(False)`` when nothing matches for ``h2``-``h5``
        or the email id is unknown. For ``h1`` a non-matching action yields
        ``compute_dense_reward(True) * 0.0`` instead.
    """
    email_id = email.id
    if email_id not in ("h1", "h2", "h3", "h4", "h5"):
        # Unknown email: the action is never inspected.
        return compute_dense_reward(False)

    kind = action.action_type

    if email_id == "h1":  # VIP issue
        # NOTE(review): the original inline note said replies mentioning
        # "downtime" should also count, but only the "investigat" stem is
        # checked here -- confirm the intended keyword list.
        if kind == "reply":
            body = action.response_text
            multiplier = 1.0 if body and "investigat" in body.lower() else 0.5
        elif kind == "escalate":
            multiplier = 1.0 if action.priority_level == "urgent" else 0.7
        else:
            multiplier = 0.0
        # NOTE(review): unlike every other branch, a wrong action here yields
        # compute_dense_reward(True) * 0.0 rather than
        # compute_dense_reward(False) -- confirm that is intended.
        return compute_dense_reward(True) * multiplier

    if email_id == "h2":  # Alert
        if kind == "escalate":
            is_urgent = action.priority_level == "urgent"
            reward = compute_dense_reward(True)
            return reward if is_urgent else reward * 0.7
        if kind == "forward":
            recipient = action.forward_to
            if recipient and "eng" in recipient.lower():
                is_urgent = action.priority_level == "urgent"
                reward = compute_dense_reward(True)
                return reward if is_urgent else reward * 0.5
        return compute_dense_reward(False)

    if email_id == "h3":  # Compliance
        if kind == "reply":
            body = action.response_text
            has_ack = body and "acknowledge" in body.lower()
            reward = compute_dense_reward(True)
            return reward if has_ack else reward * 0.4
        if kind == "forward":
            recipient = action.forward_to
            if recipient and "legal" in recipient.lower():
                return compute_dense_reward(True)
        return compute_dense_reward(False)

    if email_id == "h4":  # CEO phishing attack
        if kind == "mark_spam":
            return compute_dense_reward(True)
        if kind in ("archive", "escalate"):
            # Taking the message out of circulation without flagging it as
            # spam earns half credit.
            return compute_dense_reward(True) * 0.5
        return compute_dense_reward(False)

    # Remaining case: h5, the intern project, which only needs a reply.
    if kind == "reply":
        return compute_dense_reward(True)
    return compute_dense_reward(False)

def grade_action(task_level: str, action: Action, email: Email, state: State) -> float:
    """Grade one step and scale it by the number of emails in the task.

    The per-email grade from the level-specific grader is divided by the
    task's email count (2 for "easy", 3 for "medium", 5 for "hard") and then
    clamped to the range [0.0, 1.0].

    Args:
        task_level: One of "easy", "medium" or "hard"; any other value
            scores 0.0 without calling a grader.
        action: The agent's action, passed through to the level grader.
        email: The email being handled, passed through to the level grader.
        state: Accepted for interface compatibility; not read here.

    Returns:
        The clamped, size-normalised reward for this step.
    """
    emails_per_task = {"easy": 2, "medium": 3, "hard": 5}
    email_count = emails_per_task.get(task_level)
    if email_count is None:
        # Unrecognised level: a raw score of 0.0 stays 0.0 after clamping.
        return 0.0

    if task_level == "hard":
        raw_score = grade_hard(action, email)
    elif task_level == "medium":
        raw_score = grade_medium(action, email)
    else:
        raw_score = grade_easy(action, email)

    # Each email contributes at most 1 / email_count, so a whole episode
    # sums to at most 1.0; the clamp also removes any negative step score.
    capped = min(1.0, raw_score / email_count)
    return max(0.0, capped)