| from .models import EnvironmentState |
|
|
def grade_easy(state: EnvironmentState) -> float:
    """Score an easy-difficulty episode from the action types it contains.

    Each action type below contributes once, no matter how many times it
    appears in ``state.action_history``:

    * ``check_policy``: +0.2
    * ``issue_refund``: +0.5
    * ``close_ticket``: +0.3
    * ``escalate``: -0.5

    Returns:
        The summed score, clamped to the range [0.0, 1.0].
    """
    # (action type, score contribution), applied in this order.
    weights = (
        ("check_policy", 0.2),
        ("issue_refund", 0.5),
        ("close_ticket", 0.3),
        ("escalate", -0.5),
    )
    performed = [step.action_type for step in state.action_history]

    score = 0.0
    for action_type, delta in weights:
        if action_type in performed:
            score += delta

    return max(0.0, min(1.0, score))
|
|
def grade_medium(state: EnvironmentState) -> float:
    """Score a medium-difficulty episode from the action types it contains.

    If ``issue_refund`` appears anywhere in ``state.action_history`` the score
    is 0.0 regardless of the other actions. Otherwise each of these action
    types contributes once: ``check_policy`` +0.3, ``reply_to_customer`` +0.4,
    ``close_ticket`` +0.3.

    Returns:
        The resulting score, clamped to the range [0.0, 1.0].
    """
    seen = [entry.action_type for entry in state.action_history]

    # A refund overrides everything else, so handle it before adding bonuses.
    if "issue_refund" in seen:
        return 0.0

    total = 0.0
    total += 0.3 if "check_policy" in seen else 0.0
    total += 0.4 if "reply_to_customer" in seen else 0.0
    total += 0.3 if "close_ticket" in seen else 0.0

    return max(0.0, min(1.0, total))
|
|
def grade_hard(state: EnvironmentState) -> float:
    """Score a hard-difficulty episode from its action history.

    Contributions, each applied at most once:

    * ``fetch_user_data`` present: +0.2
    * an ``escalate`` action whose ``parameters["reason"]`` equals
      ``"billing_tier2"``: +0.5 (an escalation with any other reason, or
      with no parameters, earns nothing)
    * ``reply_to_customer`` present: +0.3
    * ``issue_refund`` present: -0.5
    * ``close_ticket`` present: -0.3

    Returns:
        The summed score, clamped to the range [0.0, 1.0].
    """
    actions = [step.action_type for step in state.action_history]

    # ``parameters`` may be None on an action; treat that as "no reason
    # given" instead of letting ``None.get`` raise AttributeError.
    escalated = any(
        step.action_type == "escalate"
        and (step.parameters or {}).get("reason") == "billing_tier2"
        for step in state.action_history
    )

    reward = 0.0
    if "fetch_user_data" in actions:
        reward += 0.2
    if escalated:
        reward += 0.5
    if "reply_to_customer" in actions:
        reward += 0.3

    if "issue_refund" in actions:
        reward -= 0.5
    if "close_ticket" in actions:
        reward -= 0.3

    return max(0.0, min(1.0, reward))
|
|
def grade_fraud_detection(state: EnvironmentState) -> float:
    """Score a fraud-detection episode from the action types it contains.

    If ``issue_refund`` appears anywhere in ``state.action_history`` the score
    is 0.0. Otherwise each of these action types contributes once:
    ``fetch_user_data`` +0.3, ``check_policy`` +0.4, ``close_ticket`` +0.5.
    The bonuses add up to 1.2, so an episode containing all three is clamped
    to 1.0.

    Diagnostic output goes to the module logger at DEBUG level rather than
    to stdout, so grading stays silent unless logging is configured.

    Returns:
        The resulting score, clamped to the range [0.0, 1.0].
    """
    # Function-local import keeps this block self-contained.
    import logging

    log = logging.getLogger(__name__)

    actions = [step.action_type for step in state.action_history]
    log.debug("Actions received for grading: %s", actions)

    # A refund overrides every bonus, so handle it before accumulating.
    if "issue_refund" in actions:
        log.debug("issue_refund present; reward forced to 0.0")
        return 0.0

    # (action type, bonus), applied in this order.
    bonuses = (
        ("fetch_user_data", 0.3),
        ("check_policy", 0.4),
        ("close_ticket", 0.5),
    )
    reward = 0.0
    for action_type, bonus in bonuses:
        if action_type in actions:
            reward += bonus
            log.debug("Reward after %s: %s", action_type, reward)

    return max(0.0, min(1.0, reward))
|
|
def grade(state: EnvironmentState) -> float:
    """Route ``state`` to the matching grader and return its score.

    ``state.current_task_id`` is checked first: the id
    ``"task_fraud_detection"`` always goes to ``grade_fraud_detection``.
    Otherwise ``state.task_difficulty`` selects ``grade_easy``,
    ``grade_medium`` or ``grade_hard``.

    Returns:
        The selected grader's score, or 0.0 when the difficulty is none of
        ``"easy"``, ``"medium"`` or ``"hard"``.
    """
    # The task-id match takes precedence over the difficulty lookup.
    if state.current_task_id == "task_fraud_detection":
        return grade_fraud_detection(state)

    graders_by_difficulty = (
        ("easy", grade_easy),
        ("medium", grade_medium),
        ("hard", grade_hard),
    )
    for difficulty, grader in graders_by_difficulty:
        if state.task_difficulty == difficulty:
            return grader(state)

    return 0.0
|
|