Spaces:
Sleeping
Sleeping
| from .entities import Ticket | |
| from .models import Action, RewardSignal | |
def compute_reward(
    previous: Ticket,
    current: Ticket,
    action: Action,
    distortion_penalty: float = 0.0,
    inner_product_penalty: float = 0.0,
) -> RewardSignal:
    """Score one ticket transition as a reward clamped to ``[-1.0, 1.0]``.

    The base reward is chosen by the first matching rule:

    * ``1.0`` / ``"ticket_resolved"`` -- ``previous.status`` was not
      ``"resolved"`` and ``current.status`` is.
    * ``0.1`` / ``"escalated_for_support"`` -- ``action.action_type`` is
      ``"escalate"``.
    * ``-0.05`` / ``"step_cost"`` -- ``current.attempts_used`` is greater
      than ``previous.attempts_used``.
    * ``0.0`` / ``"neutral"`` -- none of the above.

    Args:
        previous: Ticket state before the action; ``status`` and
            ``attempts_used`` are read.
        current: Ticket state after the action; ``status`` and
            ``attempts_used`` are read.
        action: The action taken; only ``action_type`` is read.
        distortion_penalty: Amount subtracted from the base reward. Only a
            strictly positive value counts; zero, negative, and NaN are
            ignored.
        inner_product_penalty: Second penalty term, handled the same way and
            added to ``distortion_penalty``.

    Returns:
        A ``RewardSignal`` whose ``value`` is the base reward minus the
        combined penalty, clamped to ``[-1.0, 1.0]``. ``reason`` is the
        matching rule's label, with ``"+quant_penalty"`` appended when the
        combined penalty is positive.
    """
    base_value = 0.0
    reason = "neutral"

    if previous.status != "resolved" and current.status == "resolved":
        base_value = 1.0
        reason = "ticket_resolved"
    elif action.action_type == "escalate":
        base_value = 0.1
        reason = "escalated_for_support"
    elif current.attempts_used > previous.attempts_used:
        base_value = -0.05
        reason = "step_cost"

    # Keep only strictly positive penalties. The explicit comparison (rather
    # than max(p, 0.0)) matters for NaN: max() would pass NaN through, and the
    # min/max clamp below would then turn a NaN value into +1.0, the maximum
    # reward. Every comparison with NaN is False, so NaN counts as no penalty.
    distortion = distortion_penalty if distortion_penalty > 0.0 else 0.0
    inner_product = inner_product_penalty if inner_product_penalty > 0.0 else 0.0
    penalty = distortion + inner_product

    if penalty > 0:
        reason = f"{reason}+quant_penalty"

    value = base_value - penalty
    # Clamp so that large (or infinite) penalties bottom out at -1.0.
    value = max(-1.0, min(1.0, value))
    return RewardSignal(value=value, reason=reason)