workflow-twin / env /reward.py
NDGCodes's picture
fix repo structure for HF
1a692ce
raw
history blame contribute delete
915 Bytes
import math

from .entities import Ticket
from .models import Action, RewardSignal
def compute_reward(
    previous: Ticket,
    current: Ticket,
    action: Action,
    distortion_penalty: float = 0.0,
    inner_product_penalty: float = 0.0,
) -> RewardSignal:
    """Score one transition from ``previous`` to ``current`` ticket state.

    The base reward is chosen by the first rule that matches:

    * ``1.0`` / ``"ticket_resolved"`` -- the ticket status just became
      ``"resolved"``.
    * ``0.1`` / ``"escalated_for_support"`` -- ``action.action_type`` is
      ``"escalate"``.
    * ``-0.05`` / ``"step_cost"`` -- ``attempts_used`` increased.
    * ``0.0`` / ``"neutral"`` -- none of the above.

    The two penalty terms are then subtracted and the result is clamped to
    ``[-1.0, 1.0]``.

    Args:
        previous: Ticket state before the action; ``status`` and
            ``attempts_used`` are read.
        current: Ticket state after the action; ``status`` and
            ``attempts_used`` are read.
        action: The action taken; only ``action_type`` is read.
        distortion_penalty: Penalty term; negative values count as zero.
        inner_product_penalty: Penalty term; negative values count as zero.

    Returns:
        A ``RewardSignal`` carrying the clamped value and a reason string.
        ``"+quant_penalty"`` is appended to the reason whenever the combined
        penalty is positive.
    """
    if previous.status != "resolved" and current.status == "resolved":
        base_value, reason = 1.0, "ticket_resolved"
    elif action.action_type == "escalate":
        base_value, reason = 0.1, "escalated_for_support"
    elif current.attempts_used > previous.attempts_used:
        base_value, reason = -0.05, "step_cost"
    else:
        base_value, reason = 0.0, "neutral"

    penalty = 0.0
    for term in (distortion_penalty, inner_product_penalty):
        # A NaN term would slip through max()/min() below and turn the final
        # clamp into +1.0 (min(1.0, nan) == 1.0), i.e. an undefined penalty
        # would earn the maximum reward. Treat it as the worst case instead,
        # which is how an infinite penalty is already handled.
        if math.isnan(term):
            term = math.inf
        penalty += max(term, 0.0)

    if penalty > 0:
        reason = f"{reason}+quant_penalty"

    # Clamp to [-1.0, 1.0]; an infinite penalty saturates at the lower bound.
    value = max(-1.0, min(1.0, base_value - penalty))
    return RewardSignal(value=value, reason=reason)