""" here we gonna define the reward function for our agent, so that it can learn or adapt the environment and able to get/achieve the rewards for the actions it takes in the environment. OR Per step reward """ # The rewarding system we writing here will be within the scale of -20 to +20. """ The factors we are using (5 factors): 1. Correct action = positive reward (2 to 10) 2. Wrong action = negative reward (-1 to -3) 3. Resolve with FIX (Episode success) = large positive reward (+10 to +15) 4. Resolve WITHOUT FIX (Prevents lying) = negative reward (-5 to -10) 5. Max steps reached (Episode failure) = negative reward (-5) """ def calculate_reward(action, incident, fix_applied, step, max_steps): # agents says resolved but didn't fix - penalty if action == "resolve" and not fix_applied: return -10.0 # agent ran out of steps - penalty if step >= max_steps: return -5.0 # agent fixed and resolved the incident (succes) if action == "resolve" and fix_applied: return 15.0 # for correct fix action if action == incident["fix_action"] and not fix_applied: return 5.0 # Diagnostic actions - helpful but doesn't fix if action in ["inspect_logs", "inspect_request"]: return 0.5 # for wrong action return -2.0