"""
Reward function for the agent.

This module defines the per-step reward the agent receives for each action it
takes in the environment, so that it can learn to adapt to the environment and
achieve higher rewards.
"""
# The reward system defined here stays within the range -20 to +20.
"""
The five factors used to compute the reward:
1. Correct action = positive reward (2 to 10)
2. Wrong action = negative reward (-1 to -3)
3. Resolve WITH fix (episode success) = large positive reward (+10 to +15)
4. Resolve WITHOUT fix (discourages false claims of resolution) = negative reward (-5 to -10)
5. Max steps reached (episode failure) = negative reward (-5)
"""

def calculate_reward(
    action: str,
    incident: dict,
    fix_applied: bool,
    step: int,
    max_steps: int,
) -> float:
    """Return the per-step reward for ``action``.

    The checks are evaluated in priority order; the first match wins:

    1. ``"resolve"`` with the fix applied     -> ``15.0`` (episode success)
    2. ``"resolve"`` without the fix applied  -> ``-10.0`` (false resolution)
    3. ``step >= max_steps``                  -> ``-5.0`` (out of steps)
    4. ``action == incident["fix_action"]`` and fix not yet applied -> ``5.0``
    5. diagnostic action (``"inspect_logs"``, ``"inspect_request"``) -> ``0.5``
    6. anything else                          -> ``-2.0`` (wrong action)

    Args:
        action: Name of the action the agent took.
        incident: Mapping describing the incident; must contain the key
            ``"fix_action"`` naming the action that fixes it.
        fix_applied: Truthy when the fix is considered applied. Presumably
            this reflects the state *before* the current action takes
            effect -- confirm against the caller.
        step: Current step counter of the episode.
        max_steps: Step limit; reaching it (``step >= max_steps``) counts as
            running out of steps.

    Returns:
        The reward as a float.

    Raises:
        KeyError: If ``incident`` has no ``"fix_action"`` key and evaluation
            reaches the fix-action check.
    """
    # Both "resolve" outcomes are decided before the step-limit check. A
    # genuine fix-and-resolve on the final step is still a success and must
    # not be scored as a timeout; a false resolution is always penalised.
    if action == "resolve":
        return 15.0 if fix_applied else -10.0

    # Agent ran out of steps without resolving the incident.
    if step >= max_steps:
        return -5.0

    # Correct fix action, rewarded only the first time (fix not yet applied).
    if action == incident["fix_action"] and not fix_applied:
        return 5.0

    # Diagnostic actions are helpful but do not fix anything.
    if action in ("inspect_logs", "inspect_request"):
        return 0.5

    # Any other action (including repeating an already-applied fix) is wrong.
    return -2.0