""" 
The three main methods implementation ->
1. reset() - start of each episode
2. step(action) - agent takes an action
3. state() - called anytime 
"""

"""
1. For incident selection - curriculum learning approach (easy -> medium -> hard)
2. For Reward factors - 5 factors (correct, wrong, resolve with/without fix, max steps)
3. For episode end conditions - resolved with fix , resolved without fix , max steps reached 
4. For action space -  8 actions(including diagnostic , fix , terminal)
5. For max steps - 10 steps per episode
6. For reward - range is -20 to +20
"""

"""
extra info ->
1. stage 1 episodes -> 1-10 
2. stage 2 epiosdes -> 11-25
3. stage 3 epiosdes -> 26+
"""

"""
Our 3 models->
1. observation-  what agent sees at each step 
2. action - what agent can do at each step
3. EnvState - internal tracking of the environment 
"""

import random
from typing import Dict, Any, Tuple, Optional, List
from pydantic import BaseModel 

from environment.incident_generator import get_random_incident, get_incident_by_type
from environment.action_space import is_valid_action
from environment.reward import calculate_reward

class Observation(BaseModel): 
  step: int
  max_steps: int
  incident_summary : str
  logs: List[str]
  response_code: int
  fix_applied: bool
  is_resolved: bool
    
class Action(BaseModel):
  action_name: str

class EnvState(BaseModel):
  current_incident: Dict[str, Any]
  step_counter: int
  fix_applied: bool
  total_reward: float
  is_resolved: bool  

class APITriageEnv:
  def __init__(self, max_steps = 10):
    self.max_steps = max_steps
    self.step_counter = 0
    self.done = False
    self.incident = None
    self.fix_applied = False
    self.total_reward = 0.0
    self.total_episodes = 0

  def reset(self):
    self.step_counter = 0
    self.done = False
    self.fix_applied = False     
    self.total_reward = 0.0
    self.total_episodes += 1

    # implying the curriculum learning approach here
    if self.total_episodes <= 10:
      # stage 1 -> easy incidents (auth_error, missing_fields)
      incident_type  = random.choice(["auth_error", "missing_fields"])
      self.incident = get_incident_by_type(incident_type)
    elif self.total_episodes <= 25:
      # stage 2 -> medium incidents
      incident_type = random.choice(["rate_limit", "timeout", "wrong_endpoint"])
      self.incident = get_incident_by_type(incident_type)
    elif self.total_episodes > 25:
      # stage 3 -> hard incidents
      incident_type = "server_error"
      self.incident = get_incident_by_type(incident_type)  

    return self.state()  
  
  def state(self):
    """Returns what the agent sees at current step"""
    return Observation(
        step=self.step_counter,
        max_steps=self.max_steps,
        incident_summary=self.incident["summary"],
        logs=self.incident["logs"],
        response_code=self.incident["code"],
        fix_applied=self.fix_applied,
        is_resolved=self.done
    )
  
  def step(self, action):
    """Agent takes an action and environment responds with new state and reward"""
    # 1. if episode is done or finished already 
    if self.done:
      state = self.state()
      reward = 0.0
      info  = {"error": "episode is already finished "}
      done = True
      return state, reward, done, info
    
    # 2. increment step counter and check is action is valid
    self.step_counter += 1

    # 3. validate the action 
    if not is_valid_action(action):
      state = self.state()
      reward = -2.0
      info = {"error" : "the action is not valid"}
      done = False
      return state, reward , done , info

    # 4. Reward calculation  
    reward = calculate_reward(action , self.incident, self.fix_applied, self.step_counter , self.max_steps)
 
    # 5. updating fix applied status if the action is the correct fix action
    if action == self.incident["fix_action"]:
      self.fix_applied = True

    # 6. update toatal reward 
    self.total_reward += reward

    # 7. prepare info (for all cases )
    info = {
      "step": self.step_counter,
      "incident_type": self.incident["type"],
      "fix_applied": self.fix_applied,
      "total_reward": self.total_reward
    }

    # 8. check if the epiosde is resolved
    if action == "resolve":
      self.done = True
      info["resolution"] = "success" if self.fix_applied else "failure - resolved without fix"
    

    # 9. check if epsiode is not resolved that means max steps are reached    
    if self.step_counter >= self.max_steps:
      self.done = True
      info["resolution"] = "failure - max steps reached"  
    
    # 10. final return (one return at the end)
    return self.state(), reward, self.done, info