from uuid import uuid4
from typing import List, Any, Dict
import random

from openenv.core.env_server.interfaces import Environment
from openenv.core.env_server.types import State

from .models import SpecGamingAction, SpecGamingObservation
# Note: We import TASKS from .tasks to keep a single source of truth
from .tasks import (
    TASKS, 
    grade_data_cleaning, 
    grade_financial, 
    grade_instruction,
    normalize_score
)

# =========================
# 🌍 ENVIRONMENT
# =========================
class SpecGamingEnvironment(Environment):
    """
    OpenEnv compliant environment for SpecGuard tasks.

    Every episode is a single step: ``reset()`` picks the next task from
    ``TASKS`` in round-robin order, and ``step()`` grades the submitted
    action with that task's grader and finishes the episode (``done=True``).
    """
    SUPPORTS_CONCURRENT_SESSIONS: bool = True

    tasks = TASKS

    # Reward reported whenever no real grade is available (initial
    # observation, missing/unknown grader, grader failure). Per the original
    # note it must stay strictly between 0.0 and 1.0 for Phase 2 compliance.
    _DEFAULT_REWARD: float = 0.10

    def __init__(self):
        super().__init__()
        self.tasks = TASKS

        self._state = State(episode_id=str(uuid4()), step_count=0)
        # No task is selected until reset() runs (or step() selects one lazily).
        self.current_task = None
        self.task_index = 0

        # Internal mapping for string-based grader lookups if needed
        self._grader_map = {
            "grade_data_cleaning": grade_data_cleaning,
            "grade_financial": grade_financial,
            "grade_instruction": grade_instruction
        }

    # -------------------------
    # RESET
    # -------------------------
    def reset(self) -> SpecGamingObservation:
        """
        Start a new episode and cycle to the next task.

        Returns:
            The initial observation describing the selected task, with the
            default reward and ``done=False``.

        Raises:
            RuntimeError: If no tasks are configured.
        """
        # Fail with a clear message instead of a ZeroDivisionError from the
        # modulo below.
        if not self.tasks:
            raise RuntimeError("SpecGamingEnvironment has no tasks configured")

        self._state = State(episode_id=str(uuid4()), step_count=0)

        # Cycle through tasks so that successive resets visit every task.
        self.current_task = self.tasks[self.task_index % len(self.tasks)]
        self.task_index += 1

        return SpecGamingObservation(
            task=self.current_task["name"],
            input_data=self.current_task["input"],
            instruction=self.current_task["instruction"],
            # Initial reward must be > 0.0 and < 1.0 for Phase 2 compliance
            reward=self._DEFAULT_REWARD,
            done=False,
            metadata={
                "task_id": self.current_task["id"],
                "required_steps": self.current_task.get("required_steps", [])
            }
        )

    # -------------------------
    # STEP
    # -------------------------
    def step(self, action: SpecGamingAction) -> SpecGamingObservation:
        """
        Grade ``action`` against the current task and end the episode.

        If ``reset()`` has not been called yet, a task is selected first so
        that stepping a fresh instance does not crash on a missing task.

        Args:
            action: The agent's submission; its ``steps`` and ``output``
                attributes are echoed back in the observation metadata.

        Returns:
            A terminal observation (``done=True``) carrying the reward and a
            ``reason`` string describing how the reward was obtained.
        """
        # A fresh instance has no task yet; select one rather than failing
        # later with "'NoneType' object is not subscriptable".
        if self.current_task is None:
            self.reset()

        self._state.step_count += 1
        task = self.current_task

        try:
            grader = self._resolve_grader(task.get("grader"))
            if grader is None:
                reward = self._DEFAULT_REWARD
                reason = "no usable grader for task; default reward applied"
            else:
                # Convert inside the try so a non-numeric grader result is
                # handled like any other grader failure.
                reward = float(grader(action))
                reason = "graded via task grader"
        except Exception as e:
            # Deliberate best-effort: a failing grader must not take the
            # environment down; the error is surfaced via metadata instead.
            reward = self._DEFAULT_REWARD
            reason = f"grader error: {str(e)}"

        return SpecGamingObservation(
            task=task["name"],
            input_data=task["input"],
            instruction=task["instruction"],
            reward=reward,
            done=True,
            metadata={
                "reason": reason,
                "steps": action.steps,
                "output": action.output,
                "step_count": self._state.step_count
            }
        )

    def _resolve_grader(self, grader):
        """Return the callable for a task's ``grader`` entry, or ``None``.

        String entries are looked up in the internal grader map; callables
        are returned unchanged; anything else yields ``None``.
        """
        if isinstance(grader, str):
            return self._grader_map.get(grader)
        if callable(grader):
            return grader
        return None

    # -------------------------
    # STATE
    # -------------------------
    @property
    def state(self) -> State:
        """Return the current episode state (episode id and step count)."""
        return self._state


# =========================
# 🔥 EXPORTS (CRITICAL)
# =========================
# Public API of this module: the environment class plus the TASKS list
# re-exported from .tasks.
__all__ = ["SpecGamingEnvironment", "TASKS"]