"""
AdventureAgent — generates Blockly / Time Challenge story arcs.

Model priority chain (tries each in order, falls to next on failure):
  1. YOUR GGUF fine-tune  (faaizashiq/adventure-agent-qwen-gguf)
     → llama-cpp-python on HF Spaces free CPU (16GB RAM)
     → Slow first load (~60s), fast after warm (~30s)
     → FREE FOREVER, uses your actual fine-tuned weights

  2. Gemini 2.0 Flash fallback
     → Used automatically if GGUF fails or is still loading
     → Fast (~3-5s), free tier (1500 req/day)
     → Requires GEMINI_API_KEY

  3. Static fallback levels
     → Used if both AI paths fail (no internet, no keys, etc.)

Environment variables (set as HF Space Secrets):
  GEMINI_API_KEY          — for fallback path 2
  ADVENTURE_TUNED_MODEL   — your HF repo: faaizashiq/adventure-agent-qwen-gguf
  ADVENTURE_GGUF_FILE     — specific .gguf filename (auto-detected if blank)
  HF_TOKEN                — only needed if your GGUF repo is private
"""

import google.generativeai as genai
import json
import os
import random
import time
import csv
import sys
import threading
from typing import Dict, Any, Optional, List

# Lift the csv module's per-field size cap (presumably because the training
# CSV contains very long cells — confirm against the data).
# sys.maxsize does not fit in a C long on every platform (e.g. 64-bit
# Windows), where csv.field_size_limit() raises OverflowError; shrink the
# requested limit until one is accepted instead of crashing at import time.
_csv_field_limit = sys.maxsize
while True:
    try:
        csv.field_size_limit(_csv_field_limit)
        break
    except OverflowError:
        _csv_field_limit //= 10

# Hugging Face dataset id used as an optional source of student questions.
# It is only fetched when ADVENTURE_LOAD_HF_DATASET is enabled and the local
# JSONL file is missing or could not be read.
STUDENTS_DATASET_ID = "majeedkazemi/students-coding-questions-from-ai-assistant"

# ─────────────────────────────────────────────────────────────────────────────
# GGUF SINGLETON LOADER
# Loads once per process and stays in memory for all subsequent requests.
# A lock ensures concurrent requests never start a second load.
# NOTE: the "attempted" flag is set *before* the download/load begins, so a
# caller that arrives while the first load is still running gets None back
# immediately (it does not wait) and is expected to use a fallback path.
# A failed load is never retried within the same process.
# ─────────────────────────────────────────────────────────────────────────────
_gguf_llm = None  # cached Llama instance; None until loaded, or if loading failed
_gguf_load_lock = threading.Lock()  # serialises the one-time load
_gguf_load_attempted = False  # flipped to True when a load starts; never reset
_gguf_load_failed = False  # set when the load raised; not read anywhere in the code visible here


def _load_gguf_model(repo_id: str, gguf_filename: Optional[str] = None):
    """
    Download and load a GGUF model from HF Hub via llama-cpp-python.

    The load is attempted at most once per process. The "attempted" flag is
    set before the download starts, so callers that arrive while another
    thread is still loading get ``None`` immediately instead of blocking,
    and a failed load is never retried.

    Args:
        repo_id: Hugging Face repository that hosts the ``.gguf`` file(s).
        gguf_filename: Exact file to load. When empty, the repository is
            scanned and a file is chosen by quantisation preference.

    Returns:
        The cached Llama instance, or None if the model is not (yet)
        available or loading failed. Errors are printed, never raised.
    """
    global _gguf_llm, _gguf_load_attempted, _gguf_load_failed

    # Fast path — already loaded, already failed, or load in progress
    if _gguf_load_attempted:
        return _gguf_llm

    with _gguf_load_lock:
        # Double-check inside lock
        if _gguf_load_attempted:
            return _gguf_llm

        _gguf_load_attempted = True
        try:
            # Check if llama-cpp-python is installed before attempting anything
            try:
                from llama_cpp import Llama  # type: ignore
            except ImportError:
                print("[AdventureAgent] ❌ llama-cpp-python not installed — GGUF unavailable. Using Gemini fallback.")
                _gguf_load_failed = True
                return None

            from huggingface_hub import hf_hub_download, list_repo_files

            hf_token = os.getenv("HF_TOKEN", "").strip() or None

            # Auto-detect the best GGUF file if not pinned
            if not gguf_filename:
                print(f"[AdventureAgent] Scanning {repo_id} for GGUF files...")
                gguf_files = [
                    f for f in list_repo_files(repo_id, token=hf_token)
                    if f.endswith(".gguf")
                ]
                if not gguf_files:
                    raise FileNotFoundError(f"No .gguf files found in {repo_id}")

                # Preference order: Q4_K_M (best quality/speed balance on CPU),
                # then Q4_0, Q5_K_M, Q3_K_M. Files matching none of these rank
                # last; ties keep the repository listing order (file size is
                # NOT taken into account).
                preferred_quants = ("q4_k_m", "q4_0", "q5_k_m", "q3_k_m")

                def _rank(name: str) -> int:
                    lowered = name.lower()
                    for position, quant in enumerate(preferred_quants):
                        if quant in lowered:
                            return position
                    return 99

                # min() returns the first best-ranked entry, same as a stable sort
                gguf_filename = min(gguf_files, key=_rank)
                print(f"[AdventureAgent] Auto-selected GGUF: {gguf_filename}")

            cache_dir = os.path.join(os.getenv("HF_HOME", "/tmp"), "gguf_models")
            print(f"[AdventureAgent] Downloading {repo_id}/{gguf_filename} ...")
            model_path = hf_hub_download(
                repo_id=repo_id,
                filename=gguf_filename,
                token=hf_token,
                local_dir=cache_dir,
            )
            print(f"[AdventureAgent] Loading GGUF from {model_path} ...")

            _gguf_llm = Llama(
                model_path=model_path,
                n_ctx=2048,       # context window
                n_threads=2,      # HF Spaces free tier: 2 vCPU
                n_gpu_layers=0,   # CPU only — no GPU on free tier
                verbose=False,
            )
            print("[AdventureAgent] ✅ GGUF model loaded and ready on CPU.")

        except ImportError as e:
            # llama_cpp is handled above, so an ImportError reaching this point
            # comes from another dependency (typically huggingface_hub).
            print(f"[AdventureAgent] ❌ GGUF dependency import failed: {e}")
            _gguf_load_failed = True
        except Exception as e:
            print(f"[AdventureAgent] ❌ GGUF load failed: {e}")
            _gguf_load_failed = True

        return _gguf_llm


def _data_dir() -> str:
    """Return the (un-normalised) path of the ``data`` folder one level above this module's directory."""
    module_dir = os.path.dirname(__file__)
    return os.path.join(module_dir, os.pardir, 'data')


# ─────────────────────────────────────────────────────────────────────────────
# AGENT
# ─────────────────────────────────────────────────────────────────────────────
class AdventureAgent:
    """
    Generates story arcs (a title plus a list of levels) for the 'blockly'
    and 'time_challenge' game modes, using a GGUF model and/or Gemini when
    configured and static built-in levels otherwise.
    """

    # Class-level caches: shared by every instance and filled by the
    # _load_* methods the first time an instance is constructed.
    _dataset_cache: List[Dict[str, str]] = []       # rows read from train.csv
    _students_cache: List[Dict[str, Any]] = []      # student-question records (local JSONL or HF dataset)
    _few_shot_cache: Optional[Dict[str, Any]] = None  # parsed few-shot JSON; None means "not loaded yet"

    def __init__(self, api_key: Optional[str] = None):
        """
        Read configuration from the environment, set up the model clients
        and load the prompt datasets.

        Args:
            api_key: Gemini API key; falls back to the GEMINI_API_KEY
                environment variable when omitted.
        """
        self.api_key = api_key or os.getenv("GEMINI_API_KEY")
        self.tuned_model_env = os.getenv("ADVENTURE_TUNED_MODEL", "").strip()
        self.gguf_file_env = os.getenv("ADVENTURE_GGUF_FILE", "").strip() or None

        tuned = self.tuned_model_env
        is_gemini_tuned = tuned.startswith("tunedModels/")

        # A "owner/repo" style value selects the GGUF path; a
        # "tunedModels/..." value names a Gemini tuned model instead.
        self.use_gguf = bool(tuned) and "/" in tuned and not is_gemini_tuned

        # Gemini serves as the primary model (no GGUF) or as the fallback.
        self.gemini_client = None
        gemini_model_id = tuned if is_gemini_tuned else "gemini-2.0-flash"
        if self.api_key:
            genai.configure(api_key=self.api_key)
            self.gemini_client = genai.GenerativeModel(gemini_model_id)

        if self.use_gguf:
            fallback_name = f"Gemini {gemini_model_id}" if self.gemini_client else "Static levels"
            print(f"[AdventureAgent] Primary: GGUF ({self.tuned_model_env})")
            print(f"[AdventureAgent] Fallback: {fallback_name}")
            # Kick off the model download/load in the background so that
            # construction does not wait for it.
            warmup = threading.Thread(
                target=_load_gguf_model,
                args=(self.tuned_model_env, self.gguf_file_env),
                daemon=True,
            )
            warmup.start()
        elif self.gemini_client:
            print(f"[AdventureAgent] Primary: Gemini ({gemini_model_id})")
        else:
            print("[AdventureAgent] WARNING: No model configured — static fallback only.")

        # Per-instance response cache (entries expire after cache_duration
        # seconds) and the list of levels kept from feedback.
        self.cache: Dict[str, Any] = {}
        self.cache_duration = 3600
        self.high_rated_levels: List[Dict] = []

        for loader in (
            self._load_dataset_lazy,
            self._load_students_dataset_lazy,
            self._load_few_shot_examples,
        ):
            loader()

    # ─────────────────────────────────────────────────────────────────────────
    # DATA LOADING
    # ─────────────────────────────────────────────────────────────────────────
    def _load_dataset_lazy(self):
        """
        Fill the shared ``_dataset_cache`` from ``train.csv`` once.

        Only the first 500 CSV rows are examined, and only rows with a
        non-empty ``question`` column are kept. Does nothing when the cache
        is already populated or the file is absent. Read errors are printed
        and swallowed; rows gathered before the error stay in the cache.
        """
        if AdventureAgent._dataset_cache:
            return
        file_path = os.path.join(_data_dir(), 'train.csv')
        if not os.path.exists(file_path):
            return
        try:
            # newline="" hands raw line endings to the csv module, which is
            # required for quoted fields containing embedded newlines to be
            # parsed correctly.
            with open(file_path, "r", encoding="utf-8", newline="") as f:
                reader = csv.DictReader(f)
                for i, row in enumerate(reader):
                    if i >= 500:
                        break
                    if row.get('question'):
                        AdventureAgent._dataset_cache.append(row)
            print(f"[AdventureAgent] Loaded {len(AdventureAgent._dataset_cache)} rows from train.csv")
        except Exception as e:
            print(f"[AdventureAgent] train.csv load error: {e}")

    def _load_students_dataset_lazy(self):
        """
        Fill the shared ``_students_cache`` once.

        Source order:
          1. ``students_coding_questions.jsonl`` in the data directory.
             Malformed lines and records that are not JSON objects are
             skipped and counted rather than aborting the whole load.
          2. The Hugging Face dataset ``STUDENTS_DATASET_ID`` (first 800
             rows), only when ADVENTURE_LOAD_HF_DATASET is 1/true/yes and
             the local file produced nothing usable.

        All errors are printed and swallowed.
        """
        if AdventureAgent._students_cache:
            return
        jsonl_path = os.path.join(_data_dir(), 'students_coding_questions.jsonl')
        if os.path.isfile(jsonl_path):
            try:
                # Parse into a local list first so a read error half-way
                # through cannot leave a partially filled shared cache.
                rows: List[Dict[str, Any]] = []
                skipped = 0
                with open(jsonl_path, "r", encoding="utf-8") as f:
                    for line in f:
                        line = line.strip()
                        if not line:
                            continue
                        try:
                            record = json.loads(line)
                        except json.JSONDecodeError:
                            skipped += 1
                            continue
                        # Later code calls .get() on every record
                        if isinstance(record, dict):
                            rows.append(record)
                        else:
                            skipped += 1
                AdventureAgent._students_cache.extend(rows)
                print(f"[AdventureAgent] Loaded {len(AdventureAgent._students_cache)} CodeAid rows")
                if skipped:
                    print(f"[AdventureAgent] Skipped {skipped} malformed JSONL lines")
                # A file that yielded only malformed lines is treated like a
                # failed load, so the HF source below can still be tried.
                if rows or not skipped:
                    return
            except Exception as e:
                print(f"[AdventureAgent] students jsonl error: {e}")

        if os.getenv("ADVENTURE_LOAD_HF_DATASET", "").lower() in ("1", "true", "yes"):
            try:
                from datasets import load_dataset
                ds = load_dataset(STUDENTS_DATASET_ID, split="train")
                for i, row in enumerate(ds):
                    if i >= 800:
                        break
                    stem = (row.get("input_task_description")
                            or row.get("input_intention")
                            or row.get("input_question"))
                    # Skip empty values and the literal strings "null"/"none"
                    if stem and str(stem).strip().lower() not in ("null", "none", ""):
                        AdventureAgent._students_cache.append({
                            "problem_stem": str(stem)[:500],
                            "feature_type": row.get("feature_type"),
                        })
                print(f"[AdventureAgent] Loaded {len(AdventureAgent._students_cache)} rows from HF")
            except Exception as e:
                print(f"[AdventureAgent] HF dataset skip: {e}")

    def _load_few_shot_examples(self):
        """
        Populate the shared ``_few_shot_cache`` from
        ``adventure_few_shot_examples.json`` unless it is already set.

        A missing or unreadable file results in an empty dict; a read or
        parse error is printed.
        """
        if AdventureAgent._few_shot_cache is not None:
            return

        examples_path = os.path.join(_data_dir(), 'adventure_few_shot_examples.json')
        loaded = {}
        if os.path.isfile(examples_path):
            try:
                with open(examples_path, "r", encoding="utf-8") as handle:
                    loaded = json.load(handle)
            except Exception as e:
                print(f"[AdventureAgent] few-shot load error: {e}")
                loaded = {}
        AdventureAgent._few_shot_cache = loaded

    # ─────────────────────────────────────────────────────────────────────────
    # PROMPT HELPERS
    # ─────────────────────────────────────────────────────────────────────────
    def _pick_student_problem(self) -> Optional[Dict[str, Any]]:
        """
        Pick a random student problem from the shared students cache.

        Rows carrying a ``task_description`` are preferred; when none have
        one, any row may be chosen.

        Returns:
            ``{"question": ..., "feature_type": ...}``, or None when the
            cache is empty. Missing or null fields are returned as empty
            strings so that callers can slice and format them safely.
        """
        rows = AdventureAgent._students_cache
        if not rows:
            return None
        preferred = [r for r in rows if r.get("task_description")]
        row = random.choice(preferred or rows)
        question = (
            row.get("problem_stem")
            or row.get("task_description")
            or row.get("question")
            or ""
        )
        return {
            "question": question,
            # HF-sourced rows may store feature_type as None
            "feature_type": row.get("feature_type") or "",
        }

    def _few_shot_block(self, mode: str, difficulty: str) -> str:
        """
        Render the few-shot examples stored for ``mode`` / ``difficulty``
        as a prompt section.

        Returns an empty string when no examples are available; otherwise
        the JSON dump of the examples (cut to 3000 characters) wrapped in a
        heading line.
        """
        cache = AdventureAgent._few_shot_cache
        if not cache:
            return ""

        per_mode = cache.get(mode) or {}
        examples = per_mode.get(difficulty)
        if not examples:
            return ""

        rendered = json.dumps(examples, indent=2)[:3000]
        heading = "\nQUALITY REFERENCE (match this tone and JSON shape — do not copy verbatim):\n"
        return heading + rendered + "\n"

    def learn_from_feedback(self, level_data: Dict[str, Any], rating: int,
                            developer_feedback: Optional[str] = None):
        """
        Remember a level as a quality example for future prompts.

        A level is kept when it was rated 5 or when non-blank developer
        feedback accompanies it. Feedback is stored with the level under
        the ``CRITICAL_DEVELOPER_TUNING_NOTE`` key.

        The caller's ``level_data`` is not modified: a shallow copy is
        stored, so the note cannot leak into a level object that is also
        held in the response cache. At most the 50 most recent entries are
        retained.

        Args:
            level_data: The level (or arc) the feedback refers to.
            rating: Numeric rating; only the value 5 qualifies on its own.
            developer_feedback: Optional free-text tuning note.
        """
        note = str(developer_feedback).strip() if developer_feedback else ""
        if rating != 5 and not note:
            return

        entry = dict(level_data)
        if note:
            entry["CRITICAL_DEVELOPER_TUNING_NOTE"] = note

        self.high_rated_levels.append(entry)
        # Trim in place so the list never holds more than 50 entries
        del self.high_rated_levels[:-50]

    def _build_prompt(self, mode: str, difficulty: str,
                      dataset_problem: Optional[Dict[str, str]] = None,
                      is_gguf: bool = False) -> str:
        """
        Assemble the generation prompt for one story arc.

        Args:
            mode: "blockly" or "time_challenge"; any other value yields a
                prompt without a per-level schema.
            difficulty: "Easy", "Intermediate", or anything else (treated
                as Expert).
            dataset_problem: Optional student problem to adapt; its
                ``question`` (or ``problem_stem``) and ``feature_type``
                are embedded in the prompt.
            is_gguf: When True, the few-shot block and the
                developer-approved examples are left out.

        Returns:
            The complete prompt text.
        """
        # difficulty -> (rule line, JSON snippet for "required_concepts")
        tiers = {
            "Easy": (
                "EASY: Simple If/Else or basic sequencing. No complex loops.",
                '["if", "print"]',
            ),
            "Intermediate": (
                "INTERMEDIATE: MUST use a loop (For/While) or basic Array iteration.",
                '["loop", "variable"]',
            ),
        }
        expert_tier = (
            "EXPERT: Nested loops, complex logic, or algorithmic thinking.",
            '["loop", "nested_loop"]',
        )
        diff_rules, concepts = tiers.get(difficulty, expert_tier)

        sections = [
            "You are the Lead Game Designer for 'CodeCracker', an educational coding game for ages 10-14.\n",
            "Generate a UNIQUE STORY ARC of EXACTLY 5 interconnected game levels.\n\n",
            f"Mode: {mode} | Difficulty: {difficulty}\n",
            f"{diff_rules}\n",
        ]

        if dataset_problem:
            question = dataset_problem.get('question') or dataset_problem.get('problem_stem', '')
            feature = dataset_problem.get('feature_type', '')
            sections.append("\nREAL STUDENT PROBLEM (adapt for ages 10-14, sci-fi story):\n")
            sections.append(f"Type: {feature}\nContext: {question}\n")
            sections.append("Do NOT use C pointers, malloc, or university-level topics.\n")

        sections.append(
            "\nSCAFFOLDING RULES:\n"
            "- Never give the full solution code.\n"
            "- Teach one concept per level; build across the arc.\n"
            "- Hints guide structure first, logic second — never paste the answer.\n"
        )

        if not is_gguf:
            sections.append(self._few_shot_block(mode, difficulty))

        sections.append(
            "\nOUTPUT: RAW JSON ONLY. No markdown. No explanation.\n"
            'Top-level keys: "story_arc_title", "levels" (array of exactly 5).\n'
        )

        # Per-level JSON shape the model must follow
        if mode == "blockly":
            sections.append(
                '\nEach level: {"type":"blockly","level_id":"id","title":"...","story":"...",'
                '"concept_tutorial":"...","problem":"...","toolbox_categories":["Math","Loops","Variables","Text","Logic"],'
                '"initial_code":"","validation_rules":{"required_concepts":' + concepts + ',"expected_output":"..."},'
                '"hint_1":"...","hint_2":"..."}'
            )
        elif mode == "time_challenge":
            sections.append(
                '\nEach level: {"type":"time_challenge","level_id":"id","title":"...","story":"...",'
                '"concept_tutorial":"...","timer_seconds":60,"buggy_code":"...","task":"...","solution_patch":"..."}'
            )

        if self.high_rated_levels and not is_gguf:
            approved = json.dumps(self.high_rated_levels[-2:], indent=2)[:1500]
            sections.append("\n\nDEVELOPER-APPROVED EXAMPLES (match this quality):\n" + approved)

        return "".join(sections)

    # ─────────────────────────────────────────────────────────────────────────
    # GGUF INFERENCE
    # ─────────────────────────────────────────────────────────────────────────
    def _call_gguf(self, prompt: str) -> str:
        """
        Run ``prompt`` through the GGUF model and return the generated text.

        The completion is streamed; each piece is echoed to stdout as it
        arrives and the pieces are concatenated for the return value.

        Raises:
            RuntimeError: when the GGUF model is not available.
            Exception: any error raised while streaming is printed and
                re-raised.
        """
        llm = _load_gguf_model(self.tuned_model_env, self.gguf_file_env)
        if llm is None:
            raise RuntimeError("GGUF model unavailable")

        print("[AdventureAgent] Running GGUF inference on CPU...")

        system_message = {
            "role": "system",
            "content": (
                "You are the Lead Game Designer for CodeCracker, an educational "
                "coding game for ages 10-14. Always respond with valid JSON only. "
                "No markdown, no explanation."
            ),
        }
        conversation = [system_message, {"role": "user", "content": prompt}]

        pieces = []
        try:
            stream = llm.create_chat_completion(
                messages=conversation,
                max_tokens=1500,
                temperature=0.7,
                top_p=0.9,
                stop=["<|im_end|>", "<|endoftext|>"],
                stream=True,
            )
            for event in stream:
                piece = event["choices"][0]["delta"].get("content", "")
                if not piece:
                    # deltas without text content carry nothing to collect
                    continue
                pieces.append(piece)
                print(piece, end="", flush=True)
        except Exception as e:
            print(f"\n[AdventureAgent] GGUF streaming error: {e}")
            raise

        print()  # finish the line of streamed output
        print("[AdventureAgent] GGUF inference complete.")
        return "".join(pieces)

    # ─────────────────────────────────────────────────────────────────────────
    # GEMINI INFERENCE
    # ─────────────────────────────────────────────────────────────────────────
    def _call_gemini(self, prompt: str) -> str:
        """
        Send ``prompt`` to the configured Gemini client and return its text.

        Returns an empty string when the response text is empty or None.

        Raises:
            RuntimeError: when no Gemini client was configured.
        """
        client = self.gemini_client
        if not client:
            raise RuntimeError("Gemini client not configured (no API key)")

        print("[AdventureAgent] Calling Gemini API...")
        reply = client.generate_content(prompt)
        reply_text = reply.text
        return reply_text if reply_text else ""

    # ─────────────────────────────────────────────────────────────────────────
    # JSON EXTRACTION
    # ─────────────────────────────────────────────────────────────────────────
    @staticmethod
    def _extract_json(text: str) -> Dict[str, Any]:
        """Strip markdown fences and extract the first JSON object."""
        if "```json" in text:
            text = text.split("```json")[1].split("```")[0]
        elif "```" in text:
            text = text.split("```")[1].split("```")[0]
        # Find the opening brace
        start = text.find("{")
        if start != -1:
            text = text[start:]
        return json.loads(text.strip())

    # ─────────────────────────────────────────────────────────────────────────
    # PUBLIC ENTRY POINT
    # ─────────────────────────────────────────────────────────────────────────
    def generate_level(self, mode: str, difficulty: str,
                       specific_topic: Optional[str] = None) -> Dict[str, Any]:
        """
        Produce a story arc for ``mode`` at ``difficulty``.

        Backends are tried in order — GGUF (when enabled), then Gemini —
        and a backend counts as failed when the call raises, when its
        output is not parseable JSON, or when the parsed object has no
        non-empty ``levels`` list. Only if every backend fails are the
        static fallback levels returned. Successful results are cached per
        (mode, difficulty, topic) for ``cache_duration`` seconds.

        Args:
            mode: "blockly" or "time_challenge".
            difficulty: "Tutorial"/"Onboarding" return the fixed onboarding
                arc; other values are passed to the prompt builder.
            specific_topic: Optional topic that replaces the randomly
                picked dataset problem.

        Returns:
            A dict with ``story_arc_title`` and ``levels``. The cached
            object itself is returned on cache hits.
        """
        import traceback  # only needed for the failure diagnostics below

        if difficulty in ["Tutorial", "Onboarding"]:
            return self._get_onboarding_level(mode)

        # Pick a student problem for context
        dataset_prob: Optional[Dict[str, Any]] = None
        if specific_topic:
            dataset_prob = {"question": specific_topic}
        elif mode == "blockly":
            dataset_prob = self._pick_student_problem()
            if not dataset_prob and AdventureAgent._dataset_cache:
                row = random.choice(AdventureAgent._dataset_cache)
                dataset_prob = {"question": (row.get("question") or "")[:600]}

        # Cache key — avoids hammering the model for identical requests.
        # `or ''` guards against a dataset row whose question is None.
        topic_part = specific_topic or ((dataset_prob or {}).get('question') or '')[:80]
        cache_key = f"{mode}_{difficulty}_{topic_part}"
        entry = self.cache.get(cache_key)
        if entry is not None and time.time() - entry['timestamp'] < self.cache_duration:
            print("[AdventureAgent] Serving from cache.")
            return entry['data']

        # (label, callable, is_gguf) in priority order. Gemini is always
        # attempted; without a client it raises and is reported as failed.
        backends = []
        if self.use_gguf:
            backends.append(("GGUF", self._call_gguf, True))
        backends.append(("Gemini", self._call_gemini, False))

        for position, (label, call, is_gguf) in enumerate(backends):
            has_next = position + 1 < len(backends)
            next_step = "Trying Gemini fallback..." if has_next else "Using static fallback."
            try:
                prompt = self._build_prompt(mode, difficulty, dataset_prob, is_gguf=is_gguf)
                level_data = self._extract_json(call(prompt))
                levels = level_data.get("levels") if isinstance(level_data, dict) else None
                if not isinstance(levels, list) or not levels:
                    raise ValueError("model output has no non-empty 'levels' list")
            except Exception as e:
                # Parsing happens inside the try so that unusable output
                # from one backend still lets the next backend run.
                print(f"[AdventureAgent] {label} failed: {e}. {next_step}")
                traceback.print_exc()
                continue

            self.cache[cache_key] = {"data": level_data, "timestamp": time.time()}
            return level_data

        return self._get_fallback_level(mode, difficulty)

    # ─────────────────────────────────────────────────────────────────────────
    # STATIC LEVELS
    # ─────────────────────────────────────────────────────────────────────────
    def _get_onboarding_level(self, mode: str) -> Dict[str, Any]:
        """
        Return the fixed single-level tutorial arc.

        ``mode`` is accepted but not used: the tutorial is always a
        Blockly level.
        """
        validation = {
            "required_concepts": ["print"],
            "expected_output": "Hello World",
        }
        tutorial = {
            "type": "blockly",
            "level_id": "tutorial_001",
            "title": "Welcome to Coding!",
            "story": "Let's learn how to speak to the computer. We need to say 'Hello'.",
            "concept_tutorial": "Drag & Drop: You build code by snapping blocks together, just like LEGOs.",
            "problem": "Print 'Hello World'.",
            "toolbox_categories": ["Text", "Variables"],
            "initial_code": "",
            "validation_rules": validation,
            "hint_1": "Find the 'print' block in the Text category.",
            "hint_2": "Type 'Hello World' exactly — capital H and W.",
        }
        return {"story_arc_title": "Welcome to CodeCracker!", "levels": [tutorial]}

    def _get_fallback_level(self, mode: str, difficulty: str = "Easy") -> Dict[str, Any]:
        """
        Return a built-in five-level arc for use when no model produced a
        usable result.

        Args:
            mode: "blockly" or "time_challenge"; any other value yields an
                "Error Arc" with no levels.
            difficulty: Accepted for interface symmetry; the static levels
                do not vary with it.

        Returns:
            A dict with ``story_arc_title`` and ``levels``. Level ids and
            titles are both numbered 1 to 5.
        """
        part_numbers = range(1, 6)

        if mode == "blockly":
            return {
                "story_arc_title": "Logic Repair Arc",
                "levels": [
                    {
                        "type": "blockly",
                        # Zero-padded part number, kept in step with the
                        # title (fallback_001 .. fallback_005)
                        "level_id": f"fallback_{i:03d}",
                        "title": f"Logic Repair Part {i}",
                        "story": "The automated systems are down. We need manual logic configuration.",
                        "concept_tutorial": "Loops: Use loops to repeat actions without writing code over and over.",
                        "problem": "Create a loop that counts to 3.",
                        "toolbox_categories": ["Loops", "Math", "Variables", "Text"],
                        "initial_code": "",
                        "validation_rules": {
                            "required_concepts": ["loop"],
                            "expected_output": "1\n2\n3",
                        },
                        "hint_1": "Use the 'repeat' block.",
                        "hint_2": "Set the number to 3.",
                    }
                    for i in part_numbers
                ],
            }

        if mode == "time_challenge":
            return {
                "story_arc_title": "System Reboot Arc",
                "levels": [
                    {
                        "type": "time_challenge",
                        "level_id": f"fallback_tc_{i}",
                        "title": f"System Reboot Part {i}",
                        "story": "Critical Error! Debug the startup sequence immediately.",
                        "concept_tutorial": "Syntax: Code must follow strict grammar rules.",
                        "timer_seconds": 45,
                        "buggy_code": "print('System Ready'\nstart_engine()",
                        "task": "Fix the syntax error.",
                        "solution_patch": "print('System Ready')\nstart_engine()",
                    }
                    for i in part_numbers
                ],
            }

        return {"story_arc_title": "Error Arc", "levels": []}


if __name__ == "__main__":
    # Manual smoke test: build an agent from the environment and print one
    # Easy Blockly arc as formatted JSON.
    demo_agent = AdventureAgent()
    print("Testing Adventure Generation...")
    demo_arc = demo_agent.generate_level("blockly", "Easy")
    print(json.dumps(demo_arc, indent=2))