from __future__ import annotations
import os
import json, re
from typing import Optional
from pydantic import BaseModel
from dotenv import load_dotenv
try:
    import ollama
    OLLAMA_AVAILABLE = True
except ImportError:
    OLLAMA_AVAILABLE = False
    print("⚠️ Ollama not available. Using fallback LLM provider.")

load_dotenv()

DEFAULT_MODEL = os.getenv("LLM_MODEL", "mistral:7b-instruct")

def _strip_code_fences(txt: str) -> str:
    t = (txt or "").strip()
    t = re.sub(r"^```(?:json)?\s*", "", t, flags=re.IGNORECASE)
    t = re.sub(r"\s*```$", "", t)
    return t.strip()

def _extract_json_object(txt: str) -> Optional[str]:
    """
    Find the largest {...} block via brace counting. Helps when the model adds
    extra prose around the JSON or truncates trailing whitespace.
    """
    s = _strip_code_fences(txt)
    start = s.find("{")
    if start < 0:
        return None
    depth = 0
    for i in range(start, len(s)):
        if s[i] == "{":
            depth += 1
        elif s[i] == "}":
            depth -= 1
            if depth == 0:
                return s[start:i+1]
    return None  # unmatched -> probably truncated

def try_fix_json(text: str) -> dict:
    s = _strip_code_fences(text)

    # 1) direct parse
    try:
        return json.loads(s)
    except Exception:
        pass

    # 2) extract the biggest {...} block and parse
    block = _extract_json_object(s)
    if block:
        try:
            return json.loads(block)
        except Exception:
            pass

    # 3) as a last resort, try to find a top-level array and wrap
    lst_s = s.find("[")
    lst_e = s.rfind("]")
    if lst_s != -1 and lst_e != -1 and lst_e > lst_s:
        arr = s[lst_s:lst_e+1]
        try:
            return {"items": json.loads(arr)}
        except Exception:
            pass

    # give up
    raise json.JSONDecodeError("Unable to recover valid JSON", s, 0)

class LLM(BaseModel):
    provider: str = "ollama"
    model: str = DEFAULT_MODEL

    def complete(self, prompt: str, temperature: float = 0.4, max_tokens: int = 800) -> str:
        if not OLLAMA_AVAILABLE:
            # Fallback response when ollama is not available
            return f"I'm sorry, but the AI model service is currently unavailable in this environment. Please try using the local version of SmartLearn or contact support for assistance.\n\nYour question was: {prompt}"
        
        system_msg = "You are a helpful educational assistant. Keep answers clear and age-appropriate."
        full = f"{system_msg}\n\nUser: {prompt}\nAssistant:"
        resp = ollama.generate(
            model=self.model,
            prompt=full,
            options={
                "temperature": float(temperature),
                "num_predict": int(max_tokens),
                "stop": ["\nUser:"],
            },
        )
        return resp.get("response", "").strip()

    def complete_json(self, prompt: str, temperature: float = 0.2, max_tokens: int = 1800, attempts: int = 3) -> dict:
        """
        Use Ollama JSON mode. If the model returns invalid JSON, try to repair/extract.
        Retries a couple of times with slightly different nudges.
        """
        if not OLLAMA_AVAILABLE:
            # Fallback response when ollama is not available
            return {
                "error": "AI model service unavailable",
                "message": "The AI model service is currently unavailable in this environment. Please try using the local version of SmartLearn.",
                "fallback_data": {"items": []}
            }
        
        system_msg = "You are a helpful educational assistant. Only return valid JSON for structured tasks."
        for k in range(attempts):
            full = f"{system_msg}\n\nUser: {prompt}\nAssistant:"
            resp = ollama.generate(
                model=self.model,
                prompt=full,
                options={
                    "format": "json",          # ask for JSON
                    "temperature": float(temperature),
                    "num_predict": int(max_tokens),  # allow enough room for 8 Qs
                    "top_p": 0.9,
                    "stop": ["\nUser:"],
                },
            )
            txt = resp.get("response", "").strip()
            try:
                return try_fix_json(txt)
            except Exception:
                # light nudge: shorten explanations next attempt
                prompt = prompt + "\nMake explanations even shorter (<= 8 words). Return ONLY JSON."
                continue
        # if all attempts failed, raise the last error
        return try_fix_json(txt)