#!/usr/bin/env python3
import os, re, json, textwrap, traceback
from decimal import Decimal
from typing import List, Tuple

from flask import Flask, request, render_template, url_for
from flask_cors import CORS

from rag_mini import (
    search,
    ensure_ready,
    DEFAULT_TOPK,
    rag_debug_info,  # for /debug/rag
)

# ------------ LLM config ------------
LLM_PROVIDER = (os.getenv("LLM_PROVIDER") or "openai").strip().lower()
LLM_MODEL = (os.getenv("LLM_MODEL") or "gpt-4o-mini").strip()
LLM_API_KEY = os.getenv("OPENAI_API_KEY") or os.getenv("LLM_API_KEY")
OPENAI_BASE_URL = os.getenv("OPENAI_BASE_URL")  # optional (Azure/proxy)

app = Flask(__name__)
app.secret_key = os.getenv("FLASK_SECRET_KEY", "change-me-please")
CORS(app)

SYSTEM_RULES = """You are MaterialMind, a materials-selection assistant.

Return two things:
1) JSON with a ranked shortlist:
{
  "candidates": [
    {
      "name": "string",
      "score": 0,        // 0..400 (sum of the 4 weighted criteria, each weight x utility in 0..100)
      "score_pct": 0,    // 0..100 normalized display
      "reasons": ["..."],
      "tradeoffs": ["..."],
      "citations": ["[1]", "[2]"]
    }
  ]
}
2) After the JSON, provide 3–6 concise bullets on trade-offs.

Rules:
- Use only the provided context; cite with [1], [2]. No fabrication.
- Utilities per criterion are in [0,1]. Cost utility increases as cost decreases.
- Weights (performance, stability, cost, availability) are independent 0..100 (NOT normalized).
"""

ANSWER_TEMPLATE = """User constraints:
- Environment: {environment}
- Temperature: {temperature}
- Min UTS (MPa): {min_uts}
- Max density (g/cm^3): {max_density}
- Budget: {budget}
- Process: {process}

Independent priorities (0..100 each):
- performance={w_perf}, stability={w_stab}, cost={w_cost}, availability={w_avail}

Question:
For {environment} at {temperature}, shortlist materials that meet UTS ≥ {min_uts} MPa and density ≤ {max_density} g/cm^3. Consider budget={budget} and process={process}. Rank by performance, stability, cost, and availability.

Context snippets (numbered):
{context}

Citations:
{citations}

Now, first output ONLY the JSON block (no preamble). Then the short narrative.
""" # ---------- helpers ---------- def to_dec(x, default:int)->Decimal: try: s = (x or "").strip() return Decimal(s if s else str(default)) except Exception: return Decimal(default) def format_context(hits: List[Tuple[str, str]]): blocks, cites = [], [] for i,(text,cite) in enumerate(hits,1): snippet = textwrap.shorten((text or "").replace("\n"," "), width=450, placeholder=" …") blocks.append(f"[{i}] {snippet}") cites.append(f"[{i}] {cite}") return "\n".join(blocks), "\n".join(cites) def extract_json_block(text:str): if not text: return None m = re.search(r"```json\s*(\{.*?\})\s*```", text, flags=re.S|re.I) blob = m.group(1) if m else None if not blob: s = text start = s.find("{") while start != -1: depth = 0 for j in range(start, len(s)): ch = s[j] if ch == "{": depth += 1 elif ch == "}": depth -= 1 if depth == 0: blob = s[start:j+1] break if blob: break start = s.find("{", start+1) if not blob: return None try: return json.loads(blob) except Exception: return None # ---------- LLM caller ---------- def call_llm_cloud(system:str, user:str)->str: prov = LLM_PROVIDER; model = LLM_MODEL if prov in ("openai","oai"): from openai import OpenAI client = OpenAI(api_key=LLM_API_KEY, base_url=OPENAI_BASE_URL or None) r = client.chat.completions.create( model=model, temperature=0.2, max_tokens=1200, messages=[{"role":"system","content":system}, {"role":"user","content":user}], ) return r.choices[0].message.content elif prov in ("together","tg"): from together import Together client = Together(api_key=LLM_API_KEY) r = client.chat.completions.create( model=model, temperature=0.2, max_tokens=1200, messages=[{"role":"system","content":system},{"role":"user","content":user}], ) return r.choices[0].message.content else: from huggingface_hub import InferenceClient hf_token = LLM_API_KEY or os.getenv("HUGGINGFACEHUB_API_TOKEN") client = InferenceClient(model=model, token=hf_token) try: out = client.chat_completion( messages=[{"role":"system","content":system},{"role":"user","content":user}], max_tokens=1200, temperature=0.2, ) return out.choices[0].message["content"] except Exception: return client.text_generation( prompt=f"{system}\n\n{user}\n", max_new_tokens=1200, temperature=0.2 ) # ---------- routes ---------- @app.get("/healthz") def healthz(): return { "ok": True, "provider": LLM_PROVIDER, "model": LLM_MODEL, "has_api_key": bool(LLM_API_KEY), }, 200 @app.get("/debug/rag") def debug_rag(): return rag_debug_info(), 200 @app.get("/") def index(): return render_template("index.html", default_k=DEFAULT_TOPK) @app.post("/recommend") def recommend(): try: environment = request.form.get("environment","").strip() or "seawater" temperature = request.form.get("temperature","").strip() or "20–25 °C" min_uts = request.form.get("min_uts","").strip() or "0" max_density = request.form.get("max_density","").strip() or "100" budget = request.form.get("budget","").strip() or "open" process = request.form.get("process","").strip() or "any" w_perf = to_dec(request.form.get("w_perf"), 75) w_stab = to_dec(request.form.get("w_stab"), 100) w_cost = to_dec(request.form.get("w_cost"), 75) w_avail = to_dec(request.form.get("w_avail"), 75) try: k = int(request.form.get("k", DEFAULT_TOPK)) except: k = DEFAULT_TOPK question = (f"For {environment} at {temperature}, shortlist materials that meet " f"UTS ≥ {min_uts} MPa and density ≤ {max_density} g/cm^3. " f"Consider budget={budget} and process={process}. 
" f"Rank by performance, stability, cost, and availability.") # RAG search (never crash UI) try: hits = search(question, k=k) rag_error = "" except Exception as e: app.logger.exception("RAG search failed") hits = [] rag_error = f"RAG error: {type(e).__name__}: {e}" ctx, cites = format_context(hits) user_prompt = ANSWER_TEMPLATE.format( environment=environment, temperature=temperature, min_uts=min_uts, max_density=max_density, budget=budget, process=process, w_perf=str(int(w_perf)), w_stab=str(int(w_stab)), w_cost=str(int(w_cost)), w_avail=str(int(w_avail)), context=ctx, citations=cites ) # LLM call (never crash UI) raw = "" try: raw = call_llm_cloud(SYSTEM_RULES, user_prompt) except Exception as e: app.logger.exception("LLM call failed") raw = f"ERROR calling LLM ({LLM_PROVIDER}:{LLM_MODEL}): {type(e).__name__}: {e}" parsed = extract_json_block(raw) if raw else None candidates = (parsed or {}).get("candidates", []) if parsed else [] if rag_error: raw = f"{rag_error}\n\n{raw}" return render_template( "results.html", candidates=candidates, citations=(cites.splitlines() if cites else []), environment=environment, temperature=temperature, raw_output=raw or "", ) except Exception as e: app.logger.exception("recommend() hard failure") tb = traceback.format_exc() return render_template( "results.html", candidates=[], citations=[], environment="(unknown)", temperature="(unknown)", raw_output=f"FATAL: {type(e).__name__}: {e}\n\n{tb}", ), 200 if __name__ == "__main__": ensure_ready() port = int(os.getenv("PORT", "7860")) app.run(host="0.0.0.0", port=port, debug=False)