import asyncio
import json
import os
import time
from typing import Any, Dict, Optional

import gradio as gr
import httpx

from core.config import settings
from core.rate_limit import check_and_increment_global_ai_cap
from core.pdf_report import build_pdf
from core.sources import pubchem, ntp, ctx as ctx_src, iarc, scholar, fema

# Optional: CDC module may exist in your repo (user added).
try:
    from core.sources import cdc
except Exception:
    cdc = None  # type: ignore

# -----------------------------
# Caches (simple in-memory)
# -----------------------------
# Search results, written and read by `run_search` under keys of the form
# "search::<lowercased query>". Held only in this process's memory.
SEARCH_CACHE: Dict[str, Dict[str, Any]] = {}
# AI summary text, written and read by `generate_ai` under keys of the form
# "ai::<CAS used, or the query when no CAS is recorded>".
AI_CACHE: Dict[str, str] = {}


def json_pretty(obj: Any) -> str:
    """Serialize ``obj`` as indented JSON text for display.

    Non-ASCII characters are emitted unescaped and values the ``json`` module
    cannot encode natively are converted with ``str``. If serialization still
    fails, the plain ``str(obj)`` representation is returned instead.
    """
    dump_options = {"indent": 2, "ensure_ascii": False, "default": str}
    try:
        rendered = json.dumps(obj, **dump_options)
    except Exception:
        # Display helper: never let an unserializable payload break the UI.
        rendered = str(obj)
    return rendered


def client() -> httpx.AsyncClient:
    """Create a new ``httpx.AsyncClient`` that sends the demo's User-Agent header."""
    default_headers = {"user-agent": "toxrai-hf-demo"}
    return httpx.AsyncClient(headers=default_headers)


# -----------------------------
# Rendering helpers (Markdown)
# -----------------------------

def render_overview(data: Dict[str, Any]) -> str:
    """Render the header shown above the per-source sections.

    Always lists the query and the CAS used. The PubChem CID and the EPA
    CompTox DTXSID are appended only when the matching source result is marked
    ``ok`` and carries that identifier. Entries are separated by blank lines.
    """
    query_text = data.get("query") or ""
    cas_text = data.get("cas_used") or ""
    parts = [f"**Query:** `{query_text}`", f"**CAS used:** `{cas_text}`"]

    # (evidence key, identifier key, label) for the optional quick IDs.
    optional_ids = (
        ("pubchem", "cid", "PubChem CID"),
        ("ctx_genetox", "dtxsid", "EPA CompTox DTXSID"),
    )
    for source_key, id_key, label in optional_ids:
        source = data.get(source_key) or {}
        if source.get("ok") and source.get(id_key):
            parts.append(f"**{label}:** `{source.get(id_key)}`")

    return "\n\n".join(parts)


def render_pubchem_summary(pub: Dict[str, Any]) -> str:
    """Render the PubChem lookup result as Markdown.

    Args:
        pub: PubChem result dict. The keys read here are ``ok``, ``error``,
            ``cid``, ``resolved_cas``, ``props``, ``structure_png``, ``url``
            and ``hazards`` (a list of ``{"name", "text"}`` dicts).

    Returns:
        A Markdown block ending in a single newline, or a one-line
        "PubChem unavailable" message when the result is missing, not a dict,
        or not marked ``ok``.
    """
    if not isinstance(pub, dict) or not pub.get("ok"):
        err = pub.get("error") if isinstance(pub, dict) else None
        # Fall back to a generic message instead of printing "None".
        return f"PubChem unavailable: {err or 'Unknown PubChem error'}"

    props = pub.get("props")
    if not isinstance(props, dict):
        props = {}

    cid = pub.get("cid")
    resolved_cas = pub.get("resolved_cas") or "-"
    iupac_name = props.get("IUPACName") or props.get("iupac_name") or "-"
    formula = props.get("MolecularFormula") or "-"
    mw = props.get("MolecularWeight")
    # Only None/"" count as missing, so a numeric 0 would still be shown.
    mw_str = f"{mw}" if mw not in (None, "") else "-"
    smiles = props.get("CanonicalSMILES") or "-"

    lines = [
        f"**CID:** `{cid}`",
        f"**Resolved CAS (from synonyms):** `{resolved_cas}`",
        f"**IUPAC/Title:** {iupac_name}",
        "",
        f"**Molecular Formula:** `{formula}`",
        f"**Molecular Weight:** `{mw_str}`",
        f"**Canonical SMILES:** `{smiles}`",
    ]

    structure_png = pub.get("structure_png")
    if structure_png:
        lines.extend(("", "**Structure**", f"![]({structure_png})"))

    url = pub.get("url")
    if url:
        lines.extend(("", f"[Open PubChem]({url})"))

    hazards = pub.get("hazards")
    if not isinstance(hazards, (list, tuple)):
        hazards = []

    # Render hazards as paragraphs (avoids odd wrapping from bullet nesting).
    hazard_lines = []
    for hazard in hazards:
        if not isinstance(hazard, dict):
            continue
        text = hazard.get("text") or ""
        if not text:
            continue
        name = hazard.get("name") or "Hazard"
        hazard_lines.extend((f"**{name}:** {text}", ""))

    # Emit the heading only when at least one hazard actually has text.
    if hazard_lines:
        lines.extend(("", "### Safety / Hazard Information"))
        lines.extend(hazard_lines)

    return "\n".join(lines).rstrip() + "\n"


def render_ctx_summary(ctx: Dict[str, Any]) -> str:
    """Render the EPA CompTox (CTX) genetox result as Markdown.

    Args:
        ctx: CTX result dict. The keys read here are ``ok``, ``error``,
            ``dashboard_search``, ``dtxsid``, ``dashboard_url`` and
            ``summary``.

    Returns:
        On failure, the error text (or "Unknown CTX error" when none was
        supplied), followed by a dashboard search link when one is present.
        On success, the DTXSID and dashboard link, followed by a fenced JSON
        excerpt of the summary when it is a dict.
    """
    if not isinstance(ctx, dict) or not ctx.get("ok"):
        failure = ctx if isinstance(ctx, dict) else {}
        # Fall back to a generic message instead of printing "None".
        err = failure.get("error") or "Unknown CTX error"
        search_url = failure.get("dashboard_search")
        if search_url:
            return f"{err}\n\n[Open CompTox Dashboard search]({search_url})"
        return str(err)

    dtxsid = ctx.get("dtxsid")
    dash = ctx.get("dashboard_url")
    summary = ctx.get("summary")

    lines = []
    if dtxsid:
        lines.append(f"**DTXSID:** `{dtxsid}`")
    if dash:
        lines.append(f"[Open CompTox Dashboard]({dash})")

    # Surface key fields (if present) without dumping the whole JSON payload.
    if isinstance(summary, dict):
        # Tokens are matched against the lowercased key, so they must be
        # lowercase themselves.
        interesting_tokens = (
            "genetox",
            "overall",
            "summary",
            "conclusion",
            "call",
            "result",
            "assessment",
        )
        picked = {
            key: value
            for key, value in summary.items()
            if any(token in str(key).lower() for token in interesting_tokens)
        }
        if not picked:
            # Fallback: show the first few fields so the section is not empty.
            picked = dict(list(summary.items())[:8])

        txt = json_pretty(picked)
        # Keep it readable in the UI.
        if len(txt) > 6000:
            txt = txt[:6000] + "\n... (truncated)"
        lines.extend(("", "```json", txt, "```"))

    return "\n".join(lines)


def render_ntp_summary(ntp_res: Dict[str, Any]) -> str:
    """Render NTP Technical Report hits as a Markdown bullet list.

    Args:
        ntp_res: NTP result dict. The keys read here are ``ok``, ``error``
            and ``items``; each item may carry ``tr``/``num``, ``title`` and
            ``report_page``/``url``.

    Returns:
        One bullet per usable item, a "no reports found" message when there
        are none, or an "unavailable" message when the lookup failed.
    """
    if not isinstance(ntp_res, dict) or not ntp_res.get("ok"):
        err = ntp_res.get("error") if isinstance(ntp_res, dict) else None
        # Fall back to a generic message instead of printing "None".
        return f"NTP Technical Reports unavailable: {err or 'Unknown NTP error'}"

    no_reports = "No NTP Technical Reports found for this CAS."  # CAS-filtered

    lines = []
    for item in ntp_res.get("items") or []:
        if not isinstance(item, dict):
            continue
        num = item.get("tr") or item.get("num") or ""
        title = item.get("title") or "Report"
        url = item.get("report_page") or item.get("url") or ""
        # Drop the "TR-" label when no report number is known, rather than
        # showing a dangling "TR-".
        label = f"**TR-{num}**  " if num else ""
        target = f"[{title}]({url})" if url else f"{title}"
        lines.append(f"- {label}{target}")

    return "\n".join(lines) if lines else no_reports


def render_iarc_block(iarc_res: Dict[str, Any]) -> str:
    """Render the IARC Monographs section as Markdown.

    A top-level ``url`` is shown as a single search link. Otherwise each dict
    in ``results`` becomes a bullet (linked when it has a ``url``, with the
    year in parentheses when present). Anything else yields the "unavailable"
    message.
    """
    unavailable = "IARC link unavailable."
    if not (iarc_res and iarc_res.get("ok")):
        return unavailable

    search_url = iarc_res.get("url")
    if search_url:
        return f"[Search IARC Monographs (NCBI Bookshelf)]({search_url})"

    entries = iarc_res.get("results") if isinstance(iarc_res, dict) else None
    if not isinstance(entries, list) or not entries:
        return unavailable

    bullets = []
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        heading = entry.get("title") or "IARC Monographs"
        href = entry.get("url")
        year = entry.get("year")
        year_note = f" ({year})" if year else ""
        if href:
            bullets.append(f"- [{heading}]({href}){year_note}")
        else:
            bullets.append(f"- {heading}{year_note}")

    # Every bullet is non-empty, so an empty join means nothing was usable.
    return "\n".join(bullets) or unavailable


def render_scholar_block(sch_res: Dict[str, Any]) -> str:
    """Render the Google Scholar link, or a fallback message when it is missing."""
    fallback = "Google Scholar link unavailable."
    if sch_res and sch_res.get("ok"):
        link = sch_res.get("url")
        if link:
            return f"[Open Google Scholar search]({link})"
    return fallback


def render_fema_block(fema_res: Dict[str, Any]) -> str:
    """Render the FEMA search links as a Markdown bullet list.

    Args:
        fema_res: FEMA result dict. The keys read here are ``ok``, ``error``,
            ``cas_url``, ``name_url``, ``combo_url``, ``search_api_url`` and
            ``alt_url``.

    Returns:
        A heading line followed by one bullet per available link. When the
        result failed, its error text is returned; "FEMA link unavailable."
        is returned when there is no error text or no link at all.
    """
    unavailable = "FEMA link unavailable."
    if not isinstance(fema_res, dict) or not fema_res.get("ok"):
        err = fema_res.get("error") if isinstance(fema_res, dict) else None
        # Fall back to the generic message instead of printing "None".
        return str(err or unavailable)

    # (result key, link label), in display order.
    link_specs = (
        ("cas_url", "Search by CAS"),
        ("name_url", "Search by Chemical Name"),
        ("combo_url", "Search by CAS + Name"),
        ("search_api_url", "Generic FEMA search (alt)"),
        ("alt_url", "Generic FEMA search"),
    )
    bullets = [
        f"- [{label}]({fema_res[key]})"
        for key, label in link_specs
        if fema_res.get(key)
    ]
    if not bullets:
        return unavailable

    header = "A FEMA risk assessment for this chemical is available:"
    return "\n".join([header, *bullets])


def render_cdc_block(cdc_res: Any) -> str:
    """Render CDC ToxProfile matches as Markdown.

    Accepts a single dict, a list of dicts (non-dict entries are skipped), or
    any other value, which is shown via ``str``. Falsy input and lists with no
    usable entry yield the "no match" message.
    """
    no_match = "No CDC ToxProfiles match."
    default_name = "CDC ToxProfile"

    if not cdc_res:
        return no_match

    if isinstance(cdc_res, dict):
        label = cdc_res.get("name") or default_name
        href = cdc_res.get("url")
        if href:
            return f"[{label}]({href})"
        return label

    if isinstance(cdc_res, list):
        bullets = []
        for entry in cdc_res:
            if isinstance(entry, dict):
                label = entry.get("name") or default_name
                href = entry.get("url")
                bullets.append(f"- [{label}]({href})" if href else f"- {label}")
        return "\n".join(bullets) if bullets else no_match

    return str(cdc_res)


# -----------------------------
# Search + AI
# -----------------------------

async def run_search(query: str) -> Dict[str, Any]:
    """Collect evidence for one chemical query from every configured source.

    PubChem is queried first. Its result supplies the CAS number (when the
    query itself is not a CAS) and the DTXSID, which are then used for the
    CTX genetox and NTP lookups; those two run concurrently. Link-only sources
    (IARC, Google Scholar, FEMA) and the optional CDC module are added last.

    Results are cached in ``SEARCH_CACHE`` per lowercased query as
    ``{"stored_at": <monotonic seconds>, "data": <evidence dict>}`` and reused
    until they are older than ``settings.cache_ttl_seconds``. A missing or
    non-positive TTL means entries never expire.

    Args:
        query: CAS number or chemical name typed by the user.

    Returns:
        Evidence dict with keys ``query``, ``cas_used``, ``pubchem``,
        ``ctx_genetox``, ``ntp_technical_reports``, ``iarc_monographs``,
        ``google_scholar``, ``fema`` and, when the optional CDC module was
        imported, ``cdc_toxprofiles``.

    Raises:
        gr.Error: If ``query`` is empty or whitespace only.
    """
    q = (query or "").strip()
    if not q:
        raise gr.Error("Enter a CAS number (preferred) or chemical name.")

    cache_key = f"search::{q.lower()}"

    # NOTE(review): the UI banner advertises `settings.cache_ttl_seconds`;
    # this assumes that setting is meant to govern this cache - confirm.
    try:
        ttl = float(getattr(settings, "cache_ttl_seconds", 0) or 0)
    except (TypeError, ValueError):
        ttl = 0.0

    cached = SEARCH_CACHE.get(cache_key)
    if isinstance(cached, dict) and "data" in cached and "stored_at" in cached:
        age = time.monotonic() - cached["stored_at"]
        if ttl <= 0 or age < ttl:
            return cached["data"]
        # Expired: drop the entry and fetch fresh data below.
        SEARCH_CACHE.pop(cache_key, None)

    async with client() as http:
        # PubChem accepts names and CAS. We also use it to resolve CAS via synonyms.
        pub = await pubchem.pubchem_by_query(q, http)
        # Guard every field access, not just the DTXSID one.
        pub_info = pub if isinstance(pub, dict) else {}

        cas = q
        if not pubchem.is_cas(cas):
            cas = pub_info.get("resolved_cas") or q

        # CTX is queried with PubChem's DTXSID when there is one, otherwise
        # with the raw query. `q` is never empty here, so the lookup always runs.
        ctx_query = pub_info.get("dtxsid") or q

        ctx_res, ntp_res = await asyncio.gather(
            ctx_src.fetch_ctx_genetox(ctx_query, http),
            ntp.search_technical_reports(cas, http, limit=8),
        )

    out: Dict[str, Any] = {
        "query": q,
        "cas_used": cas,
        "pubchem": pub,
        "ctx_genetox": ctx_res,
        "ntp_technical_reports": ntp_res,
        "iarc_monographs": iarc.bookshelf_link(cas),
        "google_scholar": {"ok": True, "url": scholar.scholar_link(cas)},
        "fema": fema.fema_link(cas if pubchem.is_cas(cas) else "", q),
    }

    # CDC toxprofiles (if module exists)
    if cdc is not None:
        cdc_result = None
        try:
            # Try a few common function names (the cdc module's API is not fixed).
            for fn_name in ("lookup", "search", "toxprofile_for"):
                if hasattr(cdc, fn_name):
                    cdc_result = getattr(cdc, fn_name)(cas)
                    break
        except Exception:
            # Best effort: a failing optional source must not break the search.
            cdc_result = None
        out["cdc_toxprofiles"] = cdc_result

    SEARCH_CACHE[cache_key] = {"stored_at": time.monotonic(), "data": out}
    return out


def _prune_for_prompt(obj: Any, max_chars: int) -> str:
    """Return ``obj`` as pretty JSON text, cut to ``max_chars`` characters.

    When the text is cut, a "... (truncated)" marker line is appended after
    the kept prefix.
    """
    rendered = json_pretty(obj)
    too_long = len(rendered) > max_chars
    return f"{rendered[:max_chars]}\n... (truncated)" if too_long else rendered


def build_prompt(data: Dict[str, Any]) -> str:
    """Build the AI-summary prompt from a trimmed view of the evidence.

    Only selected fields are included (PubChem identifiers, properties and the
    first ten hazards; the CTX status, DTXSID and summary; NTP report items;
    CDC ToxProfiles), and the serialized evidence is capped at 12000
    characters, so the full raw JSON from every source is never dumped into
    the prompt.
    """
    pub = data.get("pubchem") or {}
    pub_is_dict = isinstance(pub, dict)
    props = (pub.get("props") or {}) if pub_is_dict else {}
    hazards = (pub.get("hazards") or []) if pub_is_dict else []

    ctx = data.get("ctx_genetox") or {}
    ntp_block = data.get("ntp_technical_reports") or {}

    pubchem_view = {
        "cid": pub.get("cid"),
        "resolved_cas": pub.get("resolved_cas"),
        "iupac": props.get("IUPACName") or props.get("iupac_name"),
        "formula": props.get("MolecularFormula"),
        "molecular_weight": props.get("MolecularWeight"),
        "canonical_smiles": props.get("CanonicalSMILES"),
        "hazards": hazards[:10],
    }
    ctx_view = {
        "ok": ctx.get("ok"),
        "dtxsid": ctx.get("dtxsid"),
        "summary": ctx.get("summary"),
    }
    evidence = {
        "query": data.get("query"),
        "cas_used": data.get("cas_used"),
        "pubchem": pubchem_view,
        "ctx_genetox": ctx_view,
        "ntp_technical_reports": ntp_block.get("items", []),
        "cdc_toxprofiles": data.get("cdc_toxprofiles"),
    }

    body = _prune_for_prompt(evidence, max_chars=12000)

    instruction_parts = (
        "You are a toxicology regulatory assistant. ",
        "Using ONLY the evidence JSON below, write a concise weight-of-evidence summary focused on mutagenicity/genotoxicity. ",
        "If evidence is conflicting or absent, say so explicitly. ",
        "Cite which source each statement comes from (PubChem hazards, CTX genetox summary, NTP TR titles, CDC ToxProfiles).\n\n",
        "EVIDENCE_JSON:\n",
    )
    return "".join(instruction_parts) + body


def do_search(query: str):
    """Run a search and return one value per UI output component.

    The returned tuple is, in order: the evidence dict (stored in state), the
    overview, the PubChem, CDC, CTX, NTP, IARC, Scholar and FEMA Markdown
    sections, the six raw-JSON views (PubChem, CTX, NTP, IARC, Scholar, FEMA),
    and an empty string that clears the AI summary.
    """
    data = asyncio.run(run_search(query))

    def section(key: str) -> Any:
        # Missing sections default to an empty dict.
        return data.get(key, {})

    overview_text = render_overview(data)
    pubchem_text = render_pubchem_summary(section("pubchem"))
    ctx_text = render_ctx_summary(section("ctx_genetox"))
    ntp_text = render_ntp_summary(section("ntp_technical_reports"))
    iarc_text = render_iarc_block(section("iarc_monographs"))
    scholar_text = render_scholar_block(section("google_scholar"))
    fema_text = render_fema_block(section("fema"))

    # The CDC section stays blank when the evidence has no CDC entry at all.
    if "cdc_toxprofiles" in data:
        cdc_text = render_cdc_block(data.get("cdc_toxprofiles"))
    else:
        cdc_text = ""

    raw_keys = (
        "pubchem",
        "ctx_genetox",
        "ntp_technical_reports",
        "iarc_monographs",
        "google_scholar",
        "fema",
    )
    raw_views = [json_pretty(section(key)) for key in raw_keys]

    # IMPORTANT: the order below must match the `outputs=[...]` list of the
    # search events; the CDC section sits right after PubChem.
    return (
        data,  # state
        overview_text,
        pubchem_text,
        cdc_text,
        ctx_text,
        ntp_text,
        iarc_text,
        scholar_text,
        fema_text,
        *raw_views,
        "",  # ai_out (blank after search)
    )


def generate_ai(data: dict):
    """Return the AI weight-of-evidence summary for the current search.

    Summaries are cached in ``AI_CACHE`` per CAS (or per query when no CAS is
    recorded). On a cache miss the global daily cap is checked and incremented
    before the model is called.

    Args:
        data: Evidence dict produced by the search step.

    Returns:
        The summary text, or a Markdown/plain message explaining why no
        summary is available (cap reached, model error, empty response).

    Raises:
        gr.Error: If no search has been run yet (``data`` is empty).
    """
    if not data:
        raise gr.Error("Run a search first.")

    cas = data.get("cas_used") or data.get("query") or ""
    cache_key = f"ai::{cas}"
    # Only a non-empty cached summary counts as a hit, so a blank entry can
    # never be served in place of a real summary.
    cached = AI_CACHE.get(cache_key)
    if cached:
        return cached

    allowed, info = check_and_increment_global_ai_cap()
    if not allowed:
        return f"AI Summary capacity reached for today (limit {info.get('limit')}). Please try again tomorrow."

    from core.sources.ai_summary import generate_ai_summary  # local import avoids cold-start issues

    resp = generate_ai_summary(build_prompt(data))
    if not resp.get("ok"):
        return f"**AI summary unavailable:** {resp.get('error')}"

    text = resp.get("text") or ""
    if not str(text).strip():
        # Do not cache an empty answer: it would pin a blank summary to this
        # key for the lifetime of the process.
        return "**AI summary unavailable:** the model returned an empty response."

    AI_CACHE[cache_key] = text
    return text


def download_report(data: dict, ai_text: str):
    """Build the PDF report and JSON evidence packet for the current search.

    Returns the ``(pdf_path, json_path)`` pair produced by ``build_pdf``.
    Raises ``gr.Error`` when no search has been run yet. An empty AI summary
    is passed to ``build_pdf`` as ``None``.
    """
    if not data:
        raise gr.Error("Run a search first.")

    report_id = data.get("cas_used") or data.get("query") or "unknown"
    summary = ai_text or None
    pdf_path, json_path = build_pdf(report_id, evidence=data, ai_summary=summary)
    return pdf_path, json_path


# -----------------------------
# UI
# -----------------------------

with gr.Blocks(title="ToxRAI (HF Demo)") as demo:
    # Page header: title plus the configured usage limits.
    gr.Markdown("# 🧪 ToxRAI — Demo (CAS-first)")
    gr.Markdown(
        "Public demo"
        f" • AI summaries/day global cap: **{settings.max_ai_summaries_per_day}**"
        f" • Cache TTL: **{settings.cache_ttl_seconds}s**"
    )

    def _markdown_accordion(title):
        """Add a collapsed accordion titled ``title`` and return the Markdown inside it."""
        with gr.Accordion(title, open=False):
            return gr.Markdown()

    with gr.Tabs():
        with gr.Tab("Search"):
            # Holds the evidence dict of the most recent search.
            state = gr.State(None)

            with gr.Row():
                query_in = gr.Textbox(
                    label="CAS (preferred) or Chemical name",
                    placeholder="e.g., 80-05-7 or bisphenol A",
                    scale=4,
                )
                search_btn = gr.Button("Search", variant="primary", scale=1)

            overview_md = gr.Markdown()

            # One collapsed section per source, in display order.
            pubchem_md = _markdown_accordion("PubChem (summary)")
            cdc_md = _markdown_accordion("CDC ToxProfiles")  # optional source
            ctx_md = _markdown_accordion("EPA CompTox (CTX) — Genetox (full fields)")
            ntp_md = _markdown_accordion("NTP Technical Reports")
            iarc_md = _markdown_accordion("IARC Monographs")
            scholar_md = _markdown_accordion("Google Scholar")
            fema_md = _markdown_accordion("FEMA Risk Assessment")

            with gr.Accordion("Raw outputs (all sources)", open=False):
                raw_pubchem = gr.Code(label="PubChem (raw)", language="json")
                raw_ctx = gr.Code(label="CTX Genetox (raw)", language="json")
                raw_ntp = gr.Code(label="NTP TR (raw)", language="json")
                raw_iarc = gr.Code(label="IARC (raw)", language="json")
                raw_scholar = gr.Code(label="Scholar link (raw)", language="json")
                raw_fema = gr.Code(label="FEMA (raw)", language="json")

            with gr.Row():
                ai_btn = gr.Button("Generate AI Summary (GPT-4o)", variant="secondary")
                pdf_btn = gr.Button("Build PDF + JSON")

            ai_out = gr.Markdown()

            with gr.Row():
                pdf_file = gr.File(label="Download PDF")
                json_file = gr.File(label="Download JSON evidence packet")

            # Must stay in the same order as the tuple returned by `do_search`.
            search_outputs = [
                state,
                overview_md,
                pubchem_md,
                cdc_md,
                ctx_md,
                ntp_md,
                iarc_md,
                scholar_md,
                fema_md,
                raw_pubchem,
                raw_ctx,
                raw_ntp,
                raw_iarc,
                raw_scholar,
                raw_fema,
                ai_out,
            ]

            # Clicking "Search" and pressing Enter in the textbox run the same search.
            for register_search in (search_btn.click, query_in.submit):
                register_search(
                    fn=do_search,
                    inputs=[query_in],
                    outputs=list(search_outputs),
                )

            ai_btn.click(fn=generate_ai, inputs=[state], outputs=[ai_out])
            pdf_btn.click(
                fn=download_report,
                inputs=[state, ai_out],
                outputs=[pdf_file, json_file],
            )


# Turn on Gradio's event queue, passing 6 as the default concurrency limit.
demo.queue(default_concurrency_limit=6)
# Expose the Blocks object under the name `app` as well.
# NOTE(review): presumably for a host that imports `app` from this module - confirm.
app = demo

# Start the Gradio server only when this file is executed directly.
if __name__ == "__main__":
    demo.launch()