Spaces:

hchevva
/

TOXRA.AI

Sleeping

App Files Files Community

hchevva committed on Feb 6

Commit

05faca4

verified ·

1 Parent(s): e73b687

Upload app.py

Browse files

Files changed (1) hide show

app.py +295 -199

app.py CHANGED Viewed

@@ -1,228 +1,254 @@
-import os
 import json
 import time
-import asyncio
-from dataclasses import dataclass
-from typing import Any, Dict, Tuple
 import gradio as gr
-from core.http import client
 from core.rate_limit import check_and_increment_global_ai_cap
-from core.validate import is_cas
 from core.pdf_report import build_pdf
-from core.sources import pubchem, ntp, ctx as ctx_src, iarc, scholar, fema, cdc
-from core.sources.ai_summary import generate_ai_summary
 # -----------------------------
-# Settings
-# -----------------------------
-@dataclass
-class Settings:
-    openai_model: str = os.getenv("OPENAI_MODEL", "gpt-4o")
-    max_ai_summaries_per_day: int = int(os.getenv("MAX_AI_SUMMARIES_PER_DAY", "100"))
-    cache_ttl_seconds: int = int(os.getenv("CACHE_TTL_SECONDS", "86400"))
-settings = Settings()
-# -----------------------------
-# Simple in-memory caches
 # -----------------------------
 SEARCH_CACHE: Dict[str, Dict[str, Any]] = {}
 AI_CACHE: Dict[str, str] = {}
-# -----------------------------
-# Utilities
-# -----------------------------
-def _pretty(obj: Any) -> str:
     try:
-        return json.dumps(obj, indent=2, ensure_ascii=False)
     except Exception:
         return str(obj)
-def _truncate_text(s: str, max_chars: int) -> str:
-    if not s:
-        return ""
-    if len(s) <= max_chars:
-        return s
-    return s[:max_chars] + "\n\n[TRUNCATED]\n"
 # -----------------------------
-# Renderers (Markdown blocks)
 # -----------------------------
-def render_overview(data: dict) -> str:
-    q = data.get("query", "")
-    cas = data.get("cas_used", "")
-    return f"**Query:** `{q}`\n\n**CAS used:** `{cas}`"
-def render_pubchem_summary(pub: dict) -> str:
-    if not pub or not pub.get("ok"):
-        return f"PubChem unavailable: {pub.get('error') if isinstance(pub, dict) else 'unknown'}"
-    cid = pub.get("cid", "")
-    resolved_cas = pub.get("resolved_cas", "")
-    iupac = pub.get("iupac_name") or pub.get("title") or "-"
-    mf = pub.get("molecular_formula") or "-"
-    mw = pub.get("molecular_weight") or "-"
-    smiles = pub.get("canonical_smiles") or "-"
-    struct_url = pub.get("structure_png") or ""
     lines = [
-        f"**CID:** `{cid}`",
-        f"**Resolved CAS (from synonyms):** `{resolved_cas}`" if resolved_cas else "",
-        f"**IUPAC/Title:** {iupac}",
-        "",
-        f"**Molecular Formula:** `{mf}`",
-        f"**Molecular Weight:** `{mw}`",
-        f"**Canonical SMILES:** `{smiles}`",
-        "",
     ]
-    if struct_url:
-        lines += [
-            "**Structure**",
-            f"![structure]({struct_url})",
-            "",
-        ]
-    pc_url = pub.get("url") or ""
-    if pc_url:
-        lines.append(f"[Open PubChem]({pc_url})")
-    return "\n".join([x for x in lines if x != ""])
-def render_ctx_summary(ctx: dict) -> str:
     if not ctx or not ctx.get("ok"):
-        return ctx.get("error") or "CTX unavailable."
-    # Full fields already in ctx dict; render a compact header + note
-    dtxsid = ctx.get("dtxsid") or ""
     lines = []
     if dtxsid:
         lines.append(f"**DTXSID:** `{dtxsid}`")
-    hazard = ctx.get("ghs_hazard_statements") or ""
-    echa = ctx.get("echa_cl_summary") or ""
-    if hazard:
-        lines.append(f"\n**GHS Hazard Statements:**\n\n{hazard}")
-    if echa:
-        lines.append(f"\n**ECHA C&L Summary:**\n\n{echa}")
-    dash = ctx.get("dashboard_search_url") or ""
     if dash:
-        lines.append(f"\n[Open CompTox Dashboard search]({dash})")
-    return "\n".join(lines) if lines else "No DTXSID found for this query."
-def render_ntp_summary(ntp_obj: dict) -> str:
-    if not ntp_obj or not ntp_obj.get("ok"):
-        return ntp_obj.get("error") or "NTP unavailable."
-    hits = ntp_obj.get("hits") or []
-    if not hits:
-        return "No NTP Technical Reports found for this CAS."
-    lines = []
-    for h in hits:
-        tr = h.get("tr") or h.get("title") or "NTP Technical Report"
-        url = h.get("url") or ""
-        pdf = h.get("pdf") or ""
-        # Always show PDF link when available
-        if pdf:
-            lines.append(f"- **{tr}** — [Report page]({url}) • [PDF]({pdf})" if url else f"- **{tr}** — [PDF]({pdf})")
-        else:
-            lines.append(f"- **{tr}** — [Report page]({url})" if url else f"- **{tr}**")
     return "\n".join(lines)
-def render_cdc_summary(cdc_obj: dict) -> str:
-    if not cdc_obj or not cdc_obj.get("ok"):
-        return cdc_obj.get("error") or "CDC toxprofiles unavailable."
-    matches = cdc_obj.get("matches") or []
-    if not matches:
-        return "No toxprofile is available for the chemical."
     lines = []
-    for m in matches:
-        name = m.get("name") or "ToxProfile"
-        cas = m.get("cas") or ""
-        url = m.get("url") or ""
         if url:
-            lines.append(f"- **{name}** (CAS: {cas}) — [CDC ToxProfile]({url})")
         else:
-            lines.append(f"- **{name}** (CAS: {cas})")
     return "\n".join(lines)
-def render_iarc_block(obj: dict) -> str:
-    if not obj or not obj.get("ok"):
         return "IARC link unavailable."
-    url = obj.get("url") or ""
-    if not url:
-        return "IARC link unavailable."
-    return f"[Open IARC Monographs search]({url})"
-def render_scholar_block(obj: dict) -> str:
-    if not obj or not obj.get("ok"):
-        return "Scholar link unavailable."
-    url = obj.get("url") or ""
-    return f"[Open Google Scholar search]({url})" if url else "Scholar link unavailable."
-def render_fema_block(obj: dict) -> str:
-    if not obj or not obj.get("ok"):
-        return "FEMA link unavailable."
-    url = obj.get("url") or ""
     return f"[Open FEMA / Fragrance Materials Safety Resource search]({url})" if url else "FEMA link unavailable."
-# -----------------------------
-# Prompt builder (keep small)
-# -----------------------------
-def build_prompt(data: dict) -> str:
-    cas = data.get("cas_used") or data.get("query") or "unknown"
-    pub = data.get("pubchem", {})
-    ctx = data.get("ctx_genetox", {})
-    ntp_obj = data.get("ntp_technical_reports", {})
-    cdc_obj = data.get("cdc_toxprofiles", {})
-    prompt = f"""You are a toxicology assistant. Summarize weight-of-evidence for mutagenicity/genotoxicity.
-Chemical CAS: {cas}
-PUBCHEM (selected fields):
-{_pretty({k: pub.get(k) for k in ['cid','resolved_cas','iupac_name','title','molecular_formula','molecular_weight','canonical_smiles','url']})}
-CTX (selected blocks only):
-{_pretty({k: ctx.get(k) for k in ['dtxsid','ghs_hazard_statements','echa_cl_summary','genetox_records']})}
-NTP Technical Reports (hits):
-{_pretty(ntp_obj.get('hits') if isinstance(ntp_obj, dict) else ntp_obj)}
-CDC ToxProfiles (matches):
-{_pretty(cdc_obj.get('matches') if isinstance(cdc_obj, dict) else cdc_obj)}
-Write a concise, structured summary:
-- Identity & key links
-- Genetox signals (Ames, micronucleus, chromosomal aberrations, etc.)
-- Any conflicts/inconsistencies
-- Overall conclusion (low/medium/high concern)
-- What data is missing
-"""
-    return _truncate_text(prompt, max_chars=16000)
 # -----------------------------
-# Search pipeline
 # -----------------------------
-async def run_search(query: str) -> dict:
     q = (query or "").strip()
     if not q:
         raise gr.Error("Enter a CAS number (preferred) or chemical name.")
@@ -232,65 +258,130 @@ async def run_search(query: str) -> dict:
         return SEARCH_CACHE[cache_key]
     async with client() as http:
         pub = await pubchem.pubchem_by_query(q, http)
         cas = q
-        if not is_cas(cas):
             cas = pub.get("resolved_cas") or q
-        ctx_task = (
-            ctx_src.fetch_ctx_genetox(cas, http)
-            if is_cas(cas)
-            else asyncio.sleep(0, result={"ok": False, "error": "CTX requires CAS (CAS-first)."})
-        )
         ntp_task = ntp.search_technical_reports(cas, http, limit=8)
         ctx_res, ntp_res = await asyncio.gather(ctx_task, ntp_task)
-    # CDC (offline/local index): try resolved CAS first, then name fallback inside cdc.search()
-    cdc_res = cdc.search(q, cas=cas if is_cas(cas) else None, limit=8)
-    out = {
         "query": q,
         "cas_used": cas,
         "pubchem": pub,
         "ctx_genetox": ctx_res,
         "ntp_technical_reports": ntp_res,
-        "cdc_toxprofiles": cdc_res,
         "iarc_monographs": iarc.bookshelf_link(cas),
         "google_scholar": {"ok": True, "url": scholar.scholar_link(cas)},
-        "fema": fema.fema_link(cas if is_cas(cas) else q),
     }
     SEARCH_CACHE[cache_key] = out
     return out
 def do_search(query: str):
     data = asyncio.run(run_search(query))
     overview_md_text = render_overview(data)
     pubchem_md_text = render_pubchem_summary(data.get("pubchem", {}))
     ctx_md_text = render_ctx_summary(data.get("ctx_genetox", {}))
     ntp_md_text = render_ntp_summary(data.get("ntp_technical_reports", {}))
-    cdc_md_text = render_cdc_summary(data.get("cdc_toxprofiles", {}))
     iarc_md_text = render_iarc_block(data.get("iarc_monographs", {}))
     scholar_md_text = render_scholar_block(data.get("google_scholar", {}))
     fema_md_text = render_fema_block(data.get("fema", {}))
-    raw_pubchem_json = _pretty(data.get("pubchem", {}))
-    raw_ctx_json = _pretty(data.get("ctx_genetox", {}))
-    raw_ntp_json = _pretty(data.get("ntp_technical_reports", {}))
-    raw_iarc_json = _pretty(data.get("iarc_monographs", {}))
-    raw_scholar_json = _pretty(data.get("google_scholar", {}))
-    raw_fema_json = _pretty(data.get("fema", {}))
     return (
-        data,
         overview_md_text,
         pubchem_md_text,
         ctx_md_text,
         ntp_md_text,
-        cdc_md_text,
         iarc_md_text,
         scholar_md_text,
         fema_md_text,
@@ -300,13 +391,14 @@ def do_search(query: str):
         raw_iarc_json,
         raw_scholar_json,
         raw_fema_json,
-        "",  # ai_out blank after search
     )
 def generate_ai(data: dict):
     if not data:
         raise gr.Error("Run a search first.")
     cas = data.get("cas_used") or data.get("query") or ""
     cache_key = f"ai::{cas}"
     if cache_key in AI_CACHE:
@@ -316,6 +408,8 @@ def generate_ai(data: dict):
     if not allowed:
         return f"AI Summary capacity reached for today (limit {info.get('limit')}). Please try again tomorrow."
     resp = generate_ai_summary(build_prompt(data))
     if not resp.get("ok"):
         return f"**AI summary unavailable:** {resp.get('error')}"
@@ -328,19 +422,17 @@ def generate_ai(data: dict):
 def download_report(data: dict, ai_text: str):
     if not data:
         raise gr.Error("Run a search first.")
     cas = data.get("cas_used") or data.get("query") or "unknown"
     pdf_path, json_path = build_pdf(cas, evidence=data, ai_summary=ai_text if ai_text else None)
     return pdf_path, json_path
 # -----------------------------
-# UI (light, production-like)
 # -----------------------------
-LIGHT_CSS = """
-.gradio-container { background: white !important; }
-"""
-with gr.Blocks(title="ToxRAI (HF Demo)", css=LIGHT_CSS) as demo:
     gr.Markdown("# 🧪 ToxRAI — Demo (CAS-first)")
     gr.Markdown(
         f"Public demo • AI summaries/day global cap: **{settings.max_ai_summaries_per_day}** • Cache TTL: **{settings.cache_ttl_seconds}s**"
@@ -363,6 +455,7 @@ with gr.Blocks(title="ToxRAI (HF Demo)", css=LIGHT_CSS) as demo:
             with gr.Accordion("PubChem (summary)", open=False):
                 pubchem_md = gr.Markdown()
             with gr.Accordion("CDC ToxProfiles", open=False):
                 cdc_md = gr.Markdown()
@@ -406,9 +499,9 @@ with gr.Blocks(title="ToxRAI (HF Demo)", css=LIGHT_CSS) as demo:
                     state,
                     overview_md,
                     pubchem_md,
                     ctx_md,
                     ntp_md,
-                    cdc_md,
                     iarc_md,
                     scholar_md,
                     fema_md,
@@ -429,9 +522,9 @@ with gr.Blocks(title="ToxRAI (HF Demo)", css=LIGHT_CSS) as demo:
                     state,
                     overview_md,
                     pubchem_md,
                     ctx_md,
                     ntp_md,
-                    cdc_md,
                     iarc_md,
                     scholar_md,
                     fema_md,
@@ -449,5 +542,8 @@ with gr.Blocks(title="ToxRAI (HF Demo)", css=LIGHT_CSS) as demo:
             pdf_btn.click(fn=download_report, inputs=[state, ai_out], outputs=[pdf_file, json_file])
 if __name__ == "__main__":
-    demo.queue().launch()

+import asyncio
 import json
+import os
 import time
+from typing import Any, Dict, Optional
 import gradio as gr
+import httpx
+from core.config import settings
 from core.rate_limit import check_and_increment_global_ai_cap
 from core.pdf_report import build_pdf
+from core.sources import pubchem, ntp, ctx as ctx_src, iarc, scholar, fema
+# Optional: CDC module may exist in your repo (user added).
+try:
+    from core.sources import cdc
+except Exception:
+    cdc = None  # type: ignore
 # -----------------------------
+# Caches (simple in-memory)
 # -----------------------------
 # Process-local cache of search results: run_search() stores its result dict
 # here under a cache key and returns the stored entry on a later hit.
 # NOTE(review): how the key is built and whether entries expire is not
 # visible in this view -- confirm before relying on a TTL.
 SEARCH_CACHE: Dict[str, Dict[str, Any]] = {}
 # Process-local cache of AI summary text; generate_ai() looks entries up
 # under the key f"ai::{cas}".
 AI_CACHE: Dict[str, str] = {}
def json_pretty(obj: Any) -> str:
    """Serialize ``obj`` as indented, human-readable JSON text.

    Non-ASCII characters are emitted verbatim and values the JSON encoder
    does not support natively are converted with ``str``. If encoding still
    fails (for example because of unsupported dict keys), the plain
    ``str(obj)`` representation is returned instead.
    """
    try:
        rendered = json.dumps(obj, ensure_ascii=False, default=str, indent=2)
    except Exception:
        # Display helper: degrade to the plain string form rather than fail.
        rendered = str(obj)
    return rendered
def client() -> httpx.AsyncClient:
    """Create a new ``httpx.AsyncClient`` that sends this app's user-agent.

    A fresh client is built on every call; ``run_search`` uses the returned
    object as an async context manager.
    """
    request_headers = {"user-agent": "toxrai-hf-demo"}
    return httpx.AsyncClient(headers=request_headers)
 # -----------------------------
+# Rendering helpers (Markdown)
 # -----------------------------
def render_overview(data: Dict[str, Any]) -> str:
    """Render the overview block: the query, the CAS used and quick IDs.

    The PubChem CID and the CompTox DTXSID are appended only when the
    corresponding source result is flagged ``ok`` and carries the identifier.
    Paragraphs are separated by blank lines.
    """
    query_text = data.get("query") or ""
    cas_text = data.get("cas_used") or ""
    paragraphs = [f"**Query:** `{query_text}`", f"**CAS used:** `{cas_text}`"]

    pubchem_info = data.get("pubchem") or {}
    cid = pubchem_info.get("cid") if pubchem_info.get("ok") else None
    if cid:
        paragraphs.append(f"**PubChem CID:** `{cid}`")

    ctx_info = data.get("ctx_genetox") or {}
    dtxsid = ctx_info.get("dtxsid") if ctx_info.get("ok") else None
    if dtxsid:
        paragraphs.append(f"**EPA CompTox DTXSID:** `{dtxsid}`")

    return "\n\n".join(paragraphs)
def render_pubchem_summary(pub: Dict[str, Any]) -> str:
    """Render the PubChem result as a Markdown summary.

    Args:
        pub: PubChem result dict. Keys read here: ``ok``, ``error``, ``cid``,
            ``resolved_cas``, ``props``, ``structure_png``, ``url`` and
            ``hazards`` (a list of ``{"name", "text"}`` dicts).

    Returns:
        Markdown text ending in a single newline, or a one-line
        "PubChem unavailable: ..." message when ``pub`` is missing, is not a
        dict, or is not flagged ``ok``.
    """
    if not isinstance(pub, dict) or not pub.get("ok"):
        # Fall back to a fixed message so a missing "error" key is never
        # rendered as the literal text "None".
        err = (pub.get("error") if isinstance(pub, dict) else None) or "Unknown PubChem error"
        return f"PubChem unavailable: {err}"

    props = pub.get("props")
    if not isinstance(props, dict):
        props = {}

    cid = pub.get("cid")
    cid_text = "-" if cid in (None, "") else cid
    resolved_cas = pub.get("resolved_cas") or "-"
    iupac_name = props.get("IUPACName") or props.get("iupac_name") or "-"
    formula = props.get("MolecularFormula") or "-"
    mw = props.get("MolecularWeight")
    mw_str = f"{mw}" if mw not in (None, "") else "-"
    smiles = props.get("CanonicalSMILES") or "-"

    lines = [
        f"**CID:** `{cid_text}`",
        f"**Resolved CAS (from synonyms):** `{resolved_cas}`",
        f"**IUPAC/Title:** {iupac_name}",
        "",
        f"**Molecular Formula:** `{formula}`",
        f"**Molecular Weight:** `{mw_str}`",
        f"**Canonical SMILES:** `{smiles}`",
    ]

    structure_png = pub.get("structure_png")
    if structure_png:
        lines += ["", "**Structure**", f"![]({structure_png})"]

    url = pub.get("url")
    if url:
        lines += ["", f"[Open PubChem]({url})"]

    hazards = pub.get("hazards") or []
    if hazards:
        lines += ["", "### Safety / Hazard Information"]
        # Render as paragraphs (avoids weird wrapping from bullet nesting).
        for hazard in hazards:
            if not isinstance(hazard, dict):
                continue  # tolerate malformed entries instead of crashing
            text = hazard.get("text") or ""
            if not text:
                continue
            name = hazard.get("name") or "Hazard"
            lines += [f"**{name}:** {text}", ""]

    return "\n".join(lines).rstrip() + "\n"
def render_ctx_summary(ctx: Dict[str, Any]) -> str:
    """Render the CTX (CompTox) genetox result as Markdown.

    Args:
        ctx: CTX result dict. Keys read here: ``ok``, ``error``,
            ``dashboard_search``, ``dtxsid``, ``dashboard_url`` and
            ``summary``.

    Returns:
        On failure, the error text (plus a dashboard search link when one is
        provided). On success, the DTXSID, the dashboard link and a fenced
        JSON excerpt of the most relevant ``summary`` fields.
    """
    if not isinstance(ctx, dict) or not ctx.get("ok"):
        failed = ctx if isinstance(ctx, dict) else {}
        # Fixed fallback so a missing "error" key is never shown as "None".
        err = failed.get("error") or "Unknown CTX error"
        search_url = failed.get("dashboard_search")
        if search_url:
            return f"{err}\n\n[Open CompTox Dashboard search]({search_url})"
        return str(err)

    lines = []
    dtxsid = ctx.get("dtxsid")
    if dtxsid:
        lines.append(f"**DTXSID:** `{dtxsid}`")
    dash = ctx.get("dashboard_url")
    if dash:
        lines.append(f"[Open CompTox Dashboard]({dash})")

    # Try to surface key fields (if present) without dumping huge JSON.
    summary = ctx.get("summary")
    if isinstance(summary, dict):
        # Tokens are compared against lower-cased key names, so they must be
        # lower-case themselves.
        interesting_tokens = (
            "genetox",
            "overall",
            "summary",
            "conclusion",
            "call",
            "result",
            "assessment",
        )
        picked = {
            key: value
            for key, value in summary.items()
            # str() keeps non-string keys from crashing the match.
            if any(tok in str(key).lower() for tok in interesting_tokens)
        }
        if not picked:
            # Fallback: show the first few keys.
            picked = dict(list(summary.items())[:8])

        txt = json_pretty(picked)
        # Keep it readable in the UI.
        if len(txt) > 6000:
            txt = txt[:6000] + "\n... (truncated)"
        lines += ["", "```json", txt, "```"]

    return "\n".join(lines)
def render_ntp_summary(ntp_res: Dict[str, Any]) -> str:
    """Render NTP Technical Report hits as a Markdown bullet list.

    Args:
        ntp_res: NTP result dict. Keys read here: ``ok``, ``error`` and
            ``items``; each item may carry ``tr``/``num``, ``title`` and
            ``report_page``/``url``.

    Returns:
        One bullet per report, an "unavailable" message when the lookup
        failed, or a "none found" message when there is nothing to list.
    """
    if not isinstance(ntp_res, dict) or not ntp_res.get("ok"):
        # Fixed fallback so a missing "error" key is never shown as "None".
        err = (ntp_res.get("error") if isinstance(ntp_res, dict) else None) or "Unknown NTP error"
        return f"NTP Technical Reports unavailable: {err}"

    lines = []
    for item in ntp_res.get("items") or []:
        if not isinstance(item, dict):
            continue  # skip malformed entries instead of crashing
        num = str(item.get("tr") or item.get("num") or "").strip()
        title = item.get("title") or "Report"
        url = item.get("report_page") or item.get("url") or ""
        entry = f"[{title}]({url})" if url else f"{title}"
        if not num:
            # No report number: omit the label rather than print "TR-".
            lines.append(f"- {entry}")
            continue
        # Do not double the prefix when the value already reads "TR-123".
        label = num if num.upper().startswith("TR") else f"TR-{num}"
        lines.append(f"- **{label}**  {entry}")

    if not lines:
        return "No NTP Technical Reports found for this CAS."  # CAS-filtered
    return "\n".join(lines)
def render_iarc_block(iarc_res: Dict[str, Any]) -> str:
    """Render the IARC Monographs result as Markdown.

    Args:
        iarc_res: IARC result dict. Keys read here: ``ok``, ``url`` and
            ``results`` (a list of ``{"title", "url", "year"}`` dicts).

    Returns:
        A single search link when ``url`` is present, otherwise a bullet list
        built from ``results``, otherwise "IARC link unavailable.".
    """
    unavailable = "IARC link unavailable."
    # Validate the type before any .get() call so unexpected input yields
    # the fallback text instead of an AttributeError.
    if not isinstance(iarc_res, dict) or not iarc_res.get("ok"):
        return unavailable

    url = iarc_res.get("url")
    if url:
        return f"[Search IARC Monographs (NCBI Bookshelf)]({url})"

    results = iarc_res.get("results")
    if not isinstance(results, list):
        return unavailable

    lines = []
    for item in results:
        if not isinstance(item, dict):
            continue
        title = item.get("title") or "IARC Monographs"
        link = item.get("url")
        year = item.get("year")
        suffix = f" ({year})" if year else ""
        lines.append(f"- [{title}]({link}){suffix}" if link else f"- {title}{suffix}")
    return "\n".join(lines) if lines else unavailable
def render_scholar_block(sch_res: Dict[str, Any]) -> str:
    """Render the Google Scholar search link, or a fallback message.

    The link is produced only when the result is flagged ``ok`` and carries
    a non-empty ``url``.
    """
    unavailable = "Google Scholar link unavailable."
    if not sch_res or not sch_res.get("ok"):
        return unavailable
    link = sch_res.get("url")
    if not link:
        return unavailable
    return f"[Open Google Scholar search]({link})"
def render_fema_block(fema_res: Dict[str, Any]) -> str:
    """Render the FEMA search link, the reported error, or a fallback.

    Args:
        fema_res: FEMA result dict. Keys read here: ``ok``, ``error``, ``url``.

    Returns:
        A Markdown link when the result is ``ok`` and has a ``url``; the
        ``error`` text when the lookup failed and reported one; otherwise
        "FEMA link unavailable.".
    """
    unavailable = "FEMA link unavailable."
    if not isinstance(fema_res, dict):
        return unavailable
    if not fema_res.get("ok"):
        # Fall back to the fixed message so a missing "error" key is never
        # rendered as the literal text "None".
        return str(fema_res.get("error") or unavailable)
    url = fema_res.get("url")
    return f"[Open FEMA / Fragrance Materials Safety Resource search]({url})" if url else unavailable
def render_cdc_block(cdc_res: Any) -> str:
    """Render CDC ToxProfile lookup results as Markdown.

    Accepted shapes:
      * a single profile dict with ``name`` / ``url``;
      * a list of such dicts;
      * a result envelope ``{"ok": ..., "error": ..., "matches": [...]}``
        (NOTE(review): the envelope shape is what the earlier CDC renderer
        consumed -- confirm against the current ``cdc`` module).

    Anything else is returned via ``str()``. Empty or missing results yield
    "No CDC ToxProfiles match.".
    """
    no_match = "No CDC ToxProfiles match."

    def _entry(profile: dict) -> str:
        # Format one profile as a Markdown link, or plain text without a URL.
        name = profile.get("name") or "CDC ToxProfile"
        url = profile.get("url")
        return f"[{name}]({url})" if url else name

    if not cdc_res:
        return no_match

    if isinstance(cdc_res, dict):
        if cdc_res.get("ok") is False:
            # Failed lookup: surface the reported error instead of a fake hit.
            return str(cdc_res.get("error") or "CDC ToxProfiles unavailable.")
        matches = cdc_res.get("matches")
        if not isinstance(matches, list):
            return _entry(cdc_res)
        # Envelope shape: render the wrapped matches as a list below.
        cdc_res = matches

    if isinstance(cdc_res, list):
        lines = [f"- {_entry(item)}" for item in cdc_res if isinstance(item, dict)]
        return "\n".join(lines) if lines else no_match

    return str(cdc_res)
 # -----------------------------
+# Search + AI
 # -----------------------------
+async def run_search(query: str) -> Dict[str, Any]:
     q = (query or "").strip()
     if not q:
         raise gr.Error("Enter a CAS number (preferred) or chemical name.")
         return SEARCH_CACHE[cache_key]
     async with client() as http:
+        # PubChem accepts names and CAS. We also use it to resolve CAS via synonyms.
         pub = await pubchem.pubchem_by_query(q, http)
         cas = q
+        if not pubchem.is_cas(cas):
             cas = pub.get("resolved_cas") or q
+        # CTX is CAS-first (but we allow name too; resolver will try both)
+        ctx_task = ctx_src.fetch_ctx_genetox(cas, http) if cas else asyncio.sleep(0, result={"ok": False})
         ntp_task = ntp.search_technical_reports(cas, http, limit=8)
         ctx_res, ntp_res = await asyncio.gather(ctx_task, ntp_task)
+    out: Dict[str, Any] = {
         "query": q,
         "cas_used": cas,
         "pubchem": pub,
         "ctx_genetox": ctx_res,
         "ntp_technical_reports": ntp_res,
         "iarc_monographs": iarc.bookshelf_link(cas),
         "google_scholar": {"ok": True, "url": scholar.scholar_link(cas)},
+        "fema": fema.fema_link(cas if pubchem.is_cas(cas) else q),
     }
+    # CDC toxprofiles (if module exists)
+    if cdc is not None:
+        try:
+            # Try a few common function names (depending on how you implemented cdc.py)
+            if hasattr(cdc, "lookup"):
+                out["cdc_toxprofiles"] = cdc.lookup(cas)
+            elif hasattr(cdc, "search"):
+                out["cdc_toxprofiles"] = cdc.search(cas)
+            elif hasattr(cdc, "toxprofile_for"):
+                out["cdc_toxprofiles"] = cdc.toxprofile_for(cas)
+            else:
+                out["cdc_toxprofiles"] = None
+        except Exception:
+            out["cdc_toxprofiles"] = None
     SEARCH_CACHE[cache_key] = out
     return out
def _prune_for_prompt(obj: Any, max_chars: int) -> str:
    """Return ``obj`` as pretty JSON text, cut to at most ``max_chars`` chars.

    When the text is cut, a "... (truncated)" marker line is appended after
    the kept prefix (so the result may be slightly longer than ``max_chars``
    and is no longer valid JSON).
    """
    rendered = json_pretty(obj)
    if len(rendered) > max_chars:
        return rendered[:max_chars] + "\n... (truncated)"
    return rendered
def build_prompt(data: Dict[str, Any]) -> str:
    """Assemble the AI-summary prompt from a trimmed view of the evidence.

    Only selected fields of each source are included (never the full raw
    JSON), and the serialized evidence is truncated to 12000 characters
    before being appended to the fixed instruction text.
    """
    pub = data.get("pubchem") or {}
    pub_is_dict = isinstance(pub, dict)
    props = (pub.get("props") or {}) if pub_is_dict else {}
    hazards = (pub.get("hazards") or []) if pub_is_dict else []

    pubchem_section = {
        "cid": pub.get("cid"),
        "resolved_cas": pub.get("resolved_cas"),
        "iupac": props.get("IUPACName") or props.get("iupac_name"),
        "formula": props.get("MolecularFormula"),
        "molecular_weight": props.get("MolecularWeight"),
        "canonical_smiles": props.get("CanonicalSMILES"),
        "hazards": hazards[:10],  # cap the hazard list
    }

    ctx = data.get("ctx_genetox") or {}
    ctx_section = {
        "ok": ctx.get("ok"),
        "dtxsid": ctx.get("dtxsid"),
        "summary": ctx.get("summary"),
    }

    ntp = data.get("ntp_technical_reports") or {}

    evidence = {
        "query": data.get("query"),
        "cas_used": data.get("cas_used"),
        "pubchem": pubchem_section,
        "ctx_genetox": ctx_section,
        "ntp_technical_reports": ntp.get("items", []),
        "cdc_toxprofiles": data.get("cdc_toxprofiles"),
    }
    body = _prune_for_prompt(evidence, max_chars=12000)

    instructions = (
        "You are a toxicology regulatory assistant. "
        "Using ONLY the evidence JSON below, write a concise weight-of-evidence summary focused on mutagenicity/genotoxicity. "
        "If evidence is conflicting or absent, say so explicitly. "
        "Cite which source each statement comes from (PubChem hazards, CTX genetox summary, NTP TR titles, CDC ToxProfiles).\n\n"
    )
    return instructions + "EVIDENCE_JSON:\n" + body
 def do_search(query: str):
     data = asyncio.run(run_search(query))
     overview_md_text = render_overview(data)
     pubchem_md_text = render_pubchem_summary(data.get("pubchem", {}))
     ctx_md_text = render_ctx_summary(data.get("ctx_genetox", {}))
     ntp_md_text = render_ntp_summary(data.get("ntp_technical_reports", {}))
     iarc_md_text = render_iarc_block(data.get("iarc_monographs", {}))
     scholar_md_text = render_scholar_block(data.get("google_scholar", {}))
     fema_md_text = render_fema_block(data.get("fema", {}))
+    cdc_md_text = ""
+    if "cdc_toxprofiles" in data:
+        cdc_md_text = render_cdc_block(data.get("cdc_toxprofiles"))
+    raw_pubchem_json = json_pretty(data.get("pubchem", {}))
+    raw_ctx_json = json_pretty(data.get("ctx_genetox", {}))
+    raw_ntp_json = json_pretty(data.get("ntp_technical_reports", {}))
+    raw_iarc_json = json_pretty(data.get("iarc_monographs", {}))
+    raw_scholar_json = json_pretty(data.get("google_scholar", {}))
+    raw_fema_json = json_pretty(data.get("fema", {}))
+    # IMPORTANT: return order must match `outputs=[...]`
+    # If CDC accordion exists, include it right after PubChem.
     return (
+        data,  # state
         overview_md_text,
         pubchem_md_text,
+        cdc_md_text,
         ctx_md_text,
         ntp_md_text,
         iarc_md_text,
         scholar_md_text,
         fema_md_text,
         raw_iarc_json,
         raw_scholar_json,
         raw_fema_json,
+        "",  # ai_out (blank after search)
     )
 def generate_ai(data: dict):
     if not data:
         raise gr.Error("Run a search first.")
     cas = data.get("cas_used") or data.get("query") or ""
     cache_key = f"ai::{cas}"
     if cache_key in AI_CACHE:
     if not allowed:
         return f"AI Summary capacity reached for today (limit {info.get('limit')}). Please try again tomorrow."
+    from core.sources.ai_summary import generate_ai_summary  # local import avoids cold-start issues
     resp = generate_ai_summary(build_prompt(data))
     if not resp.get("ok"):
         return f"**AI summary unavailable:** {resp.get('error')}"
 def download_report(data: dict, ai_text: str):
     """Build the downloadable report for the current search result.

     Args:
         data: Search-result dict held in the UI state; must be non-empty.
         ai_text: AI summary text; an empty value is passed on as ``None``.

     Returns:
         The ``(pdf_path, json_path)`` pair returned by ``build_pdf``.

     Raises:
         gr.Error: If no search result is available yet.
     """
     if not data:
         raise gr.Error("Run a search first.")
     # Label the report with the resolved CAS, falling back to the raw query.
     cas = data.get("cas_used") or data.get("query") or "unknown"
     pdf_path, json_path = build_pdf(cas, evidence=data, ai_summary=ai_text if ai_text else None)
     return pdf_path, json_path
 # -----------------------------
+# UI
 # -----------------------------
+with gr.Blocks(title="ToxRAI (HF Demo)") as demo:
     gr.Markdown("# 🧪 ToxRAI — Demo (CAS-first)")
     gr.Markdown(
         f"Public demo • AI summaries/day global cap: **{settings.max_ai_summaries_per_day}** • Cache TTL: **{settings.cache_ttl_seconds}s**"
             with gr.Accordion("PubChem (summary)", open=False):
                 pubchem_md = gr.Markdown()
+            # CDC accordion (optional)
             with gr.Accordion("CDC ToxProfiles", open=False):
                 cdc_md = gr.Markdown()
                     state,
                     overview_md,
                     pubchem_md,
+                    cdc_md,
                     ctx_md,
                     ntp_md,
                     iarc_md,
                     scholar_md,
                     fema_md,
                     state,
                     overview_md,
                     pubchem_md,
+                    cdc_md,
                     ctx_md,
                     ntp_md,
                     iarc_md,
                     scholar_md,
                     fema_md,
             pdf_btn.click(fn=download_report, inputs=[state, ai_out], outputs=[pdf_file, json_file])
+demo.queue(default_concurrency_limit=6)
+app = demo
 if __name__ == "__main__":
+    demo.launch()