Spaces:

hchevva
/

TOXRA.AI

Sleeping

App Files Files Community

hchevva committed 16 days ago

Commit

3d936e9

verified ·

1 Parent(s): c3e44ad

Update app.py

Browse files

Files changed (1) hide show

app.py +196 -269

app.py CHANGED Viewed

@@ -1,235 +1,228 @@
-import asyncio
-import json
 import os
 import time
-from typing import Any, Dict, Optional
 import gradio as gr
-import httpx
-from core.config import settings
 from core.rate_limit import check_and_increment_global_ai_cap
 from core.pdf_report import build_pdf
-from core.sources import pubchem, ntp, ctx as ctx_src, iarc, scholar, fema
-# Optional: CDC module may exist in your repo (user added).
-try:
-    from core.sources import cdc
-except Exception:
-    cdc = None  # type: ignore
 # -----------------------------
-# Caches (simple in-memory)
 # -----------------------------
 SEARCH_CACHE: Dict[str, Dict[str, Any]] = {}
 AI_CACHE: Dict[str, str] = {}
-def json_pretty(obj: Any) -> str:
     try:
-        return json.dumps(obj, indent=2, ensure_ascii=False, default=str)
     except Exception:
         return str(obj)
-def client() -> httpx.AsyncClient:
-    return httpx.AsyncClient(headers={"user-agent": "toxrai-hf-demo"})
 # -----------------------------
-# Rendering helpers (Markdown)
 # -----------------------------
-def render_overview(data: Dict[str, Any]) -> str:
-    q = data.get("query") or ""
-    cas = data.get("cas_used") or ""
-    lines = [
-        f"**Query:** `{q}`",
-        f"**CAS used:** `{cas}`",
-    ]
-    # Add quick IDs when available
-    pub = data.get("pubchem") or {}
-    if pub.get("ok") and pub.get("cid"):
-        lines.append(f"**PubChem CID:** `{pub.get('cid')}`")
-    ctx = data.get("ctx_genetox") or {}
-    if ctx.get("ok") and ctx.get("dtxsid"):
-        lines.append(f"**EPA CompTox DTXSID:** `{ctx.get('dtxsid')}`")
-    return "\n\n".join(lines)
-def render_pubchem_summary(pub: Dict[str, Any]) -> str:
-    if not pub or not pub.get("ok"):
-        err = pub.get("error") if isinstance(pub, dict) else "Unknown PubChem error"
-        return f"PubChem unavailable: {err}"
-    cid = pub.get("cid")
-    resolved_cas = pub.get("resolved_cas") or "-"
-    props = pub.get("props") or {}
-    iupac_name = props.get("IUPACName") or props.get("iupac_name") or "-"
-    formula = props.get("MolecularFormula") or "-"
-    mw = props.get("MolecularWeight")
-    mw_str = f"{mw}" if mw not in (None, "") else "-"
-    smiles = props.get("CanonicalSMILES") or "-"
-    lines = []
-    lines.append(f"**CID:** `{cid}`")
-    lines.append(f"**Resolved CAS (from synonyms):** `{resolved_cas}`")
-    lines.append(f"**IUPAC/Title:** {iupac_name}")
-    lines.append("")
-    lines.append(f"**Molecular Formula:** `{formula}`")
-    lines.append(f"**Molecular Weight:** `{mw_str}`")
-    lines.append(f"**Canonical SMILES:** `{smiles}`")
-    structure_png = pub.get("structure_png")
-    if structure_png:
-        lines.append("")
-        lines.append("**Structure**")
-        lines.append(f"![]({structure_png})")
-    url = pub.get("url")
-    if url:
-        lines.append("")
-        lines.append(f"[Open PubChem]({url})")
-    hazards = pub.get("hazards") or []
-    if hazards:
-        lines.append("")
-        lines.append("### Safety / Hazard Information")
-        # Render as paragraphs (avoids weird wrapping from bullet nesting)
-        for h in hazards:
-            name = (h or {}).get("name") or "Hazard"
-            text = (h or {}).get("text") or ""
-            if not text:
-                continue
-            lines.append(f"**{name}:** {text}")
-            lines.append("")
-    return "\n".join(lines).rstrip() + "\n"
-def render_ctx_summary(ctx: Dict[str, Any]) -> str:
-    if not ctx or not ctx.get("ok"):
-        search_url = ctx.get("dashboard_search") if isinstance(ctx, dict) else None
-        err = ctx.get("error") if isinstance(ctx, dict) else "Unknown CTX error"
-        if search_url:
-            return f"{err}\n\n[Open CompTox Dashboard search]({search_url})"
-        return str(err)
-    dtxsid = ctx.get("dtxsid")
-    dash = ctx.get("dashboard_url")
-    summary = ctx.get("summary")
     lines = []
     if dtxsid:
         lines.append(f"**DTXSID:** `{dtxsid}`")
     if dash:
-        lines.append(f"[Open CompTox Dashboard]({dash})")
-    # Try to surface key fields (if present) without dumping huge JSON
-    if isinstance(summary, dict):
-        interesting_keys = [
-            "geneTox",
-            "genetox",
-            "overall",
-            "summary",
-            "conclusion",
-            "call",
-            "result",
-            "assessment",
-        ]
-        picked = {}
-        for k in summary.keys():
-            lk = k.lower()
-            if any(tok in lk for tok in interesting_keys):
-                picked[k] = summary[k]
-        if not picked:
-            # fallback: first few keys
-            for k in list(summary.keys())[:8]:
-                picked[k] = summary[k]
-        lines.append("")
-        lines.append("```json")
-        txt = json_pretty(picked)
-        # Keep it readable in UI
-        if len(txt) > 6000:
-            txt = txt[:6000] + "\n... (truncated)"
-        lines.append(txt)
-        lines.append("```")
     return "\n".join(lines)
-def render_ntp_summary(ntp_res: Dict[str, Any]) -> str:
-    if not ntp_res or not ntp_res.get("ok"):
-        err = ntp_res.get("error") if isinstance(ntp_res, dict) else "Unknown NTP error"
-        return f"NTP Technical Reports unavailable: {err}"
-    items = ntp_res.get("items") or []
-    if not items:
-        return "No NTP Technical Reports found for this CAS."  # CAS-filtered
     lines = []
-    for it in items:
-        num = it.get("tr") or it.get("num") or ""
-        title = it.get("title") or "Report"
-        url = it.get("report_page") or it.get("url") or ""
         if url:
-            lines.append(f"- **TR-{num}**  [{title}]({url})")
         else:
-            lines.append(f"- **TR-{num}**  {title}")
     return "\n".join(lines)
-def render_iarc_block(iarc_res: Dict[str, Any]) -> str:
-    if not iarc_res or not iarc_res.get("ok"):
         return "IARC link unavailable."
-    url = iarc_res.get("url")
-    return f"[Search IARC Monographs (NCBI Bookshelf)]({url})" if url else "IARC link unavailable."
-def render_scholar_block(sch_res: Dict[str, Any]) -> str:
-    if not sch_res or not sch_res.get("ok"):
-        return "Google Scholar link unavailable."
-    url = sch_res.get("url")
-    return f"[Open Google Scholar search]({url})" if url else "Google Scholar link unavailable."
-def render_fema_block(fema_res: Dict[str, Any]) -> str:
-    if not fema_res or not fema_res.get("ok"):
-        err = fema_res.get("error") if isinstance(fema_res, dict) else "FEMA link unavailable."
-        return str(err)
-    url = fema_res.get("url")
     return f"[Open FEMA / Fragrance Materials Safety Resource search]({url})" if url else "FEMA link unavailable."
-def render_cdc_block(cdc_res: Any) -> str:
-    if not cdc_res:
-        return "No CDC ToxProfiles match."
-    # Accept either dict or list
-    if isinstance(cdc_res, dict):
-        url = cdc_res.get("url")
-        name = cdc_res.get("name") or "CDC ToxProfile"
-        return f"[{name}]({url})" if url else name
-    if isinstance(cdc_res, list):
-        lines = []
-        for it in cdc_res:
-            if not isinstance(it, dict):
-                continue
-            name = it.get("name") or "CDC ToxProfile"
-            url = it.get("url")
-            lines.append(f"- [{name}]({url})" if url else f"- {name}")
-        return "\n".join(lines) if lines else "No CDC ToxProfiles match."
-    return str(cdc_res)
 # -----------------------------
-# Search + AI
 # -----------------------------
-async def run_search(query: str) -> Dict[str, Any]:
     q = (query or "").strip()
     if not q:
         raise gr.Error("Enter a CAS number (preferred) or chemical name.")
@@ -239,130 +232,65 @@ async def run_search(query: str) -> Dict[str, Any]:
         return SEARCH_CACHE[cache_key]
     async with client() as http:
-        # PubChem accepts names and CAS. We also use it to resolve CAS via synonyms.
         pub = await pubchem.pubchem_by_query(q, http)
         cas = q
-        if not pubchem.is_cas(cas):
             cas = pub.get("resolved_cas") or q
-        # CTX is CAS-first (but we allow name too; resolver will try both)
-        ctx_task = ctx_src.fetch_ctx_genetox(cas, http) if cas else asyncio.sleep(0, result={"ok": False})
         ntp_task = ntp.search_technical_reports(cas, http, limit=8)
         ctx_res, ntp_res = await asyncio.gather(ctx_task, ntp_task)
-    out: Dict[str, Any] = {
         "query": q,
         "cas_used": cas,
         "pubchem": pub,
         "ctx_genetox": ctx_res,
         "ntp_technical_reports": ntp_res,
         "iarc_monographs": iarc.bookshelf_link(cas),
         "google_scholar": {"ok": True, "url": scholar.scholar_link(cas)},
-        "fema": fema.fema_link(cas if pubchem.is_cas(cas) else q),
     }
-    # CDC toxprofiles (if module exists)
-    if cdc is not None:
-        try:
-            # Try a few common function names (depending on how you implemented cdc.py)
-            if hasattr(cdc, "lookup"):
-                out["cdc_toxprofiles"] = cdc.lookup(cas)
-            elif hasattr(cdc, "search"):
-                out["cdc_toxprofiles"] = cdc.search(cas)
-            elif hasattr(cdc, "toxprofile_for"):
-                out["cdc_toxprofiles"] = cdc.toxprofile_for(cas)
-            else:
-                out["cdc_toxprofiles"] = None
-        except Exception:
-            out["cdc_toxprofiles"] = None
     SEARCH_CACHE[cache_key] = out
     return out
-def _prune_for_prompt(obj: Any, max_chars: int) -> str:
-    txt = json_pretty(obj)
-    if len(txt) <= max_chars:
-        return txt
-    return txt[:max_chars] + "\n... (truncated)"
-def build_prompt(data: Dict[str, Any]) -> str:
-    """Build a prompt that will not exceed model context.
-    Key change vs earlier version: DO NOT dump full raw JSON from all sources.
-    """
-    pub = data.get("pubchem") or {}
-    props = (pub.get("props") or {}) if isinstance(pub, dict) else {}
-    hazards = (pub.get("hazards") or []) if isinstance(pub, dict) else []
-    prompt_obj = {
-        "query": data.get("query"),
-        "cas_used": data.get("cas_used"),
-        "pubchem": {
-            "cid": pub.get("cid"),
-            "resolved_cas": pub.get("resolved_cas"),
-            "iupac": props.get("IUPACName") or props.get("iupac_name"),
-            "formula": props.get("MolecularFormula"),
-            "molecular_weight": props.get("MolecularWeight"),
-            "canonical_smiles": props.get("CanonicalSMILES"),
-            "hazards": hazards[:10],
-        },
-        "ctx_genetox": {
-            "ok": (data.get("ctx_genetox") or {}).get("ok"),
-            "dtxsid": (data.get("ctx_genetox") or {}).get("dtxsid"),
-            "summary": (data.get("ctx_genetox") or {}).get("summary"),
-        },
-        "ntp_technical_reports": (data.get("ntp_technical_reports") or {}).get("items", []),
-        "cdc_toxprofiles": data.get("cdc_toxprofiles"),
-    }
-    body = _prune_for_prompt(prompt_obj, max_chars=12000)
-    return (
-        "You are a toxicology regulatory assistant. "
-        "Using ONLY the evidence JSON below, write a concise weight-of-evidence summary focused on mutagenicity/genotoxicity. "
-        "If evidence is conflicting or absent, say so explicitly. "
-        "Cite which source each statement comes from (PubChem hazards, CTX genetox summary, NTP TR titles, CDC ToxProfiles).\n\n"
-        "EVIDENCE_JSON:\n"
-        + body
-    )
 def do_search(query: str):
     data = asyncio.run(run_search(query))
     overview_md_text = render_overview(data)
     pubchem_md_text = render_pubchem_summary(data.get("pubchem", {}))
     ctx_md_text = render_ctx_summary(data.get("ctx_genetox", {}))
     ntp_md_text = render_ntp_summary(data.get("ntp_technical_reports", {}))
     iarc_md_text = render_iarc_block(data.get("iarc_monographs", {}))
     scholar_md_text = render_scholar_block(data.get("google_scholar", {}))
     fema_md_text = render_fema_block(data.get("fema", {}))
-    cdc_md_text = ""
-    if "cdc_toxprofiles" in data:
-        cdc_md_text = render_cdc_block(data.get("cdc_toxprofiles"))
-    raw_pubchem_json = json_pretty(data.get("pubchem", {}))
-    raw_ctx_json = json_pretty(data.get("ctx_genetox", {}))
-    raw_ntp_json = json_pretty(data.get("ntp_technical_reports", {}))
-    raw_iarc_json = json_pretty(data.get("iarc_monographs", {}))
-    raw_scholar_json = json_pretty(data.get("google_scholar", {}))
-    raw_fema_json = json_pretty(data.get("fema", {}))
-    # IMPORTANT: return order must match `outputs=[...]`
-    # If CDC accordion exists, include it right after PubChem.
     return (
-        data,  # state
         overview_md_text,
         pubchem_md_text,
-        cdc_md_text,
         ctx_md_text,
         ntp_md_text,
         iarc_md_text,
         scholar_md_text,
         fema_md_text,
@@ -372,14 +300,13 @@ def do_search(query: str):
         raw_iarc_json,
         raw_scholar_json,
         raw_fema_json,
-        "",  # ai_out (blank after search)
     )
 def generate_ai(data: dict):
     if not data:
         raise gr.Error("Run a search first.")
     cas = data.get("cas_used") or data.get("query") or ""
     cache_key = f"ai::{cas}"
     if cache_key in AI_CACHE:
@@ -389,8 +316,6 @@ def generate_ai(data: dict):
     if not allowed:
         return f"AI Summary capacity reached for today (limit {info.get('limit')}). Please try again tomorrow."
-    from core.sources.ai_summary import generate_ai_summary  # local import avoids cold-start issues
     resp = generate_ai_summary(build_prompt(data))
     if not resp.get("ok"):
         return f"**AI summary unavailable:** {resp.get('error')}"
@@ -403,17 +328,19 @@ def generate_ai(data: dict):
 def download_report(data: dict, ai_text: str):
     if not data:
         raise gr.Error("Run a search first.")
     cas = data.get("cas_used") or data.get("query") or "unknown"
     pdf_path, json_path = build_pdf(cas, evidence=data, ai_summary=ai_text if ai_text else None)
     return pdf_path, json_path
 # -----------------------------
-# UI
 # -----------------------------
-with gr.Blocks(title="ToxRAI (HF Demo)") as demo:
     gr.Markdown("# 🧪 ToxRAI — Demo (CAS-first)")
     gr.Markdown(
         f"Public demo • AI summaries/day global cap: **{settings.max_ai_summaries_per_day}** • Cache TTL: **{settings.cache_ttl_seconds}s**"
@@ -436,7 +363,6 @@ with gr.Blocks(title="ToxRAI (HF Demo)") as demo:
             with gr.Accordion("PubChem (summary)", open=False):
                 pubchem_md = gr.Markdown()
-            # CDC accordion (optional)
             with gr.Accordion("CDC ToxProfiles", open=False):
                 cdc_md = gr.Markdown()
@@ -480,9 +406,9 @@ with gr.Blocks(title="ToxRAI (HF Demo)") as demo:
                     state,
                     overview_md,
                     pubchem_md,
-                    cdc_md,
                     ctx_md,
                     ntp_md,
                     iarc_md,
                     scholar_md,
                     fema_md,
@@ -503,9 +429,9 @@ with gr.Blocks(title="ToxRAI (HF Demo)") as demo:
                     state,
                     overview_md,
                     pubchem_md,
-                    cdc_md,
                     ctx_md,
                     ntp_md,
                     iarc_md,
                     scholar_md,
                     fema_md,
@@ -523,4 +449,5 @@ with gr.Blocks(title="ToxRAI (HF Demo)") as demo:
             pdf_btn.click(fn=download_report, inputs=[state, ai_out], outputs=[pdf_file, json_file])
-demo.queue(default_concurrency_limit=6).launch()

 import os
+import json
 import time
+import asyncio
+from dataclasses import dataclass
+from typing import Any, Dict, Tuple
 import gradio as gr
+from core.http import client
 from core.rate_limit import check_and_increment_global_ai_cap
+from core.validate import is_cas
 from core.pdf_report import build_pdf
+from core.sources import pubchem, ntp, ctx as ctx_src, iarc, scholar, fema, cdc
+from core.sources.ai_summary import generate_ai_summary
+# -----------------------------
+# Settings
+# -----------------------------
def _env_int(name: str, default: int) -> int:
    """Read an integer setting from the environment, falling back to *default*.

    An unset, blank, or non-numeric value yields *default* instead of raising,
    so a mistyped variable cannot abort the import of this module.
    """
    raw = os.getenv(name)
    if raw is None or not raw.strip():
        return default
    try:
        return int(raw)
    except ValueError:
        return default


@dataclass
class Settings:
    """Runtime configuration read from environment variables.

    The defaults are evaluated once, when this class body executes; changing
    the environment afterwards does not affect new instances.
    """

    # Value of OPENAI_MODEL, or "gpt-4o" when the variable is unset.
    openai_model: str = os.getenv("OPENAI_MODEL", "gpt-4o")
    # Value of MAX_AI_SUMMARIES_PER_DAY (shown in the UI banner); default 100.
    max_ai_summaries_per_day: int = _env_int("MAX_AI_SUMMARIES_PER_DAY", 100)
    # Value of CACHE_TTL_SECONDS (shown in the UI banner); default 86400.
    cache_ttl_seconds: int = _env_int("CACHE_TTL_SECONDS", 86400)


# Module-level instance used by the rest of the file.
settings = Settings()
 # -----------------------------
+# Simple in-memory caches
 # -----------------------------
 SEARCH_CACHE: Dict[str, Dict[str, Any]] = {}
 AI_CACHE: Dict[str, str] = {}
+# -----------------------------
+# Utilities
+# -----------------------------
def _pretty(obj: Any) -> str:
    """Return *obj* as 2-space-indented JSON text for display; never raises.

    Values the JSON encoder cannot handle are converted with ``str`` so that a
    single such leaf does not turn the whole payload into a Python repr.
    If serialization still fails (for example, unsupported dict keys), the
    plain ``str(obj)`` is returned instead.
    """
    try:
        # ensure_ascii=False keeps non-ASCII characters readable in the output.
        return json.dumps(obj, indent=2, ensure_ascii=False, default=str)
    except Exception:
        # Deliberate catch-all: this helper is a best-effort formatter and
        # must not raise into its callers.
        return str(obj)
def _truncate_text(s: str, max_chars: int) -> str:
    """Cap *s* at *max_chars* characters, marking any cut.

    A falsy *s* yields an empty string. Text within the limit is returned
    unchanged; longer text is cut to its first *max_chars* characters and a
    ``[TRUNCATED]`` marker is appended after the cut.
    """
    if not s:
        return ""
    fits = len(s) <= max_chars
    return s if fits else s[:max_chars] + "\n\n[TRUNCATED]\n"
 # -----------------------------
+# Renderers (Markdown blocks)
 # -----------------------------
def render_overview(data: dict) -> str:
    """Render the search overview as Markdown.

    Args:
        data: Search result mapping; the ``"query"`` and ``"cas_used"``
            entries are read.

    Returns:
        Two Markdown paragraphs showing the query and the CAS that was used.
        A missing or ``None`` entry is shown as an empty code span rather
        than the literal text ``None``.
    """
    query = data.get("query") or ""
    cas = data.get("cas_used") or ""
    return f"**Query:** `{query}`\n\n**CAS used:** `{cas}`"
def render_pubchem_summary(pub: dict) -> str:
    """Render the PubChem result as Markdown, one paragraph per field.

    Args:
        pub: PubChem result mapping. Read keys: ``ok``, ``error``, ``cid``,
            ``resolved_cas``, ``iupac_name``, ``title``, ``molecular_formula``,
            ``molecular_weight``, ``canonical_smiles``, ``structure_png``,
            ``url``.

    Returns:
        An "unavailable" message when *pub* is not a mapping or its ``ok``
        entry is falsy; otherwise the formatted summary.
    """
    if not isinstance(pub, dict) or not pub.get("ok"):
        reason = pub.get("error") if isinstance(pub, dict) else None
        return f"PubChem unavailable: {reason or 'unknown'}"

    cid = pub.get("cid") or ""
    resolved_cas = pub.get("resolved_cas") or ""
    name = pub.get("iupac_name") or pub.get("title") or "-"
    formula = pub.get("molecular_formula") or "-"
    weight = pub.get("molecular_weight") or "-"
    smiles = pub.get("canonical_smiles") or "-"
    structure_url = pub.get("structure_png") or ""
    pubchem_url = pub.get("url") or ""

    parts = [f"**CID:** `{cid}`"]
    if resolved_cas:
        parts.append(f"**Resolved CAS (from synonyms):** `{resolved_cas}`")
    parts += [
        f"**IUPAC/Title:** {name}",
        f"**Molecular Formula:** `{formula}`",
        f"**Molecular Weight:** `{weight}`",
        f"**Canonical SMILES:** `{smiles}`",
    ]
    if structure_url:
        parts += ["**Structure**", f"![structure]({structure_url})"]
    if pubchem_url:
        parts.append(f"[Open PubChem]({pubchem_url})")

    # Separate entries with a blank line: in standard Markdown a single
    # newline is a soft break, which would fold all fields into one paragraph.
    return "\n\n".join(parts)
def render_ctx_summary(ctx: dict) -> str:
    """Render the CTX result as Markdown.

    Args:
        ctx: CTX result mapping. Read keys: ``ok``, ``error``, ``dtxsid``,
            ``ghs_hazard_statements``, ``echa_cl_summary``,
            ``dashboard_search_url``.

    Returns:
        The result's ``error`` text (or ``"CTX unavailable."``) when *ctx* is
        not a mapping or its ``ok`` entry is falsy; otherwise the available
        sections, or a "No DTXSID found" note when none are present.
    """
    # Check the type before reading "error": a None result must produce the
    # fallback message instead of an AttributeError.
    if not isinstance(ctx, dict) or not ctx.get("ok"):
        err = ctx.get("error") if isinstance(ctx, dict) else None
        return str(err) if err else "CTX unavailable."

    dtxsid = ctx.get("dtxsid") or ""
    hazard = ctx.get("ghs_hazard_statements") or ""
    echa = ctx.get("echa_cl_summary") or ""
    dash = ctx.get("dashboard_search_url") or ""

    blocks = []
    if dtxsid:
        blocks.append(f"**DTXSID:** `{dtxsid}`")
    # The leading "\n" on the following entries yields a blank line before
    # each section once the blocks are joined with "\n".
    if hazard:
        blocks.append(f"\n**GHS Hazard Statements:**\n\n{hazard}")
    if echa:
        blocks.append(f"\n**ECHA C&L Summary:**\n\n{echa}")
    if dash:
        blocks.append(f"\n[Open CompTox Dashboard search]({dash})")

    return "\n".join(blocks) if blocks else "No DTXSID found for this query."
def render_ntp_summary(ntp_obj: dict) -> str:
    """Render NTP Technical Report hits as a Markdown bullet list.

    Args:
        ntp_obj: NTP result mapping. Read keys: ``ok``, ``error``, ``hits``;
            each hit is read for ``tr``, ``title``, ``url`` and ``pdf``.

    Returns:
        The result's ``error`` text (or ``"NTP unavailable."``) when *ntp_obj*
        is not a mapping or its ``ok`` entry is falsy; a "no reports" note
        when there is nothing to list; otherwise one bullet per hit with the
        report-page and PDF links that are present.
    """
    # Check the type before reading "error": a None result must produce the
    # fallback message instead of an AttributeError.
    if not isinstance(ntp_obj, dict) or not ntp_obj.get("ok"):
        err = ntp_obj.get("error") if isinstance(ntp_obj, dict) else None
        return str(err) if err else "NTP unavailable."

    bullets = []
    for hit in ntp_obj.get("hits") or []:
        if not isinstance(hit, dict):
            continue  # ignore malformed entries instead of failing the render
        label = hit.get("tr") or hit.get("title") or "NTP Technical Report"
        url = hit.get("url") or ""
        pdf = hit.get("pdf") or ""
        links = []
        if url:
            links.append(f"[Report page]({url})")
        if pdf:
            links.append(f"[PDF]({pdf})")
        if links:
            bullets.append(f"- **{label}** — " + " • ".join(links))
        else:
            bullets.append(f"- **{label}**")

    if not bullets:
        return "No NTP Technical Reports found for this CAS."
    return "\n".join(bullets)
def render_cdc_summary(cdc_obj: dict) -> str:
    """Render CDC ToxProfile matches as a Markdown bullet list.

    Args:
        cdc_obj: CDC result mapping. Read keys: ``ok``, ``error``,
            ``matches``; each match is read for ``name``, ``cas`` and ``url``.

    Returns:
        The result's ``error`` text (or ``"CDC toxprofiles unavailable."``)
        when *cdc_obj* is not a mapping or its ``ok`` entry is falsy; a
        "no toxprofile" note when there is nothing to list; otherwise one
        bullet per match.
    """
    # Check the type before reading "error": a None result must produce the
    # fallback message instead of an AttributeError.
    if not isinstance(cdc_obj, dict) or not cdc_obj.get("ok"):
        err = cdc_obj.get("error") if isinstance(cdc_obj, dict) else None
        return str(err) if err else "CDC toxprofiles unavailable."

    bullets = []
    for match in cdc_obj.get("matches") or []:
        if not isinstance(match, dict):
            continue  # ignore malformed entries instead of failing the render
        name = match.get("name") or "ToxProfile"
        cas = match.get("cas") or ""
        url = match.get("url") or ""
        bullet = f"- **{name}**"
        if cas:
            # Shown only when a CAS exists, to avoid an empty "(CAS: )".
            bullet += f" (CAS: {cas})"
        if url:
            bullet += f" — [CDC ToxProfile]({url})"
        bullets.append(bullet)

    if not bullets:
        return "No toxprofile is available for the chemical."
    return "\n".join(bullets)
def render_iarc_block(obj: dict) -> str:
    """Return a Markdown link to the IARC Monographs search.

    The link is produced only when *obj* is truthy, its ``ok`` entry is
    truthy and its ``url`` entry is non-empty; in every other case a fixed
    "unavailable" message is returned.
    """
    unavailable = "IARC link unavailable."
    link = (obj.get("url") or "") if obj and obj.get("ok") else ""
    return f"[Open IARC Monographs search]({link})" if link else unavailable
def render_scholar_block(obj: dict) -> str:
    """Return a Markdown link to the Google Scholar search.

    Falls back to a fixed "unavailable" message when *obj* is falsy, its
    ``ok`` entry is falsy, or its ``url`` entry is empty.
    """
    if obj and obj.get("ok"):
        target = obj.get("url") or ""
        if target:
            return f"[Open Google Scholar search]({target})"
    return "Scholar link unavailable."
def render_fema_block(obj: dict) -> str:
    """Return a Markdown link to the FEMA / fragrance-materials search.

    Falls back to a fixed "unavailable" message when *obj* is falsy, its
    ``ok`` entry is falsy, or its ``url`` entry is empty.
    """
    usable = bool(obj) and bool(obj.get("ok"))
    href = (obj.get("url") or "") if usable else ""
    if not href:
        return "FEMA link unavailable."
    return f"[Open FEMA / Fragrance Materials Safety Resource search]({href})"
+# -----------------------------
+# Prompt builder (keep small)
+# -----------------------------
def build_prompt(data: dict) -> str:
    """Build the AI-summary prompt from a search result.

    The prompt has three parts: a header naming the chemical, an evidence
    section with selected PubChem / CTX / NTP / CDC fields, and the closing
    task instructions. When the prompt would exceed the size budget, only the
    evidence section is shortened, so the instructions at the end are always
    sent in full.

    Args:
        data: Search result mapping. Read keys: ``cas_used``, ``query``,
            ``pubchem``, ``ctx_genetox``, ``ntp_technical_reports``,
            ``cdc_toxprofiles``.

    Returns:
        The prompt text.
    """
    max_chars = 16000

    def _as_dict(value):
        # A source stored as None (or any non-mapping) is treated as empty.
        return value if isinstance(value, dict) else {}

    cas = data.get("cas_used") or data.get("query") or "unknown"
    pub = _as_dict(data.get("pubchem"))
    ctx = _as_dict(data.get("ctx_genetox"))
    ntp_obj = data.get("ntp_technical_reports", {})
    cdc_obj = data.get("cdc_toxprofiles", {})

    pub_fields = [
        "cid",
        "resolved_cas",
        "iupac_name",
        "title",
        "molecular_formula",
        "molecular_weight",
        "canonical_smiles",
        "url",
    ]
    ctx_fields = ["dtxsid", "ghs_hazard_statements", "echa_cl_summary", "genetox_records"]

    header = (
        "You are a toxicology assistant. Summarize weight-of-evidence for mutagenicity/genotoxicity.\n"
        f"Chemical CAS: {cas}\n"
    )
    evidence = (
        "PUBCHEM (selected fields):\n"
        f"{_pretty({k: pub.get(k) for k in pub_fields})}\n"
        "CTX (selected blocks only):\n"
        f"{_pretty({k: ctx.get(k) for k in ctx_fields})}\n"
        "NTP Technical Reports (hits):\n"
        f"{_pretty(ntp_obj.get('hits') if isinstance(ntp_obj, dict) else ntp_obj)}\n"
        "CDC ToxProfiles (matches):\n"
        f"{_pretty(cdc_obj.get('matches') if isinstance(cdc_obj, dict) else cdc_obj)}\n"
    )
    instructions = (
        "Write a concise, structured summary:\n"
        "- Identity & key links\n"
        "- Genetox signals (Ames, micronucleus, chromosomal aberrations, etc.)\n"
        "- Any conflicts/inconsistencies\n"
        "- Overall conclusion (low/medium/high concern)\n"
        "- What data is missing\n"
    )

    # Give the evidence whatever room the fixed parts leave. Cutting the whole
    # prompt from the end would drop the instructions first.
    evidence_budget = max(0, max_chars - len(header) - len(instructions))
    return header + _truncate_text(evidence, evidence_budget) + instructions
 # -----------------------------
+# Search pipeline
 # -----------------------------
+async def run_search(query: str) -> dict:
     q = (query or "").strip()
     if not q:
         raise gr.Error("Enter a CAS number (preferred) or chemical name.")
         return SEARCH_CACHE[cache_key]
     async with client() as http:
         pub = await pubchem.pubchem_by_query(q, http)
         cas = q
+        if not is_cas(cas):
             cas = pub.get("resolved_cas") or q
+        ctx_task = (
+            ctx_src.fetch_ctx_genetox(cas, http)
+            if is_cas(cas)
+            else asyncio.sleep(0, result={"ok": False, "error": "CTX requires CAS (CAS-first)."})
+        )
         ntp_task = ntp.search_technical_reports(cas, http, limit=8)
         ctx_res, ntp_res = await asyncio.gather(ctx_task, ntp_task)
+    # CDC (offline/local index): try resolved CAS first, then name fallback inside cdc.search()
+    cdc_res = cdc.search(q, cas=cas if is_cas(cas) else None, limit=8)
+    out = {
         "query": q,
         "cas_used": cas,
         "pubchem": pub,
         "ctx_genetox": ctx_res,
         "ntp_technical_reports": ntp_res,
+        "cdc_toxprofiles": cdc_res,
         "iarc_monographs": iarc.bookshelf_link(cas),
         "google_scholar": {"ok": True, "url": scholar.scholar_link(cas)},
+        "fema": fema.fema_link(cas if is_cas(cas) else q),
     }
     SEARCH_CACHE[cache_key] = out
     return out
 def do_search(query: str):
     data = asyncio.run(run_search(query))
     overview_md_text = render_overview(data)
     pubchem_md_text = render_pubchem_summary(data.get("pubchem", {}))
     ctx_md_text = render_ctx_summary(data.get("ctx_genetox", {}))
     ntp_md_text = render_ntp_summary(data.get("ntp_technical_reports", {}))
+    cdc_md_text = render_cdc_summary(data.get("cdc_toxprofiles", {}))
     iarc_md_text = render_iarc_block(data.get("iarc_monographs", {}))
     scholar_md_text = render_scholar_block(data.get("google_scholar", {}))
     fema_md_text = render_fema_block(data.get("fema", {}))
+    raw_pubchem_json = _pretty(data.get("pubchem", {}))
+    raw_ctx_json = _pretty(data.get("ctx_genetox", {}))
+    raw_ntp_json = _pretty(data.get("ntp_technical_reports", {}))
+    raw_iarc_json = _pretty(data.get("iarc_monographs", {}))
+    raw_scholar_json = _pretty(data.get("google_scholar", {}))
+    raw_fema_json = _pretty(data.get("fema", {}))
     return (
+        data,
         overview_md_text,
         pubchem_md_text,
         ctx_md_text,
         ntp_md_text,
+        cdc_md_text,
         iarc_md_text,
         scholar_md_text,
         fema_md_text,
         raw_iarc_json,
         raw_scholar_json,
         raw_fema_json,
+        "",  # ai_out blank after search
     )
 def generate_ai(data: dict):
     if not data:
         raise gr.Error("Run a search first.")
     cas = data.get("cas_used") or data.get("query") or ""
     cache_key = f"ai::{cas}"
     if cache_key in AI_CACHE:
     if not allowed:
         return f"AI Summary capacity reached for today (limit {info.get('limit')}). Please try again tomorrow."
     resp = generate_ai_summary(build_prompt(data))
     if not resp.get("ok"):
         return f"**AI summary unavailable:** {resp.get('error')}"
 def download_report(data: dict, ai_text: str):
     if not data:
         raise gr.Error("Run a search first.")
     cas = data.get("cas_used") or data.get("query") or "unknown"
     pdf_path, json_path = build_pdf(cas, evidence=data, ai_summary=ai_text if ai_text else None)
     return pdf_path, json_path
 # -----------------------------
+# UI (light, production-like)
 # -----------------------------
+LIGHT_CSS = """
+.gradio-container { background: white !important; }
+"""
+with gr.Blocks(title="ToxRAI (HF Demo)", css=LIGHT_CSS) as demo:
     gr.Markdown("# 🧪 ToxRAI — Demo (CAS-first)")
     gr.Markdown(
         f"Public demo • AI summaries/day global cap: **{settings.max_ai_summaries_per_day}** • Cache TTL: **{settings.cache_ttl_seconds}s**"
             with gr.Accordion("PubChem (summary)", open=False):
                 pubchem_md = gr.Markdown()
             with gr.Accordion("CDC ToxProfiles", open=False):
                 cdc_md = gr.Markdown()
                     state,
                     overview_md,
                     pubchem_md,
                     ctx_md,
                     ntp_md,
+                    cdc_md,
                     iarc_md,
                     scholar_md,
                     fema_md,
                     state,
                     overview_md,
                     pubchem_md,
                     ctx_md,
                     ntp_md,
+                    cdc_md,
                     iarc_md,
                     scholar_md,
                     fema_md,
             pdf_btn.click(fn=download_report, inputs=[state, ai_out], outputs=[pdf_file, json_file])
+if __name__ == "__main__":
+    demo.queue().launch()