File size: 19,364 Bytes
05faca4
3d936e9
05faca4
f3b34ab
05faca4
f3b34ab
870568f
05faca4
870568f
05faca4
f3b34ab
e1f40f0
05faca4
e1f40f0
05faca4
 
 
 
 
3d936e9
 
05faca4
f3b34ab
 
 
870568f
05faca4
 
0dacfa6
05faca4
0dacfa6
e1f40f0
f3b34ab
 
05faca4
 
f3b34ab
 
e1f40f0
05faca4
e1f40f0
f3b34ab
05faca4
 
 
3d936e9
05faca4
 
3d936e9
f3b34ab
05faca4
 
 
 
 
 
 
 
 
f3b34ab
 
05faca4
 
 
 
f3b34ab
05faca4
 
 
f3b34ab
05faca4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3d936e9
05faca4
 
 
 
 
 
 
 
 
 
f3b34ab
e1f40f0
 
 
05faca4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e1f40f0
f3b34ab
 
 
05faca4
 
 
 
f3b34ab
05faca4
 
 
f3b34ab
e1f40f0
05faca4
 
 
 
e1f40f0
05faca4
e1f40f0
05faca4
e1f40f0
f3b34ab
 
05faca4
 
e1f40f0
05faca4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d59d94c
 
744e7d9
ee87da6
744e7d9
 
82e1938
ee87da6
d59d94c
 
 
 
744e7d9
 
 
 
ee87da6
 
 
e1f40f0
 
05faca4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f3b34ab
 
 
05faca4
f3b34ab
05faca4
 
870568f
 
 
 
 
 
e1f40f0
870568f
 
05faca4
870568f
 
 
05faca4
f3b34ab
870568f
05faca4
744e7d9
 
 
870568f
 
 
 
05faca4
870568f
 
 
 
 
 
e1f40f0
d59d94c
870568f
 
05faca4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e1f40f0
870568f
 
 
05faca4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9577056
 
05faca4
9577056
f3b34ab
 
 
e1f40f0
 
 
 
05faca4
 
 
 
 
 
 
 
 
 
e1f40f0
05faca4
 
870568f
05faca4
9577056
 
05faca4
9577056
 
 
 
 
e1f40f0
 
 
 
 
 
05faca4
870568f
 
 
 
 
 
05faca4
870568f
 
 
 
 
 
 
 
 
05faca4
 
e1f40f0
870568f
5a82b9c
870568f
 
 
 
 
 
 
 
 
05faca4
870568f
 
 
 
 
e1f40f0
05faca4
e1f40f0
f3b34ab
05faca4
870568f
 
 
 
 
a38858a
 
 
 
 
 
 
 
 
 
 
f3b34ab
a38858a
f3b34ab
a38858a
 
f3b34ab
05faca4
e1f40f0
 
 
a38858a
 
f3b34ab
a38858a
 
f3b34ab
a38858a
 
f3b34ab
e1f40f0
a38858a
f3b34ab
a38858a
 
f3b34ab
a38858a
 
 
 
 
 
 
f3b34ab
a38858a
 
 
f3b34ab
a38858a
f3b34ab
a38858a
 
 
9577056
f3b34ab
 
 
 
 
 
 
05faca4
f3b34ab
 
 
 
 
 
 
 
 
 
 
 
 
 
e1f40f0
f3b34ab
 
 
 
 
 
 
05faca4
f3b34ab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
05faca4
 
 
3d936e9
05faca4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
import asyncio
import json
import os
import time
from typing import Any, Dict, Optional

import gradio as gr
import httpx

from core.config import settings
from core.rate_limit import check_and_increment_global_ai_cap
from core.pdf_report import build_pdf
from core.sources import pubchem, ntp, ctx as ctx_src, iarc, scholar, fema

# Optional: CDC module may exist in your repo (user added).
try:
    from core.sources import cdc
except Exception:
    cdc = None  # type: ignore

# -----------------------------
# Caches (simple in-memory)
# -----------------------------
# Search results keyed by "search::<lowercased query>"; entries are written by run_search.
SEARCH_CACHE: Dict[str, Dict[str, Any]] = {}
# AI summary text keyed by "ai::<CAS or query>"; entries are written by generate_ai.
# NOTE(review): nothing visible in this file evicts or expires entries in either
# cache -- confirm whether a TTL is intended.
AI_CACHE: Dict[str, str] = {}


def json_pretty(obj: Any) -> str:
    """Serialize *obj* as indented, non-ASCII-preserving JSON text.

    Values the JSON encoder cannot handle natively are stringified via
    ``default=str``. If encoding still fails (for example on a circular
    reference or an unsupported dict key), ``str(obj)`` is returned instead.
    """
    try:
        rendered = json.dumps(obj, default=str, ensure_ascii=False, indent=2)
    except Exception:
        # Best-effort display helper: never let a serialization error escape.
        rendered = str(obj)
    return rendered


def client() -> httpx.AsyncClient:
    """Create a new async HTTP client carrying the demo's user-agent header."""
    default_headers = {"user-agent": "toxrai-hf-demo"}
    return httpx.AsyncClient(headers=default_headers)


# -----------------------------
# Rendering helpers (Markdown)
# -----------------------------

def render_overview(data: Dict[str, Any]) -> str:
    """Build the Markdown overview shown above the per-source sections.

    Always lists the query and the CAS used; appends the PubChem CID and the
    CompTox DTXSID when the corresponding source reported ``ok`` and supplied
    the identifier. Entries are separated by blank lines.
    """
    query_text = data.get("query") or ""
    cas_text = data.get("cas_used") or ""
    parts = [f"**Query:** `{query_text}`", f"**CAS used:** `{cas_text}`"]

    # Optional quick identifiers, only when the source lookup succeeded.
    pubchem_info = data.get("pubchem") or {}
    genetox_info = data.get("ctx_genetox") or {}
    optional_ids = (
        (pubchem_info, "cid", "PubChem CID"),
        (genetox_info, "dtxsid", "EPA CompTox DTXSID"),
    )
    for source, field, label in optional_ids:
        if source.get("ok") and source.get(field):
            parts.append(f"**{label}:** `{source.get(field)}`")

    return "\n\n".join(parts)


def render_pubchem_summary(pub: Dict[str, Any]) -> str:
    """Render the PubChem result as Markdown.

    Args:
        pub: PubChem result dict. Keys read here: ``ok``, ``error``, ``cid``,
            ``resolved_cas``, ``props``, ``structure_png``, ``url`` and
            ``hazards`` (a list of ``{"name", "text"}`` dicts).

    Returns:
        Markdown text ending in a single newline on success, or a one-line
        "PubChem unavailable: ..." message when the result is missing, is not
        a dict, or is not flagged ``ok``.
    """
    if not isinstance(pub, dict) or not pub.get("ok"):
        # Fall back to a generic message so a missing "error" key never
        # renders as the literal text "None".
        err = (pub.get("error") if isinstance(pub, dict) else None) or "Unknown PubChem error"
        return f"PubChem unavailable: {err}"

    cid = pub.get("cid")
    resolved_cas = pub.get("resolved_cas") or "-"
    props = pub.get("props") or {}

    iupac_name = props.get("IUPACName") or props.get("iupac_name") or "-"
    formula = props.get("MolecularFormula") or "-"
    mw = props.get("MolecularWeight")
    mw_str = f"{mw}" if mw not in (None, "") else "-"
    smiles = props.get("CanonicalSMILES") or "-"

    lines = [
        f"**CID:** `{cid}`",
        f"**Resolved CAS (from synonyms):** `{resolved_cas}`",
        f"**IUPAC/Title:** {iupac_name}",
        "",
        f"**Molecular Formula:** `{formula}`",
        f"**Molecular Weight:** `{mw_str}`",
        f"**Canonical SMILES:** `{smiles}`",
    ]

    structure_png = pub.get("structure_png")
    if structure_png:
        lines.extend(["", "**Structure**", f"![]({structure_png})"])

    url = pub.get("url")
    if url:
        lines.extend(["", f"[Open PubChem]({url})"])

    hazards = pub.get("hazards") or []
    if hazards:
        lines.append("")
        lines.append("### Safety / Hazard Information")
        # Render as paragraphs (avoids weird wrapping from bullet nesting)
        for h in hazards:
            if not isinstance(h, dict):
                # Skip malformed entries instead of failing the whole section.
                continue
            text = h.get("text") or ""
            if not text:
                continue
            name = h.get("name") or "Hazard"
            lines.append(f"**{name}:** {text}")
            lines.append("")

    # Drop trailing blank lines, then end with exactly one newline.
    return "\n".join(lines).rstrip() + "\n"


def render_ctx_summary(ctx: Dict[str, Any]) -> str:
    """Render the EPA CompTox (CTX) genetox result as Markdown.

    Args:
        ctx: CTX result dict. Keys read here: ``ok``, ``error``,
            ``dashboard_search``, ``dtxsid``, ``dashboard_url`` and
            ``summary``.

    Returns:
        On failure, the error text (or a generic fallback), followed by a
        dashboard search link when one is provided. On success, the DTXSID,
        a dashboard link and a fenced JSON excerpt of the summary fields.
    """
    if not isinstance(ctx, dict) or not ctx.get("ok"):
        info = ctx if isinstance(ctx, dict) else {}
        # A failed lookup without an "error" key must not render as "None".
        err = info.get("error") or "Unknown CTX error"
        search_url = info.get("dashboard_search")
        if search_url:
            return f"{err}\n\n[Open CompTox Dashboard search]({search_url})"
        return str(err)

    dtxsid = ctx.get("dtxsid")
    dash = ctx.get("dashboard_url")
    summary = ctx.get("summary")

    lines = []
    if dtxsid:
        lines.append(f"**DTXSID:** `{dtxsid}`")
    if dash:
        lines.append(f"[Open CompTox Dashboard]({dash})")

    # Try to surface key fields (if present) without dumping huge JSON
    if isinstance(summary, dict):
        # Tokens are matched against lowercased key names, so they must be
        # lowercase themselves.
        interesting_tokens = (
            "genetox",
            "overall",
            "summary",
            "conclusion",
            "call",
            "result",
            "assessment",
        )
        picked = {
            k: v
            for k, v in summary.items()
            if any(tok in str(k).lower() for tok in interesting_tokens)
        }
        if not picked:
            # fallback: first few keys
            picked = {k: summary[k] for k in list(summary)[:8]}

        txt = json_pretty(picked)
        # Keep it readable in UI
        if len(txt) > 6000:
            txt = txt[:6000] + "\n... (truncated)"
        lines.extend(["", "```json", txt, "```"])

    return "\n".join(lines)


def render_ntp_summary(ntp_res: Dict[str, Any]) -> str:
    """Render NTP Technical Report hits as a Markdown bullet list.

    Args:
        ntp_res: NTP result dict. Keys read here: ``ok``, ``error`` and
            ``items``; each item may carry ``tr``/``num``, ``title`` and
            ``report_page``/``url``.

    Returns:
        One bullet per report, a "no reports" message when there is nothing
        to list, or an "unavailable" message when the lookup failed.
    """
    if not isinstance(ntp_res, dict) or not ntp_res.get("ok"):
        # Generic fallback so a missing "error" key never renders as "None".
        err = (ntp_res.get("error") if isinstance(ntp_res, dict) else None) or "Unknown NTP error"
        return f"NTP Technical Reports unavailable: {err}"

    lines = []
    for it in ntp_res.get("items") or []:
        if not isinstance(it, dict):
            # Skip malformed entries instead of failing the whole section.
            continue
        num = it.get("tr") or it.get("num") or ""
        title = it.get("title") or "Report"
        url = it.get("report_page") or it.get("url") or ""
        # Only show the "TR-" label when a report number is actually known.
        label = f"**TR-{num}**  " if num else ""
        target = f"[{title}]({url})" if url else title
        lines.append(f"- {label}{target}")

    if not lines:
        return "No NTP Technical Reports found for this CAS."  # CAS-filtered
    return "\n".join(lines)


def render_iarc_block(iarc_res: Dict[str, Any]) -> str:
    """Render the IARC Monographs section as Markdown.

    Prefers a single search link when the result carries ``url``; otherwise
    lists each dict entry of ``results`` as a bullet (title, optional link,
    optional year). Any other shape yields the "unavailable" message.
    """
    unavailable = "IARC link unavailable."
    if not (iarc_res and iarc_res.get("ok")):
        return unavailable

    search_url = iarc_res.get("url")
    if search_url:
        return f"[Search IARC Monographs (NCBI Bookshelf)]({search_url})"

    entries = iarc_res.get("results") if isinstance(iarc_res, dict) else None
    if not (isinstance(entries, list) and entries):
        return unavailable

    def bullet(entry: Dict[str, Any]) -> str:
        # One Markdown bullet: linked title when a URL exists, plus the year.
        heading = entry.get("title") or "IARC Monographs"
        href = entry.get("url")
        year = entry.get("year")
        year_note = f" ({year})" if year else ""
        target = f"[{heading}]({href})" if href else heading
        return f"- {target}{year_note}"

    bullets = [bullet(entry) for entry in entries if isinstance(entry, dict)]
    return "\n".join(bullets) if bullets else unavailable


def render_scholar_block(sch_res: Dict[str, Any]) -> str:
    """Render the Google Scholar search link, or a fallback when it is missing."""
    unavailable = "Google Scholar link unavailable."
    if not sch_res or not sch_res.get("ok"):
        return unavailable

    link = sch_res.get("url")
    if not link:
        return unavailable
    return f"[Open Google Scholar search]({link})"


def render_fema_block(fema_res: Dict[str, Any]) -> str:
    """Render the FEMA search links as a Markdown bullet list.

    Args:
        fema_res: FEMA result dict. Keys read here: ``ok``, ``error``,
            ``cas_url``, ``name_url``, ``combo_url``, ``search_api_url`` and
            ``alt_url``.

    Returns:
        An intro line followed by one bullet per available link; the error
        text when the lookup failed; or "FEMA link unavailable." when there
        is neither an error message nor any link.
    """
    unavailable = "FEMA link unavailable."
    if not isinstance(fema_res, dict) or not fema_res.get("ok"):
        err = fema_res.get("error") if isinstance(fema_res, dict) else None
        # A failed lookup without an "error" key must not render as "None".
        return str(err) if err else unavailable

    # (label, url) pairs in display order.
    links = (
        ("Search by CAS", fema_res.get("cas_url")),
        ("Search by Chemical Name", fema_res.get("name_url")),
        ("Search by CAS + Name", fema_res.get("combo_url")),
        ("Generic FEMA search (alt)", fema_res.get("search_api_url")),
        ("Generic FEMA search", fema_res.get("alt_url")),
    )
    bullets = [f"- [{label}]({url})" for label, url in links if url]
    if not bullets:
        return unavailable
    return "\n".join(["A FEMA risk assessment for this chemical is available:", *bullets])


def render_cdc_block(cdc_res: Any) -> str:
    """Render CDC ToxProfile matches as Markdown.

    Accepts a single dict (rendered as one link or plain name), a list of
    dicts (rendered as bullets; non-dict entries are ignored), or any other
    truthy value (rendered via ``str``). Falsy input, or a list with no dict
    entries, yields the "no match" message.
    """
    no_match = "No CDC ToxProfiles match."
    if not cdc_res:
        return no_match

    def as_link(entry: Dict[str, Any]) -> Any:
        # Linked name when a URL is present, otherwise just the name.
        label = entry.get("name") or "CDC ToxProfile"
        href = entry.get("url")
        return f"[{label}]({href})" if href else label

    if isinstance(cdc_res, dict):
        return as_link(cdc_res)

    if isinstance(cdc_res, list):
        bullets = [f"- {as_link(entry)}" for entry in cdc_res if isinstance(entry, dict)]
        return "\n".join(bullets) if bullets else no_match

    return str(cdc_res)


# -----------------------------
# Search + AI
# -----------------------------

async def run_search(query: str) -> Dict[str, Any]:
    """Query all evidence sources for one chemical and return the combined result.

    Args:
        query: A CAS number (preferred) or chemical name; surrounding
            whitespace is ignored.

    Returns:
        A dict with the keys ``query``, ``cas_used``, ``pubchem``,
        ``ctx_genetox``, ``ntp_technical_reports``, ``iarc_monographs``,
        ``google_scholar`` and ``fema``, plus ``cdc_toxprofiles`` when the
        optional CDC module was imported. The same dict object is stored in
        ``SEARCH_CACHE`` and returned again for repeat queries
        (case-insensitive).

    Raises:
        gr.Error: If the query is empty after stripping.
    """
    q = (query or "").strip()
    if not q:
        raise gr.Error("Enter a CAS number (preferred) or chemical name.")

    cache_key = f"search::{q.lower()}"
    if cache_key in SEARCH_CACHE:
        return SEARCH_CACHE[cache_key]

    async with client() as http:
        # PubChem accepts names and CAS. We also use it to resolve CAS via synonyms.
        pub = await pubchem.pubchem_by_query(q, http)
        # Guard every lookup on the PubChem payload the same way, so a
        # non-dict result cannot raise AttributeError below.
        pub_info = pub if isinstance(pub, dict) else {}

        cas = q
        if not pubchem.is_cas(cas):
            cas = pub_info.get("resolved_cas") or q

        # CTX is queried with the DTXSID PubChem supplied when there is one,
        # otherwise with the raw query. `q` is non-empty here, so the query
        # is always truthy.
        ctx_query = pub_info.get("dtxsid") or q

        # The CTX and NTP lookups are independent; run them concurrently.
        ctx_res, ntp_res = await asyncio.gather(
            ctx_src.fetch_ctx_genetox(ctx_query, http),
            ntp.search_technical_reports(cas, http, limit=8),
        )

    out: Dict[str, Any] = {
        "query": q,
        "cas_used": cas,
        "pubchem": pub,
        "ctx_genetox": ctx_res,
        "ntp_technical_reports": ntp_res,
        "iarc_monographs": iarc.bookshelf_link(cas),
        "google_scholar": {"ok": True, "url": scholar.scholar_link(cas)},
        "fema": fema.fema_link(cas if pubchem.is_cas(cas) else "", q),
    }

    # CDC toxprofiles (if module exists)
    if cdc is not None:
        try:
            # Try a few common function names (depending on how cdc.py is implemented)
            if hasattr(cdc, "lookup"):
                out["cdc_toxprofiles"] = cdc.lookup(cas)
            elif hasattr(cdc, "search"):
                out["cdc_toxprofiles"] = cdc.search(cas)
            elif hasattr(cdc, "toxprofile_for"):
                out["cdc_toxprofiles"] = cdc.toxprofile_for(cas)
            else:
                out["cdc_toxprofiles"] = None
        except Exception:
            # Deliberate best-effort: a failing optional source must not
            # break the whole search.
            out["cdc_toxprofiles"] = None

    SEARCH_CACHE[cache_key] = out
    return out


def _prune_for_prompt(obj: Any, max_chars: int) -> str:
    """Pretty-print *obj* as JSON, cutting it to *max_chars* characters.

    When the text is cut, a "... (truncated)" marker line is appended after
    the first *max_chars* characters.
    """
    rendered = json_pretty(obj)
    overflow = len(rendered) > max_chars
    return rendered[:max_chars] + "\n... (truncated)" if overflow else rendered


def build_prompt(data: Dict[str, Any]) -> str:
    """Assemble the AI-summary prompt from a trimmed view of the evidence.

    Only selected fields from PubChem, CTX genetox, NTP and CDC are included
    (not the full raw JSON of every source), and the serialized evidence is
    pruned to 12000 characters before being appended to the instructions.
    """
    pubchem_info = data.get("pubchem") or {}
    is_pubchem_dict = isinstance(pubchem_info, dict)
    props = (pubchem_info.get("props") or {}) if is_pubchem_dict else {}
    hazards = (pubchem_info.get("hazards") or []) if is_pubchem_dict else []

    genetox_info = data.get("ctx_genetox") or {}
    ntp_info = data.get("ntp_technical_reports") or {}

    pubchem_section = {
        "cid": pubchem_info.get("cid"),
        "resolved_cas": pubchem_info.get("resolved_cas"),
        "iupac": props.get("IUPACName") or props.get("iupac_name"),
        "formula": props.get("MolecularFormula"),
        "molecular_weight": props.get("MolecularWeight"),
        "canonical_smiles": props.get("CanonicalSMILES"),
        "hazards": hazards[:10],  # cap the number of hazard entries sent to the model
    }
    genetox_section = {field: genetox_info.get(field) for field in ("ok", "dtxsid", "summary")}

    evidence = {
        "query": data.get("query"),
        "cas_used": data.get("cas_used"),
        "pubchem": pubchem_section,
        "ctx_genetox": genetox_section,
        "ntp_technical_reports": ntp_info.get("items", []),
        "cdc_toxprofiles": data.get("cdc_toxprofiles"),
    }

    instructions = (
        "You are a toxicology regulatory assistant. "
        "Using ONLY the evidence JSON below, write a concise weight-of-evidence summary focused on mutagenicity/genotoxicity. "
        "If evidence is conflicting or absent, say so explicitly. "
        "Cite which source each statement comes from (PubChem hazards, CTX genetox summary, NTP TR titles, CDC ToxProfiles).\n\n"
        "EVIDENCE_JSON:\n"
    )
    return instructions + _prune_for_prompt(evidence, max_chars=12000)


def do_search(query: str):
    """Run a search and return every value the Search tab's outputs expect.

    The returned tuple is positional: state, overview, PubChem, CDC, CTX,
    NTP, IARC, Scholar and FEMA Markdown, then the six raw JSON dumps, then
    an empty string that clears the AI summary. The order must match the
    ``outputs=[...]`` lists wired to this handler.
    """
    data = asyncio.run(run_search(query))

    def section(key: str) -> Any:
        # Missing keys fall back to an empty dict, as the renderers expect.
        return data.get(key, {})

    overview_text = render_overview(data)
    pubchem_text = render_pubchem_summary(section("pubchem"))
    ctx_text = render_ctx_summary(section("ctx_genetox"))
    ntp_text = render_ntp_summary(section("ntp_technical_reports"))
    iarc_text = render_iarc_block(section("iarc_monographs"))
    scholar_text = render_scholar_block(section("google_scholar"))
    fema_text = render_fema_block(section("fema"))

    # The CDC entry only exists when the optional module was importable.
    cdc_text = render_cdc_block(data.get("cdc_toxprofiles")) if "cdc_toxprofiles" in data else ""

    raw_source_keys = (
        "pubchem",
        "ctx_genetox",
        "ntp_technical_reports",
        "iarc_monographs",
        "google_scholar",
        "fema",
    )
    raw_dumps = [json_pretty(section(key)) for key in raw_source_keys]

    # IMPORTANT: return order must match `outputs=[...]`
    return (
        data,  # state
        overview_text,
        pubchem_text,
        cdc_text,
        ctx_text,
        ntp_text,
        iarc_text,
        scholar_text,
        fema_text,
        *raw_dumps,
        "",  # ai_out (blank after search)
    )


def generate_ai(data: dict):
    """Return the AI summary for the current search result.

    Serves a cached summary when one exists for the CAS/query. Otherwise it
    checks the global daily cap, generates a summary from the pruned evidence
    prompt, caches successful text and returns it. Cap exhaustion and
    generation failures are returned as user-facing messages.

    Raises:
        gr.Error: If no search result is available yet.
    """
    if not data:
        raise gr.Error("Run a search first.")

    identifier = data.get("cas_used") or data.get("query") or ""
    cache_key = f"ai::{identifier}"
    try:
        return AI_CACHE[cache_key]
    except KeyError:
        pass  # not generated yet; fall through to the capped generation path

    allowed, info = check_and_increment_global_ai_cap()
    if not allowed:
        return f"AI Summary capacity reached for today (limit {info.get('limit')}). Please try again tomorrow."

    from core.sources.ai_summary import generate_ai_summary  # local import avoids cold-start issues

    prompt = build_prompt(data)
    resp = generate_ai_summary(prompt)
    if not resp.get("ok"):
        return f"**AI summary unavailable:** {resp.get('error')}"

    summary_text = resp.get("text") or ""
    AI_CACHE[cache_key] = summary_text
    return summary_text


def download_report(data: dict, ai_text: str):
    """Build the PDF report and JSON evidence packet for the current search.

    Returns the ``(pdf_path, json_path)`` pair produced by ``build_pdf``.
    An empty AI summary is passed on as ``None``.

    Raises:
        gr.Error: If no search result is available yet.
    """
    if not data:
        raise gr.Error("Run a search first.")

    report_label = data.get("cas_used") or data.get("query") or "unknown"
    summary = ai_text or None
    pdf_path, json_path = build_pdf(report_label, evidence=data, ai_summary=summary)
    return pdf_path, json_path


# -----------------------------
# UI
# -----------------------------

# Gradio UI: one "Search" tab with a collapsible section per evidence source,
# raw JSON viewers, and the AI-summary / report-download actions.
with gr.Blocks(title="ToxRAI (HF Demo)") as demo:
    gr.Markdown("# 🧪 ToxRAI — Demo (CAS-first)")
    gr.Markdown(
        f"Public demo • AI summaries/day global cap: **{settings.max_ai_summaries_per_day}** • Cache TTL: **{settings.cache_ttl_seconds}s**"
    )

    with gr.Tabs():
        with gr.Tab("Search"):
            # Holds the dict returned by run_search for the AI/PDF handlers.
            state = gr.State(None)

            with gr.Row():
                query_in = gr.Textbox(
                    label="CAS (preferred) or Chemical name",
                    placeholder="e.g., 80-05-7 or bisphenol A",
                    scale=4,
                )
                search_btn = gr.Button("Search", variant="primary", scale=1)

            overview_md = gr.Markdown()

            with gr.Accordion("PubChem (summary)", open=False):
                pubchem_md = gr.Markdown()

            # CDC accordion (optional)
            with gr.Accordion("CDC ToxProfiles", open=False):
                cdc_md = gr.Markdown()

            with gr.Accordion("EPA CompTox (CTX) — Genetox (full fields)", open=False):
                ctx_md = gr.Markdown()

            with gr.Accordion("NTP Technical Reports", open=False):
                ntp_md = gr.Markdown()

            with gr.Accordion("IARC Monographs", open=False):
                iarc_md = gr.Markdown()

            with gr.Accordion("Google Scholar", open=False):
                scholar_md = gr.Markdown()

            with gr.Accordion("FEMA Risk Assessment", open=False):
                fema_md = gr.Markdown()

            with gr.Accordion("Raw outputs (all sources)", open=False):
                raw_pubchem = gr.Code(label="PubChem (raw)", language="json")
                raw_ctx = gr.Code(label="CTX Genetox (raw)", language="json")
                raw_ntp = gr.Code(label="NTP TR (raw)", language="json")
                raw_iarc = gr.Code(label="IARC (raw)", language="json")
                raw_scholar = gr.Code(label="Scholar link (raw)", language="json")
                raw_fema = gr.Code(label="FEMA (raw)", language="json")

            with gr.Row():
                ai_btn = gr.Button("Generate AI Summary (GPT-4o)", variant="secondary")
                pdf_btn = gr.Button("Build PDF + JSON")

            ai_out = gr.Markdown()

            with gr.Row():
                pdf_file = gr.File(label="Download PDF")
                json_file = gr.File(label="Download JSON evidence packet")

            # Single source of truth for the search handler's outputs; the
            # order must match the tuple returned by do_search.
            search_outputs = [
                state,
                overview_md,
                pubchem_md,
                cdc_md,
                ctx_md,
                ntp_md,
                iarc_md,
                scholar_md,
                fema_md,
                raw_pubchem,
                raw_ctx,
                raw_ntp,
                raw_iarc,
                raw_scholar,
                raw_fema,
                ai_out,
            ]

            # Clicking the button and pressing Enter in the textbox both run
            # the same search.
            search_btn.click(fn=do_search, inputs=[query_in], outputs=search_outputs)
            query_in.submit(fn=do_search, inputs=[query_in], outputs=search_outputs)

            ai_btn.click(fn=generate_ai, inputs=[state], outputs=[ai_out])
            pdf_btn.click(fn=download_report, inputs=[state, ai_out], outputs=[pdf_file, json_file])


# Enable Gradio's request queue; event listeners that do not set their own
# concurrency limit default to 6.
demo.queue(default_concurrency_limit=6)
# Module-level alias for the Blocks app.
# NOTE(review): presumably for hosts that import `app` -- confirm against the deployment setup.
app = demo

# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()