Spaces:
Running
Running
Update literature_explorer.py
Browse files- literature_explorer.py +90 -46
literature_explorer.py
CHANGED
|
@@ -1,7 +1,8 @@
|
|
| 1 |
import os
|
| 2 |
import re
|
| 3 |
import json
|
| 4 |
-
|
|
|
|
| 5 |
|
| 6 |
import gradio as gr
|
| 7 |
import numpy as np
|
|
@@ -57,7 +58,6 @@ ORGAN_HINTS: Dict[str, List[str]] = {
|
|
| 57 |
"immune_blood": ["immune", "cytok", "inflamm", "blood", "plasma", "serum", "hemat", "lymph", "macrophage"],
|
| 58 |
}
|
| 59 |
|
| 60 |
-
|
| 61 |
def infer_organ_label(doc_text: str) -> str:
|
| 62 |
t = (doc_text or "").lower()
|
| 63 |
scores = {k: 0 for k in ORGAN_HINTS.keys()}
|
|
@@ -70,7 +70,6 @@ def infer_organ_label(doc_text: str) -> str:
|
|
| 70 |
if not best or best[0][1] == 0:
|
| 71 |
return "unknown"
|
| 72 |
|
| 73 |
-
# if 2+ organs are close, label mixed
|
| 74 |
top_org, top_score = best[0]
|
| 75 |
if len(best) > 1 and best[1][1] > 0 and (top_score - best[1][1]) <= 1:
|
| 76 |
return "mixed"
|
|
@@ -93,7 +92,6 @@ ENZYMES_BY_ORGAN: Dict[str, List[str]] = {
|
|
| 93 |
"unknown": [],
|
| 94 |
}
|
| 95 |
|
| 96 |
-
# conservative regex patterns
|
| 97 |
ENZYME_REGEXES = [
|
| 98 |
re.compile(r"\bCYP\s?(\d[A-Z]?\d?[A-Z]?\d?)\b", re.IGNORECASE),
|
| 99 |
re.compile(r"\bUGT\s?(\d[A-Z0-9]+)\b", re.IGNORECASE),
|
|
@@ -115,7 +113,6 @@ def detect_enzymes(text: str, organ: str) -> List[str]:
|
|
| 115 |
if e in up:
|
| 116 |
out.append(e)
|
| 117 |
|
| 118 |
-
# regex enrich
|
| 119 |
for rx in ENZYME_REGEXES:
|
| 120 |
for m in rx.finditer(t):
|
| 121 |
g = (m.group(1) or "").upper()
|
|
@@ -141,7 +138,6 @@ def detect_enzymes(text: str, organ: str) -> List[str]:
|
|
| 141 |
x = "P-gp"
|
| 142 |
out2.append(x)
|
| 143 |
|
| 144 |
-
# dedupe
|
| 145 |
seen = set()
|
| 146 |
final = []
|
| 147 |
for x in out2:
|
|
@@ -172,40 +168,15 @@ PATHWAY_TERMS = [
|
|
| 172 |
"cytokine signaling",
|
| 173 |
]
|
| 174 |
|
| 175 |
-
PATHWAY_REGEXES = [
|
| 176 |
-
re.compile(r"\boxidative stress\b", re.IGNORECASE),
|
| 177 |
-
re.compile(r"\bNrf2\b", re.IGNORECASE),
|
| 178 |
-
re.compile(r"\bAhR\b", re.IGNORECASE),
|
| 179 |
-
re.compile(r"\bNF[-\s]?κ?B\b", re.IGNORECASE),
|
| 180 |
-
re.compile(r"\bp53\b", re.IGNORECASE),
|
| 181 |
-
re.compile(r"\bMAPK\b", re.IGNORECASE),
|
| 182 |
-
re.compile(r"\bPPAR\b", re.IGNORECASE),
|
| 183 |
-
re.compile(r"\bapoptos(?:is|e|ic)\b", re.IGNORECASE),
|
| 184 |
-
re.compile(r"\bDNA damage response\b", re.IGNORECASE),
|
| 185 |
-
re.compile(r"\bmitochondrial dysfunction\b", re.IGNORECASE),
|
| 186 |
-
re.compile(r"\bestrogen receptor\b", re.IGNORECASE),
|
| 187 |
-
re.compile(r"\bandrogen receptor\b", re.IGNORECASE),
|
| 188 |
-
re.compile(r"\binflammat(?:ion|ory)\b", re.IGNORECASE),
|
| 189 |
-
re.compile(r"\bcytokine signaling\b", re.IGNORECASE),
|
| 190 |
-
]
|
| 191 |
-
|
| 192 |
def detect_pathways(text: str) -> List[str]:
|
| 193 |
t = text or ""
|
| 194 |
-
out = []
|
| 195 |
-
for rx in PATHWAY_REGEXES:
|
| 196 |
-
if rx.search(t):
|
| 197 |
-
# map to friendly labels
|
| 198 |
-
# simplest: also do direct term scan afterwards
|
| 199 |
-
pass
|
| 200 |
tl = t.lower()
|
|
|
|
| 201 |
for term in PATHWAY_TERMS:
|
| 202 |
if term.lower() in tl:
|
| 203 |
out.append(term)
|
| 204 |
-
# ensure NF-kB catch even if κ symbol etc
|
| 205 |
if re.search(r"\bNF[-\s]?κ?B\b", t, flags=re.IGNORECASE) and "NF-kB" not in out:
|
| 206 |
out.append("NF-kB")
|
| 207 |
-
|
| 208 |
-
# dedupe preserve order
|
| 209 |
seen = set()
|
| 210 |
final = []
|
| 211 |
for x in out:
|
|
@@ -241,6 +212,13 @@ def is_text_based(pages: List[Tuple[int, str]]) -> bool:
|
|
| 241 |
joined = " ".join([clean_text(t) for _, t in pages if clean_text(t)])
|
| 242 |
return len(joined) >= 200
|
| 243 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 244 |
|
| 245 |
# =============================
|
| 246 |
# OpenAI helpers
|
|
@@ -280,7 +258,7 @@ def detect_endpoints(text: str) -> List[str]:
|
|
| 280 |
|
| 281 |
|
| 282 |
# =============================
|
| 283 |
-
#
|
| 284 |
# =============================
|
| 285 |
def split_sentences(text: str) -> List[str]:
|
| 286 |
t = re.sub(r"\s+", " ", (text or "")).strip()
|
|
@@ -329,10 +307,10 @@ def empty_index() -> Dict[str, Any]:
|
|
| 329 |
|
| 330 |
def build_index(files, api_key: str, embedding_model: str):
|
| 331 |
if not files:
|
| 332 |
-
return empty_index(), pd.DataFrame(), pd.DataFrame(), "Upload PDFs then click Build Search Index.", gr.update(choices=[]), gr.update(choices=[])
|
| 333 |
|
| 334 |
if len(files) > MAX_PDFS:
|
| 335 |
-
return empty_index(), pd.DataFrame(), pd.DataFrame(), f"Upload limit exceeded: max {MAX_PDFS} PDFs for pilot.", gr.update(choices=[]), gr.update(choices=[])
|
| 336 |
|
| 337 |
idx = empty_index()
|
| 338 |
papers_rows: List[Dict[str, Any]] = []
|
|
@@ -384,17 +362,18 @@ def build_index(files, api_key: str, embedding_model: str):
|
|
| 384 |
|
| 385 |
papers_df = pd.DataFrame(papers_rows, columns=["file","organ","pages_indexed","text_based"])
|
| 386 |
|
| 387 |
-
# Endpoint
|
| 388 |
-
matrix = []
|
| 389 |
endpoint_names = list(ENDPOINT_HINTS.keys())
|
|
|
|
| 390 |
for p in papers_rows:
|
| 391 |
if not p.get("text_based"):
|
| 392 |
continue
|
| 393 |
pid = p["paper_id"]
|
| 394 |
-
row = {"file": p["file"], "organ": p["organ"]}
|
| 395 |
p_pages = [r for r in page_rows if r["paper_id"] == pid]
|
|
|
|
| 396 |
for ep in endpoint_names:
|
| 397 |
-
|
|
|
|
| 398 |
matrix.append(row)
|
| 399 |
endpoint_matrix_df = pd.DataFrame(matrix) if matrix else pd.DataFrame(columns=["file","organ"] + endpoint_names)
|
| 400 |
|
|
@@ -508,6 +487,10 @@ def search(
|
|
| 508 |
pid = r["paper_id"]
|
| 509 |
org = (papers.get(pid, {}) or {}).get("organ", "unknown")
|
| 510 |
ctx = expanded_context(r.get("text", ""), query, n_sentences=5)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 511 |
|
| 512 |
rows.append({
|
| 513 |
"file": r.get("file",""),
|
|
@@ -517,20 +500,21 @@ def search(
|
|
| 517 |
"endpoints": "; ".join(r.get("endpoints") or []),
|
| 518 |
"enzymes": "; ".join((r.get("enzymes") or [])[:12]),
|
| 519 |
"pathways": "; ".join((r.get("pathways") or [])[:12]),
|
| 520 |
-
"
|
| 521 |
})
|
| 522 |
|
| 523 |
-
snippet =
|
| 524 |
evidence.append(f"- **{r.get('file','')}** (p.{r.get('page','')}): {snippet}")
|
| 525 |
|
| 526 |
-
|
|
|
|
| 527 |
evidence_md = "### Evidence used\n" + "\n".join(evidence[:8])
|
| 528 |
|
| 529 |
# grounded mini-summary
|
| 530 |
mini_summary = "(mini-summary unavailable)"
|
| 531 |
try:
|
| 532 |
client = get_client(api_key)
|
| 533 |
-
payload = [{"file": x["file"], "page": x["page"], "
|
| 534 |
|
| 535 |
system_msg = (
|
| 536 |
"You are a literature assistant for toxicology researchers. "
|
|
@@ -550,6 +534,38 @@ def search(
|
|
| 550 |
return results_df, mini_md, evidence_md
|
| 551 |
|
| 552 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 553 |
# =============================
|
| 554 |
# Tab plugin (Option A)
|
| 555 |
# =============================
|
|
@@ -558,7 +574,7 @@ def build_literature_explorer_tab():
|
|
| 558 |
"## Literature Explorer (Pilot)\n"
|
| 559 |
f"- Limits: **max {MAX_PDFS} PDFs**, **max {MAX_PAGES_PER_PDF} pages/PDF**\n"
|
| 560 |
"- Text-based PDFs only (not scanned/image PDFs).\n"
|
| 561 |
-
"-
|
| 562 |
)
|
| 563 |
|
| 564 |
idx_state = gr.State(empty_index())
|
|
@@ -573,7 +589,9 @@ def build_literature_explorer_tab():
|
|
| 573 |
build_btn = gr.Button("Build Search Index", variant="primary")
|
| 574 |
index_status = gr.Textbox(label="Index status", interactive=False)
|
| 575 |
papers_df = gr.Dataframe(label="Indexed papers", interactive=False, wrap=True)
|
| 576 |
-
|
|
|
|
|
|
|
| 577 |
|
| 578 |
with gr.Group():
|
| 579 |
gr.Markdown("### Search across indexed papers")
|
|
@@ -589,7 +607,21 @@ def build_literature_explorer_tab():
|
|
| 589 |
search_btn = gr.Button("Search", variant="secondary")
|
| 590 |
|
| 591 |
mini_summary_md = gr.Markdown()
|
| 592 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 593 |
evidence_md = gr.Markdown()
|
| 594 |
|
| 595 |
build_btn.click(
|
|
@@ -602,4 +634,16 @@ def build_literature_explorer_tab():
|
|
| 602 |
fn=search,
|
| 603 |
inputs=[query, idx_state, api_key, embedding_model, summary_model, endpoint_filter, organ_filter, enzyme_filter, pathway_filter, top_k],
|
| 604 |
outputs=[results_df, mini_summary_md, evidence_md]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 605 |
)
|
|
|
|
| 1 |
import os
|
| 2 |
import re
|
| 3 |
import json
|
| 4 |
+
import textwrap
|
| 5 |
+
from typing import Any, Dict, List, Tuple
|
| 6 |
|
| 7 |
import gradio as gr
|
| 8 |
import numpy as np
|
|
|
|
| 58 |
"immune_blood": ["immune", "cytok", "inflamm", "blood", "plasma", "serum", "hemat", "lymph", "macrophage"],
|
| 59 |
}
|
| 60 |
|
|
|
|
| 61 |
def infer_organ_label(doc_text: str) -> str:
|
| 62 |
t = (doc_text or "").lower()
|
| 63 |
scores = {k: 0 for k in ORGAN_HINTS.keys()}
|
|
|
|
| 70 |
if not best or best[0][1] == 0:
|
| 71 |
return "unknown"
|
| 72 |
|
|
|
|
| 73 |
top_org, top_score = best[0]
|
| 74 |
if len(best) > 1 and best[1][1] > 0 and (top_score - best[1][1]) <= 1:
|
| 75 |
return "mixed"
|
|
|
|
| 92 |
"unknown": [],
|
| 93 |
}
|
| 94 |
|
|
|
|
| 95 |
ENZYME_REGEXES = [
|
| 96 |
re.compile(r"\bCYP\s?(\d[A-Z]?\d?[A-Z]?\d?)\b", re.IGNORECASE),
|
| 97 |
re.compile(r"\bUGT\s?(\d[A-Z0-9]+)\b", re.IGNORECASE),
|
|
|
|
| 113 |
if e in up:
|
| 114 |
out.append(e)
|
| 115 |
|
|
|
|
| 116 |
for rx in ENZYME_REGEXES:
|
| 117 |
for m in rx.finditer(t):
|
| 118 |
g = (m.group(1) or "").upper()
|
|
|
|
| 138 |
x = "P-gp"
|
| 139 |
out2.append(x)
|
| 140 |
|
|
|
|
| 141 |
seen = set()
|
| 142 |
final = []
|
| 143 |
for x in out2:
|
|
|
|
| 168 |
"cytokine signaling",
|
| 169 |
]
|
| 170 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 171 |
def detect_pathways(text: str) -> List[str]:
|
| 172 |
t = text or ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 173 |
tl = t.lower()
|
| 174 |
+
out = []
|
| 175 |
for term in PATHWAY_TERMS:
|
| 176 |
if term.lower() in tl:
|
| 177 |
out.append(term)
|
|
|
|
| 178 |
if re.search(r"\bNF[-\s]?κ?B\b", t, flags=re.IGNORECASE) and "NF-kB" not in out:
|
| 179 |
out.append("NF-kB")
|
|
|
|
|
|
|
| 180 |
seen = set()
|
| 181 |
final = []
|
| 182 |
for x in out:
|
|
|
|
| 212 |
joined = " ".join([clean_text(t) for _, t in pages if clean_text(t)])
|
| 213 |
return len(joined) >= 200
|
| 214 |
|
| 215 |
+
def hard_wrap(s: str, width: int = 110) -> str:
    """Re-flow *s* so that no output line is longer than *width* characters.

    Each non-blank input line is wrapped independently with
    ``textwrap.fill``; blank / whitespace-only lines are dropped and the
    wrapped pieces are joined with single newlines. Over-long words are
    split (``break_long_words=True``) and hyphenated words may break at
    the hyphen (``break_on_hyphens=True``).

    Args:
        s: Text to wrap. ``None`` or whitespace-only input yields ``""``.
        width: Maximum line length. Values below 1 are treated as 1,
            because ``textwrap`` raises ``ValueError`` for width <= 0.

    Returns:
        The wrapped text, or ``""`` when there is nothing to wrap.
    """
    cleaned = (s or "").strip()
    if not cleaned:
        return ""

    # Clamp instead of letting textwrap raise on a non-positive width.
    safe_width = max(1, width)

    wrapped_lines = [
        textwrap.fill(
            line,
            width=safe_width,
            break_long_words=True,
            break_on_hyphens=True,
        )
        for line in cleaned.splitlines()
        if line.strip()
    ]
    return "\n".join(wrapped_lines)
|
| 221 |
+
|
| 222 |
|
| 223 |
# =============================
|
| 224 |
# OpenAI helpers
|
|
|
|
| 258 |
|
| 259 |
|
| 260 |
# =============================
|
| 261 |
+
# Expanded context = 3–5 sentences (PDF lines unreliable)
|
| 262 |
# =============================
|
| 263 |
def split_sentences(text: str) -> List[str]:
|
| 264 |
t = re.sub(r"\s+", " ", (text or "")).strip()
|
|
|
|
| 307 |
|
| 308 |
def build_index(files, api_key: str, embedding_model: str):
|
| 309 |
if not files:
|
| 310 |
+
return empty_index(), pd.DataFrame(), pd.DataFrame(), "Upload PDFs then click Build Search Index.", gr.update(choices=[""], value=""), gr.update(choices=[""], value="")
|
| 311 |
|
| 312 |
if len(files) > MAX_PDFS:
|
| 313 |
+
return empty_index(), pd.DataFrame(), pd.DataFrame(), f"Upload limit exceeded: max {MAX_PDFS} PDFs for pilot.", gr.update(choices=[""], value=""), gr.update(choices=[""], value="")
|
| 314 |
|
| 315 |
idx = empty_index()
|
| 316 |
papers_rows: List[Dict[str, Any]] = []
|
|
|
|
| 362 |
|
| 363 |
papers_df = pd.DataFrame(papers_rows, columns=["file","organ","pages_indexed","text_based"])
|
| 364 |
|
| 365 |
+
# Endpoint correlation: present/absent per paper (cleaner)
|
|
|
|
| 366 |
endpoint_names = list(ENDPOINT_HINTS.keys())
|
| 367 |
+
matrix = []
|
| 368 |
for p in papers_rows:
|
| 369 |
if not p.get("text_based"):
|
| 370 |
continue
|
| 371 |
pid = p["paper_id"]
|
|
|
|
| 372 |
p_pages = [r for r in page_rows if r["paper_id"] == pid]
|
| 373 |
+
row = {"file": p["file"], "organ": p["organ"]}
|
| 374 |
for ep in endpoint_names:
|
| 375 |
+
present = any(ep in (r.get("endpoints") or []) for r in p_pages)
|
| 376 |
+
row[ep] = "present" if present else ""
|
| 377 |
matrix.append(row)
|
| 378 |
endpoint_matrix_df = pd.DataFrame(matrix) if matrix else pd.DataFrame(columns=["file","organ"] + endpoint_names)
|
| 379 |
|
|
|
|
| 487 |
pid = r["paper_id"]
|
| 488 |
org = (papers.get(pid, {}) or {}).get("organ", "unknown")
|
| 489 |
ctx = expanded_context(r.get("text", ""), query, n_sentences=5)
|
| 490 |
+
ctx_wrapped = hard_wrap(ctx, width=110)
|
| 491 |
+
|
| 492 |
+
preview = ctx.strip()
|
| 493 |
+
preview = (preview[:220] + "…") if len(preview) > 220 else preview
|
| 494 |
|
| 495 |
rows.append({
|
| 496 |
"file": r.get("file",""),
|
|
|
|
| 500 |
"endpoints": "; ".join(r.get("endpoints") or []),
|
| 501 |
"enzymes": "; ".join((r.get("enzymes") or [])[:12]),
|
| 502 |
"pathways": "; ".join((r.get("pathways") or [])[:12]),
|
| 503 |
+
"preview": preview,
|
| 504 |
})
|
| 505 |
|
| 506 |
+
snippet = (ctx_wrapped.replace("\n", " ")[:360] + "…") if len(ctx_wrapped) > 360 else ctx_wrapped.replace("\n", " ")
|
| 507 |
evidence.append(f"- **{r.get('file','')}** (p.{r.get('page','')}): {snippet}")
|
| 508 |
|
| 509 |
+
# ✅ Compact table (no long context column)
|
| 510 |
+
results_df = pd.DataFrame(rows, columns=["file","page","score","organ","endpoints","enzymes","pathways","preview"])
|
| 511 |
evidence_md = "### Evidence used\n" + "\n".join(evidence[:8])
|
| 512 |
|
| 513 |
# grounded mini-summary
|
| 514 |
mini_summary = "(mini-summary unavailable)"
|
| 515 |
try:
|
| 516 |
client = get_client(api_key)
|
| 517 |
+
payload = [{"file": x["file"], "page": x["page"], "preview": x["preview"]} for x in rows[:8]]
|
| 518 |
|
| 519 |
system_msg = (
|
| 520 |
"You are a literature assistant for toxicology researchers. "
|
|
|
|
| 534 |
return results_df, mini_md, evidence_md
|
| 535 |
|
| 536 |
|
| 537 |
+
def _page_number(value: object) -> int:
    """Coerce *value* to an int page number, falling back to 0 when it cannot be converted."""
    try:
        return int(value)
    except (TypeError, ValueError):
        # e.g. None, NaN from a DataFrame cell, or a non-numeric string
        return 0


def on_select_result(df: pd.DataFrame, idx: dict, query: str, evt: gr.SelectData):
    """Build the detail-view values for the row selected in the results table.

    Args:
        df: Current contents of the search-results table.
        idx: Index state; its ``"pages"`` entry is scanned for a record whose
            ``file`` and ``page`` match the selected row.
        query: Search query, forwarded to ``expanded_context``.
        evt: Selection event; ``evt.index`` is either ``(row, col)`` or a
            bare row number.

    Returns:
        A 4-tuple ``(meta_markdown, citation, context, full_page_text)``.
        All four are empty strings when the table is empty or the selected
        row cannot be resolved. When no matching page record exists, the
        context is ``"(page text not found)"`` and the full text is ``""``.
    """
    nothing = ("", "", "", "")
    if df is None or df.empty:
        return nothing

    # evt.index may be (row, col) or int depending on gradio version
    selected = evt.index
    try:
        row_i = int(selected[0] if isinstance(selected, (list, tuple)) else selected)
    except (TypeError, ValueError, IndexError):
        return nothing
    # A stale selection can point past the end of a freshly replaced table.
    if not 0 <= row_i < len(df):
        return nothing

    r = df.iloc[row_i]
    file = str(r.get("file", ""))
    page = _page_number(r.get("page", 0))
    citation = f"{file} p.{page}"

    pages = (idx or {}).get("pages") or []
    rec = next(
        (
            x
            for x in pages
            if x.get("file") == file and _page_number(x.get("page", 0)) == page
        ),
        None,
    )
    if not rec:
        meta = f"**{citation}**"
        return meta, citation, "(page text not found)", ""

    page_text = rec.get("text", "")
    ctx = hard_wrap(expanded_context(page_text, query, n_sentences=5), width=110)
    full_txt = hard_wrap(page_text, width=110)

    meta = f"**{citation}** | organ: **{r.get('organ','')}** | score: **{r.get('score','')}**"
    return meta, citation, ctx, full_txt
|
| 560 |
+
|
| 561 |
+
|
| 562 |
+
def citation_ready(citation: str) -> str:
    """Return the status message shown after the "copy citation" button is pressed.

    Args:
        citation: Current contents of the citation box; may be ``None`` or blank.

    Returns:
        A prompt to select a row when *citation* is empty after stripping,
        otherwise a confirmation message that embeds the stripped citation.
    """
    cleaned = (citation or "").strip()
    if not cleaned:
        return "Select a result row first."
    return f"✅ Citation ready: {cleaned} (copy from the box above)"
|
| 567 |
+
|
| 568 |
+
|
| 569 |
# =============================
|
| 570 |
# Tab plugin (Option A)
|
| 571 |
# =============================
|
|
|
|
| 574 |
"## Literature Explorer (Pilot)\n"
|
| 575 |
f"- Limits: **max {MAX_PDFS} PDFs**, **max {MAX_PAGES_PER_PDF} pages/PDF**\n"
|
| 576 |
"- Text-based PDFs only (not scanned/image PDFs).\n"
|
| 577 |
+
"- Search is **page-level**; “3–5 lines” is approximated as **3–5 sentences**.\n"
|
| 578 |
)
|
| 579 |
|
| 580 |
idx_state = gr.State(empty_index())
|
|
|
|
| 589 |
build_btn = gr.Button("Build Search Index", variant="primary")
|
| 590 |
index_status = gr.Textbox(label="Index status", interactive=False)
|
| 591 |
papers_df = gr.Dataframe(label="Indexed papers", interactive=False, wrap=True)
|
| 592 |
+
|
| 593 |
+
# ✅ Table 2 now present/absent per paper
|
| 594 |
+
endpoint_matrix_df = gr.Dataframe(label="Endpoint correlation (present/absent per paper)", interactive=False, wrap=True)
|
| 595 |
|
| 596 |
with gr.Group():
|
| 597 |
gr.Markdown("### Search across indexed papers")
|
|
|
|
| 607 |
search_btn = gr.Button("Search", variant="secondary")
|
| 608 |
|
| 609 |
mini_summary_md = gr.Markdown()
|
| 610 |
+
|
| 611 |
+
# ✅ Table 3 compact (no long context)
|
| 612 |
+
results_df = gr.Dataframe(label="Search results (compact, page-level)", interactive=False, wrap=True)
|
| 613 |
+
|
| 614 |
+
# ✅ Selected result viewer (context moved out of table)
|
| 615 |
+
selected_meta = gr.Markdown()
|
| 616 |
+
citation_box = gr.Textbox(label="Citation (copy/paste)", interactive=False)
|
| 617 |
+
copy_btn = gr.Button("Copy citation (fills box)", variant="secondary")
|
| 618 |
+
copy_status = gr.Textbox(label="Copy status", interactive=False)
|
| 619 |
+
|
| 620 |
+
selected_context = gr.Textbox(label="Selected result context (3–5 sentences)", lines=6, interactive=False)
|
| 621 |
+
|
| 622 |
+
with gr.Accordion("Full page text (optional)", open=False):
|
| 623 |
+
full_page_text = gr.Textbox(label="Full page text", lines=14, interactive=False)
|
| 624 |
+
|
| 625 |
evidence_md = gr.Markdown()
|
| 626 |
|
| 627 |
build_btn.click(
|
|
|
|
| 634 |
fn=search,
|
| 635 |
inputs=[query, idx_state, api_key, embedding_model, summary_model, endpoint_filter, organ_filter, enzyme_filter, pathway_filter, top_k],
|
| 636 |
outputs=[results_df, mini_summary_md, evidence_md]
|
| 637 |
+
)
|
| 638 |
+
|
| 639 |
+
results_df.select(
|
| 640 |
+
fn=on_select_result,
|
| 641 |
+
inputs=[results_df, idx_state, query],
|
| 642 |
+
outputs=[selected_meta, citation_box, selected_context, full_page_text]
|
| 643 |
+
)
|
| 644 |
+
|
| 645 |
+
copy_btn.click(
|
| 646 |
+
fn=citation_ready,
|
| 647 |
+
inputs=[citation_box],
|
| 648 |
+
outputs=[copy_status]
|
| 649 |
)
|