# Page residue from the hosting site: commit 162940b by fierce74 ("changed app.py")
"""
app.py — Gradio entrypoint for the
Microbiome-Immunotherapy Clinical Decision Support System
Startup sequence:
1. Download / verify ChromaDB from HuggingFace (chroma_loader)
2. Load MedGemma + PubMedBERT once into memory (ReportAssembler.__init__)
3. Launch Gradio UI
Usage:
python app.py
"""
import json
import logging
import sys
from pathlib import Path
from typing import Generator, Tuple
import gradio as gr
# ---------------------------------------------------------------------------
# Logging
# ---------------------------------------------------------------------------
# Record layout: timestamp — logger name — level — message.
# The separators are em dashes; they were previously stored as mis-decoded
# bytes, which produced garbled log lines.
LOG_FORMAT = "%(asctime)s — %(name)s — %(levelname)s — %(message)s"

# Route INFO-and-above records to stdout through a single stream handler.
logging.basicConfig(
    level=logging.INFO,
    format=LOG_FORMAT,
    handlers=[logging.StreamHandler(sys.stdout)],
)
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Step 1: Ensure ChromaDB is available locally before anything else
# ---------------------------------------------------------------------------
# Startup banner (the em dash in the message was previously mis-encoded).
logger.info("=" * 70)
logger.info("Microbiome-Immunotherapy CDS — starting up")
logger.info("=" * 70)

# NOTE: this import sits below the logging setup and the banner, so anything
# logged while importing src.chroma_loader is emitted after them.
from src.chroma_loader import ensure_chroma_db  # noqa: E402

# Per the module docstring this downloads / verifies the ChromaDB store.
ensure_chroma_db()
# ---------------------------------------------------------------------------
# Step 2: Load models once (expensive — happens here, not per request)
# ---------------------------------------------------------------------------
logger.info("Loading models — this may take a few minutes on first run...")

# Imported here (not at the top of the file) so it runs after Step 1.
from src.report_assembler import ReportAssembler  # noqa: E402

# One module-level instance, shared by the request handlers defined below.
assembler = ReportAssembler()
logger.info("Models loaded successfully.")
# ---------------------------------------------------------------------------
# Step 3: Discover input files in data/input/ (.txt and .json)
# ---------------------------------------------------------------------------
INPUT_DIR = Path("data/sample_input")
def _discover_input_files() -> dict:
"""
Scan data/input/ for .txt and .json files.
Returns a dict mapping display label -> absolute path string.
Labels include the file type tag so the user knows what they're selecting,
e.g. {"patient_ehr_1 [EHR]": "/abs/.../patient_ehr_1.txt",
"patient_001 [JSON]": "/abs/.../patient_001.json"}
"""
files = {}
for ext, tag in (("*.txt", "EHR"), ("*.json", "JSON")):
for p in sorted(INPUT_DIR.glob(ext)):
label = f"{p.stem} [{tag}]"
files[label] = str(p.resolve())
return files
# Label -> absolute-path mapping, computed once at import time. The dropdown
# choices and the request handlers below all read from it.
INPUT_FILES = _discover_input_files()

if not INPUT_FILES:
    # An empty input directory is not fatal: the app still starts, it just
    # has nothing to offer in the dropdown.
    logger.warning(
        f"No input files found in {INPUT_DIR}. "
        "Add .txt (EHR) or .json (patient schema) files there "
        "before generating reports."
    )
# ---------------------------------------------------------------------------
# Step 4: Helper — load and pretty-print a file for the input preview
# ---------------------------------------------------------------------------
def load_input_preview(label: str) -> Tuple[str, str]:
    """
    Read the selected file and return (content, language) for the preview.

    JSON files are pretty-printed with a 2-space indent; .txt files (and
    .json files that fail to parse) are returned as-is. Problems are reported
    through the returned content instead of being raised, so the UI callback
    never fails.

    Args:
        label: Display label, i.e. a key of INPUT_FILES. An empty or None
            label produces an empty preview.

    Returns:
        (content: str, language: str) — language is "json" or "text".
    """
    if not label:
        return "", "text"

    path = INPUT_FILES.get(label)
    if not path:
        return f"File not found for selection: '{label}'", "text"

    try:
        raw = Path(path).read_text(encoding="utf-8")
    except Exception as exc:
        # Deliberately broad: this feeds a UI widget, so any read failure
        # (missing file, permissions, bad encoding) becomes preview text.
        return f"Could not read file: {exc}", "text"

    if not path.endswith(".json"):
        return raw, "text"

    try:
        # ensure_ascii=False keeps accented names, units, etc. readable in
        # the preview instead of turning them into \uXXXX escapes.
        pretty = json.dumps(json.loads(raw), indent=2, ensure_ascii=False)
    except json.JSONDecodeError:
        # Malformed JSON: show the raw text rather than an error.
        return raw, "text"
    return pretty, "json"
# ---------------------------------------------------------------------------
# Step 5: Report generation function (generator — streams section by section)
# ---------------------------------------------------------------------------
def generate_report(label: str) -> Generator[Tuple[str, str], None, None]:
    """
    Gradio generator function. Routes .txt files through EHR extraction
    and .json files directly through the patient-data loader, then streams
    the report as the assembler produces it.

    Errors are never raised to the caller: each failure is logged and
    reported through the status message. If generation fails part-way, the
    sections that were already streamed are kept on screen.

    Args:
        label: Display label, i.e. a key of INPUT_FILES.

    Yields:
        Tuple of (report_markdown: str, status_message: str).
    """
    if not label:
        yield "", "⚠️ Please select a patient file."
        return

    path = INPUT_FILES.get(label)
    if not path:
        yield "", f"⚠️ File not found for selection: '{label}'"
        return

    logger.info(f"Report requested for: {label} → {path}")
    is_json = path.endswith(".json")

    # ------------------------------------------------------------------
    # Load patient data
    # ------------------------------------------------------------------
    if is_json:
        yield "", "⏳ Loading structured patient JSON..."
        try:
            patient_data = assembler.load_patient_data(path)
        except Exception as exc:
            logger.exception("JSON load failed: %s", exc)
            yield "", f"❌ Failed to load patient JSON: {exc}"
            return
    else:
        yield "", f"⏳ Extracting structured data from EHR: {Path(path).stem}..."
        try:
            patient_data = assembler.load_patient_data_from_ehr(path)
        except Exception as exc:
            logger.exception("EHR extraction failed: %s", exc)
            yield "", f"❌ EHR extraction failed: {exc}"
            return

    # ------------------------------------------------------------------
    # Stream the report section by section
    # ------------------------------------------------------------------
    # Remember the most recent partial report so that a failure in a later
    # section does not wipe out the sections already shown to the user.
    latest_report = ""
    try:
        for report_so_far, status in assembler.generate_full_report_streaming(patient_data):
            latest_report = report_so_far
            yield report_so_far, status
    except Exception as exc:
        logger.exception("Report generation failed: %s", exc)
        yield latest_report, f"❌ Report generation failed: {exc}"
def on_file_selected(label: str) -> Tuple[str, str, str]:
    """
    Triggered when the dropdown selection changes.
    Loads the input preview and clears the report output.

    Args:
        label: Display label of the selected file; empty or None when the
            selection has been cleared.

    Returns:
        (preview_content: str, report_content: str, status_message: str)
    """
    if not label:
        # Nothing is selected, so do not claim that a file was loaded;
        # fall back to the idle prompt instead.
        return "", "", "Select a patient file to begin."

    # The language hint returned alongside the content is not needed here.
    content, _ = load_input_preview(label)
    return (
        content,
        "",
        "File loaded. Click Generate Report to begin.",
    )
# ---------------------------------------------------------------------------
# Step 6: Build the Gradio UI
# ---------------------------------------------------------------------------
# Static HTML for the "about / generation time / disclaimer" panel shown
# under the page title. The em dashes below were previously mis-encoded and
# would have rendered as garbage characters in the browser.
DISCLAIMER_HTML = """
<div style="
background: #f8f9fa;
border: 1px solid #dee2e6;
border-radius: 8px;
padding: 16px 20px;
margin-bottom: 8px;
font-size: 0.91em;
color: #343a40;
line-height: 1.7;
">
<div style="margin-bottom: 10px;">
<strong>About This System</strong><br>
This tool retrieves evidence from a curated RAG knowledge base of peer-reviewed
microbiome–immunotherapy literature, stored as a ChromaDB vector database and
hosted publicly on
<a href="https://huggingface.co/datasets/fierce74/RAG-Immunotherapy-Microbiome-CDS" target="_blank">
HuggingFace Datasets</a>.
It accepts two input types: <strong>free-text EHR reports (.txt)</strong> — from which
patient data is automatically extracted by MedGemma — and
<strong>structured JSON files</strong> following the patient schema template, which are
loaded directly. All patient inputs available in this demo are
<strong>fully synthetic</strong> and were generated by a large language model for
demonstration purposes only; they do not represent real individuals or clinical cases.
</div>
<div style="margin-bottom: 10px;">
<strong>Generation Time</strong><br>
Each of the six report sections is generated independently through a full RAG retrieval
and MedGemma inference cycle. Expect approximately <strong>15 minutes</strong> for a
complete report on GPU hardware; sections will appear on screen as they complete.
</div>
<div style="
background: #fff3cd;
border: 1px solid #ffc107;
border-radius: 5px;
padding: 10px 14px;
color: #664d03;
">
<strong>Clinical Decision Support Tool — For Healthcare Professional Use Only.</strong>
This system is intended as a <em>decision support aid</em> for qualified oncologists
and clinical teams. It does not constitute medical advice and must be interpreted
in conjunction with comprehensive clinical evaluation and professional judgement.
</div>
</div>
"""
# Static HTML for the centered page header: the application title (<h1>)
# followed by a one-line tagline. Rendered via gr.HTML at the top of the UI.
TITLE_HTML = """
<div style="text-align: center; padding: 10px 0 4px 0;">
<h1 style="font-size: 1.5em; margin: 0; color: #1a1a2e;">
🧬 Microbiome–Immunotherapy Clinical Decision Support System
</h1>
<p style="color: #555; margin: 4px 0 0 0; font-size: 0.95em;">
Evidence-based microbiome analytics to support cancer immunotherapy decisions
</p>
</div>
"""
# Assemble the whole page. Layout: title and disclaimer on top, then a row
# with a narrow control column (file picker, button, status) and a wide
# column holding two tabs (input preview, generated report).
# Several user-visible strings below (tab labels, the "×" in the evidence-base
# line, an em dash in the report hint) were previously mis-encoded; they are
# restored to the intended characters.
with gr.Blocks(
    title="Microbiome-Immunotherapy CDS",
) as demo:
    # -----------------------------------------------------------------------
    # Header
    # -----------------------------------------------------------------------
    gr.HTML(TITLE_HTML)
    gr.HTML(DISCLAIMER_HTML)
    # -----------------------------------------------------------------------
    # Main layout: left control panel | right tabbed output
    # -----------------------------------------------------------------------
    with gr.Row(equal_height=False):
        # -------------------------------------------------------------------
        # LEFT: Controls
        # -------------------------------------------------------------------
        with gr.Column(scale=1, min_width=260):
            gr.Markdown("### Patient Selection")
            file_dropdown = gr.Dropdown(
                choices=list(INPUT_FILES.keys()),
                label="Select Patient File",
                info="EHR (.txt) or structured JSON files from data/sample_input/",
                interactive=True,
                value=None,
            )
            generate_btn = gr.Button(
                "Generate Report",
                variant="primary",
                size="lg",
            )
            gr.Markdown("---")
            gr.Markdown("### Status")
            status_box = gr.Textbox(
                value="Select a patient file to begin.",
                label="",
                interactive=False,
                lines=2,
                max_lines=3,
            )
            gr.Markdown("---")
            gr.Markdown(
                "<small style='color:#888;'>"
                "**Model:** MedGemma 1.5 4B &nbsp;|&nbsp; "
                "**RAG:** PubMedBERT + ChromaDB<br>"
                "**Evidence base:** Peer-reviewed literature on microbiome × immunotherapy"
                "</small>"
            )
        # -------------------------------------------------------------------
        # RIGHT: Tabbed panel — Input Preview | Clinical Report
        # -------------------------------------------------------------------
        with gr.Column(scale=3):
            with gr.Tabs():
                # ── Tab 1: Input Preview ────────────────────────────────────
                with gr.Tab("📄 Input Preview"):
                    gr.Markdown(
                        "<small style='color:#888;'>File contents are shown here "
                        "when a patient file is selected from the dropdown.</small>"
                    )
                    input_preview = gr.Textbox(
                        value="",
                        label="",
                        interactive=False,
                        lines=35,
                        max_lines=60,
                    )
                # ── Tab 2: Clinical Report ──────────────────────────────────
                with gr.Tab("📋 Clinical Report"):
                    gr.Markdown(
                        "<small style='color:#888;'>The report is generated section "
                        "by section — content appears as each section completes.</small>"
                    )
                    report_output = gr.Markdown(
                        value="*The generated report will appear here.*",
                        label="",
                        height=780,
                    )
    # -----------------------------------------------------------------------
    # Event wiring
    # -----------------------------------------------------------------------
    # Selecting a file: load preview, clear report output
    file_dropdown.change(
        fn=on_file_selected,
        inputs=[file_dropdown],
        outputs=[input_preview, report_output, status_box],
    )
    # Generate button: stream report section by section into the Report tab
    generate_btn.click(
        fn=generate_report,
        inputs=[file_dropdown],
        outputs=[report_output, status_box],
    )
# ---------------------------------------------------------------------------
# Launch
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    # Bind on all interfaces, port 7860, and surface handler errors in the UI.
    # NOTE(review): `theme` is passed to launch() here rather than to
    # gr.Blocks(); presumably this targets a Gradio release whose launch()
    # accepts it — confirm against the pinned Gradio version.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
        "theme": gr.themes.Soft(primary_hue="blue", neutral_hue="slate"),
    }
    demo.launch(**launch_options)