| """ |
| app.py — Gradio entrypoint for the |
| Microbiome-Immunotherapy Clinical Decision Support System |
| |
| Startup sequence: |
| 1. Download / verify ChromaDB from HuggingFace (chroma_loader) |
| 2. Load MedGemma + PubMedBERT once into memory (ReportAssembler.__init__) |
| 3. Launch Gradio UI |
| |
| Usage: |
| python app.py |
| """ |
|
|
| import json |
| import logging |
| import sys |
| from pathlib import Path |
| from typing import Generator, Tuple |
|
|
| import gradio as gr |
|
|
| |
| |
| |
# Route every log record (INFO and above) to stdout.
# The field separator is plain ASCII so a record still prints correctly on
# streams whose encoding is not UTF-8.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    handlers=[logging.StreamHandler(sys.stdout)],
)
logger = logging.getLogger(__name__)
|
|
| |
| |
| |
# Startup runs in a fixed order: banner, vector-store check, model load.
# The project imports stay next to their first use, as in the original layout.
# NOTE(review): presumably so the banner is logged before the heavy imports
# execute - confirm before hoisting them to the top of the file.
logger.info("=" * 70)
logger.info("Microbiome-Immunotherapy CDS - starting up")
logger.info("=" * 70)

# Step 1: make sure the ChromaDB store is available before anything uses it.
from src.chroma_loader import ensure_chroma_db  # noqa: E402

ensure_chroma_db()

# Step 2: build the single shared ReportAssembler used by the UI callbacks.
logger.info("Loading models - this may take a few minutes on first run...")
from src.report_assembler import ReportAssembler  # noqa: E402

assembler = ReportAssembler()
logger.info("Models loaded successfully.")
|
|
| |
| |
| |
# Folder scanned for selectable patient inputs. The path is relative, so it is
# interpreted against the working directory the app is started from.
INPUT_DIR = Path("data/sample_input")


def _discover_input_files() -> dict:
    """
    Scan INPUT_DIR for .txt and .json files.

    Returns:
        A dict mapping display label -> absolute path string. Each label
        carries a file-type tag so the user knows what they are selecting,
        e.g. {"patient_ehr_1 [EHR]": "/abs/.../patient_ehr_1.txt",
              "patient_001 [JSON]": "/abs/.../patient_001.json"}.
        All .txt entries come first, then all .json entries, each group
        sorted by path. The dict is empty when nothing matches.
    """
    files = {}
    for pattern, tag in (("*.txt", "EHR"), ("*.json", "JSON")):
        for candidate in sorted(INPUT_DIR.glob(pattern)):
            # A directory whose name matches the pattern is not a readable
            # input, so it must not be offered in the dropdown.
            if not candidate.is_file():
                continue
            files[f"{candidate.stem} [{tag}]"] = str(candidate.resolve())
    return files
|
|
# Label -> path mapping, built once at import time. The dropdown choices and
# both UI callbacks look selections up in it.
INPUT_FILES = _discover_input_files()

# An empty mapping is not fatal (the UI still starts), so only warn about it.
if not INPUT_FILES:
    logger.warning(
        f"No input files found in {INPUT_DIR}. Add .txt (EHR) or .json "
        "(patient schema) files there before generating reports."
    )
|
|
| |
| |
| |
|
|
def load_input_preview(label: str) -> Tuple[str, str]:
    """
    Read the file behind a dropdown label and return it for display.

    JSON files are re-serialised with two-space indentation, keeping
    non-ASCII characters readable instead of escaping them. Any other file,
    and JSON that fails to parse, is returned unchanged.

    Args:
        label: Key of INPUT_FILES chosen in the dropdown; may be empty/None.

    Returns:
        (content, language) where language is "json" or "text". Lookup and
        read problems are reported inside ``content`` (with language "text")
        rather than raised, so the message is what the preview shows.
    """
    if not label:
        return "", "text"

    path = INPUT_FILES.get(label)
    if not path:
        return f"File not found for selection: '{label}'", "text"

    try:
        raw = Path(path).read_text(encoding="utf-8")
    except Exception as exc:
        # UI boundary: show the problem in the preview instead of raising.
        return f"Could not read file: {exc}", "text"

    # Compare the suffix case-insensitively. Glob matching ignores case on
    # Windows, so a file named "X.JSON" can be listed as a JSON input there.
    if Path(path).suffix.lower() != ".json":
        return raw, "text"

    try:
        pretty = json.dumps(json.loads(raw), indent=2, ensure_ascii=False)
    except json.JSONDecodeError:
        # Malformed JSON is still worth showing; fall back to the raw text.
        return raw, "text"
    return pretty, "json"
|
|
|
|
| |
| |
| |
|
|
def generate_report(label: str) -> Generator[Tuple[str, str], None, None]:
    """
    Gradio generator callback behind the "Generate Report" button.

    Routes .json files through ``assembler.load_patient_data`` and every
    other file through ``assembler.load_patient_data_from_ehr``, then relays
    whatever ``assembler.generate_full_report_streaming`` yields.

    Args:
        label: Key of INPUT_FILES chosen in the dropdown; may be empty/None.

    Yields:
        Tuple of (report_markdown: str, status_message: str). Failures are
        never raised to Gradio: they are logged with a traceback and reported
        as an empty report plus an error status, after which the generator
        stops.
    """
    # Status icons are written as escapes so the source stays pure ASCII.
    warn = "\N{WARNING SIGN}\N{VARIATION SELECTOR-16}"
    busy = "\N{HOURGLASS WITH FLOWING SAND}"
    fail = "\N{CROSS MARK}"

    if not label:
        yield "", f"{warn} Please select a patient file."
        return

    path = INPUT_FILES.get(label)
    if not path:
        yield "", f"{warn} File not found for selection: '{label}'"
        return

    logger.info("Report requested for: %s -> %s", label, path)
    # Case-insensitive on purpose: glob matching ignores case on Windows, so
    # "X.JSON" can be listed as JSON and must not be routed as an EHR there.
    is_json = Path(path).suffix.lower() == ".json"

    # Stage 1: turn the selected file into structured patient data.
    if is_json:
        yield "", f"{busy} Loading structured patient JSON..."
        try:
            patient_data = assembler.load_patient_data(path)
        except Exception as exc:
            logger.exception("JSON load failed: %s", exc)
            yield "", f"{fail} Failed to load patient JSON: {exc}"
            return
    else:
        yield "", f"{busy} Extracting structured data from EHR: {Path(path).stem}..."
        try:
            patient_data = assembler.load_patient_data_from_ehr(path)
        except Exception as exc:
            logger.exception("EHR extraction failed: %s", exc)
            yield "", f"{fail} EHR extraction failed: {exc}"
            return

    # Stage 2: stream the report; each item is (report so far, status).
    try:
        for report_so_far, status in assembler.generate_full_report_streaming(patient_data):
            yield report_so_far, status
    except Exception as exc:
        logger.exception("Report generation failed: %s", exc)
        yield "", f"{fail} Report generation failed: {exc}"
        return
|
|
|
|
def on_file_selected(label: str) -> Tuple[str, str, str]:
    """
    Triggered when the dropdown selection changes.

    Loads the input preview and clears the report output.

    Args:
        label: Key of INPUT_FILES chosen in the dropdown; may be empty/None
            when the selection has been cleared.

    Returns:
        (preview_content: str, report_content: str, status_message: str)
    """
    if not label:
        # Nothing is selected, so do not claim that a file was loaded.
        return "", "", "Select a patient file to begin."

    # The language half of the preview result is not needed here.
    content, _ = load_input_preview(label)
    return (
        content,
        "",
        "File loaded. Click Generate Report to begin.",
    )
|
|
|
|
| |
| |
| |
|
|
# Static notice rendered under the page title: what the system is, how long a
# report takes, and the professional-use disclaimer.
# Dashes are written as HTML entities so the markup stays pure ASCII.
DISCLAIMER_HTML = """
<div style="
    background: #f8f9fa;
    border: 1px solid #dee2e6;
    border-radius: 8px;
    padding: 16px 20px;
    margin-bottom: 8px;
    font-size: 0.91em;
    color: #343a40;
    line-height: 1.7;
">
    <div style="margin-bottom: 10px;">
        <strong>About This System</strong><br>
        This tool retrieves evidence from a curated RAG knowledge base of peer-reviewed
        microbiome&ndash;immunotherapy literature, stored as a ChromaDB vector database and
        hosted publicly on
        <a href="https://huggingface.co/datasets/fierce74/RAG-Immunotherapy-Microbiome-CDS" target="_blank">
        HuggingFace Datasets</a>.
        It accepts two input types: <strong>free-text EHR reports (.txt)</strong> &mdash; from which
        patient data is automatically extracted by MedGemma &mdash; and
        <strong>structured JSON files</strong> following the patient schema template, which are
        loaded directly. All patient inputs available in this demo are
        <strong>fully synthetic</strong> and were generated by a large language model for
        demonstration purposes only; they do not represent real individuals or clinical cases.
    </div>
    <div style="margin-bottom: 10px;">
        <strong>Generation Time</strong><br>
        Each of the six report sections is generated independently through a full RAG retrieval
        and MedGemma inference cycle. Expect approximately <strong>15 minutes</strong> for a
        complete report on GPU hardware; sections will appear on screen as they complete.
    </div>
    <div style="
        background: #fff3cd;
        border: 1px solid #ffc107;
        border-radius: 5px;
        padding: 10px 14px;
        color: #664d03;
    ">
        <strong>Clinical Decision Support Tool &mdash; For Healthcare Professional Use Only.</strong>
        This system is intended as a <em>decision support aid</em> for qualified oncologists
        and clinical teams. It does not constitute medical advice and must be interpreted
        in conjunction with comprehensive clinical evaluation and professional judgement.
    </div>
</div>
"""
# Centered page heading and tagline. The icon (&#129516; = DNA double helix)
# and the dash are HTML entities so the markup stays pure ASCII.
TITLE_HTML = """
<div style="text-align: center; padding: 10px 0 4px 0;">
    <h1 style="font-size: 1.5em; margin: 0; color: #1a1a2e;">
        &#129516; Microbiome&ndash;Immunotherapy Clinical Decision Support System
    </h1>
    <p style="color: #555; margin: 4px 0 0 0; font-size: 0.95em;">
        Evidence-based microbiome analytics to support cancer immunotherapy decisions
    </p>
</div>
"""
|
|
# Page layout: title and notice on top, then one row with two columns -
# file picker / button / status on the left, tabbed preview and report on the
# right. Non-ASCII glyphs are written as \N{...} escapes so this source stays
# pure ASCII.
with gr.Blocks(
    title="Microbiome-Immunotherapy CDS",
) as demo:

    # Header
    gr.HTML(TITLE_HTML)
    gr.HTML(DISCLAIMER_HTML)

    with gr.Row(equal_height=False):

        # Left column: controls and status
        with gr.Column(scale=1, min_width=260):

            gr.Markdown("### Patient Selection")

            # Choices are the labels discovered at import time; nothing is
            # preselected.
            file_dropdown = gr.Dropdown(
                choices=list(INPUT_FILES.keys()),
                label="Select Patient File",
                info="EHR (.txt) or structured JSON files from data/sample_input/",
                interactive=True,
                value=None,
            )

            generate_btn = gr.Button(
                "Generate Report",
                variant="primary",
                size="lg",
            )

            gr.Markdown("---")
            gr.Markdown("### Status")

            # Read-only status line written by both callbacks.
            status_box = gr.Textbox(
                value="Select a patient file to begin.",
                label="",
                interactive=False,
                lines=2,
                max_lines=3,
            )

            gr.Markdown("---")
            gr.Markdown(
                "<small style='color:#888;'>"
                "**Model:** MedGemma 1.5 4B | "
                "**RAG:** PubMedBERT + ChromaDB<br>"
                "**Evidence base:** Peer-reviewed literature on microbiome "
                "\N{MULTIPLICATION SIGN} immunotherapy"
                "</small>"
            )

        # Right column: tabbed input preview and generated report
        with gr.Column(scale=3):

            with gr.Tabs():

                # Tab 1: raw / pretty-printed content of the selected file
                with gr.Tab("\N{PAGE FACING UP} Input Preview"):
                    gr.Markdown(
                        "<small style='color:#888;'>File contents are shown here "
                        "when a patient file is selected from the dropdown.</small>"
                    )
                    input_preview = gr.Textbox(
                        value="",
                        label="",
                        interactive=False,
                        lines=35,
                        max_lines=60,
                    )

                # Tab 2: the report, updated as generate_report yields
                with gr.Tab("\N{CLIPBOARD} Clinical Report"):
                    gr.Markdown(
                        "<small style='color:#888;'>The report is generated section "
                        "by section \N{EM DASH} content appears as each section completes.</small>"
                    )
                    report_output = gr.Markdown(
                        value="*The generated report will appear here.*",
                        label="",
                        height=780,
                    )

    # Event wiring (must stay inside the Blocks context).

    # Selecting a file refreshes the preview and resets report and status.
    file_dropdown.change(
        fn=on_file_selected,
        inputs=[file_dropdown],
        outputs=[input_preview, report_output, status_box],
    )

    # The button streams (report, status) pairs from the generator callback.
    generate_btn.click(
        fn=generate_report,
        inputs=[file_dropdown],
        outputs=[report_output, status_box],
    )
|
|
|
|
| |
| |
| |
if __name__ == "__main__":
    # Gather the launch settings first so they read as one configuration.
    launch_options = {
        # 0.0.0.0 listens on every network interface, not just localhost.
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
        "theme": gr.themes.Soft(primary_hue="blue", neutral_hue="slate"),
    }
    demo.launch(**launch_options)