"""
Report assembler - combines sections into final markdown report
"""
| |
|
| | import json |
| | import logging |
| | from datetime import datetime |
| | from pathlib import Path |
| | from typing import Dict |
| |
|
| | from . import config |
| | from .section_generators import SectionGenerator |
| |
|
logger = logging.getLogger(__name__)


class ReportAssembler:
    """Assembles the complete clinical report from individual sections.

    Supports two input modes:
    - JSON: load_patient_data() / generate_and_save()
    - EHR:  load_patient_data_from_ehr() / generate_and_save_from_ehr()

    Both the one-shot and the streaming report generators are driven by the
    same internal pipeline, so the final streamed report is identical to the
    one returned by generate_full_report().
    """

    def __init__(self):
        self.generator = SectionGenerator()

    def load_patient_data(self, json_path: str) -> Dict:
        """Load patient JSON data from file.

        Args:
            json_path: Path to the patient JSON file.

        Returns:
            The parsed JSON document.
        """
        logger.info("Loading patient data from %s", json_path)

        # Decode explicitly as UTF-8 so the result does not depend on the
        # platform's default locale encoding.
        with open(json_path, "r", encoding="utf-8") as f:
            return json.load(f)

    def load_patient_data_from_ehr(self, ehr_path: str) -> Dict:
        """Extract patient JSON from a raw EHR text file via EHRExtractor.

        Args:
            ehr_path: Path to the plain-text EHR report.

        Returns:
            Patient data dictionary as returned by
            EHRExtractor.extract_from_file().
        """
        # Imported lazily - presumably so the JSON-only path does not pay for
        # loading the extractor and its dependencies (TODO confirm).
        from .ehr_extractor import EHRExtractor

        logger.info("Extracting patient data from EHR: %s", ehr_path)
        extractor = EHRExtractor()
        return extractor.extract_from_file(ehr_path)

    def _iter_report_states(self, patient_data: Dict):
        """Run the section pipeline, yielding the report as it grows.

        Sections are generated lazily, in report order. Sections 1-5 are
        skipped when their generator returns a falsy value; section 6 is
        always included.

        Yields:
            Tuple of (cumulative_report, status_message). The status message
            announces the step that runs next; the last tuple carries the
            finished report.
        """
        # (title, generator method, always_include)
        steps = (
            ("Section 1: Microbiome Composition Profile",
             self.generator.generate_section_1, False),
            ("Section 2: Metabolite Landscape",
             self.generator.generate_section_2, False),
            ("Section 3: Drug–Microbiome Interaction Outlook",
             self.generator.generate_section_3, False),
            ("Section 4: Confounding Factors",
             self.generator.generate_section_4, False),
            ("Section 5: Intervention Considerations",
             self.generator.generate_section_5, False),
            ("Section 6: Data Quality & Limitations",
             self.generator.generate_section_6, True),
        )

        logger.info("Generating preamble")
        parts = [self.generator.generate_preamble(patient_data)]
        yield "\n".join(parts) + "\n", f"⏳ Generating {steps[0][0]}..."

        for position, (_title, generate, always_include) in enumerate(steps, start=1):
            logger.info("Generating section %d", position)
            text = generate(patient_data)
            if always_include or text:
                parts.append(text)

            if position < len(steps):
                status = f"⏳ Generating {steps[position][0]}..."
            else:
                status = "⏳ Compiling references and finalising report..."
            yield "\n".join(parts) + "\n", status

        # References must be built after all sections have run, since they
        # are collected from the generator once the sections are done.
        logger.info("Generating references and footer")
        parts.append(self._generate_references_section())
        parts.append(self._generate_footer())

        # Joining with "\n" keeps a separator line in front of the footer's
        # "---" even when the references section is empty, so Markdown never
        # reads the rule as a heading underline for the preceding line.
        yield "\n".join(parts), "✅ Report complete"

    def generate_full_report(self, patient_data: Dict) -> str:
        """Generate the complete clinical report.

        Args:
            patient_data: Patient JSON dictionary.

        Returns:
            Complete report as a markdown string.
        """
        logger.info("Starting full report generation")

        full_report = ""
        for full_report, _status in self._iter_report_states(patient_data):
            pass  # only the final cumulative state is needed

        logger.info("Report generation complete")
        return full_report

    def generate_full_report_streaming(self, patient_data: Dict):
        """Generate the report section by section, yielding it as it grows.

        Designed for Gradio generator functions: each yield replaces the
        current content of the output gr.Markdown component, so the clinician
        sees the report grow in real time.

        Args:
            patient_data: Patient JSON dictionary.

        Yields:
            Tuple of (cumulative_report: str, status_message: str) after each
            step. The final tuple holds the same report text that
            generate_full_report() returns.
        """
        logger.info("Starting streaming report generation")
        yield from self._iter_report_states(patient_data)
        logger.info("Streaming report generation complete")

    def _generate_references_section(self) -> str:
        """Generate the references section from the citations used so far.

        Returns:
            Markdown for the references section, or an empty string when the
            generator reports no citations.
        """
        references_data = self.generator.get_all_citations()
        if not references_data:
            return ""

        chunks = [
            "## References\n\n",
            "The following peer-reviewed publications were cited in this report:\n\n",
        ]
        for number, (citation, title) in enumerate(references_data, 1):
            # Only show the title when it adds information beyond the citation.
            if title and title != citation:
                chunks.append(f"{number}. {citation}: {title}\n")
            else:
                chunks.append(f"{number}. {citation}\n")
        chunks.append("\n")
        return "".join(chunks)

    def _generate_footer(self) -> str:
        """Generate the report footer with generation metadata."""
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        # Two trailing spaces are a Markdown hard line break; without them the
        # metadata fields collapse into a single paragraph when rendered.
        # Spelled out explicitly so editors cannot strip them silently.
        hard_break = "  "
        lines = [
            "---",
            "",
            f"**Report Generated:** {timestamp}{hard_break}",
            f"**Model:** MedGemma 1.5 4B{hard_break}",
            "**System:** Microbiome-Immunotherapy Clinical Decision Support v1.0",
            "",
            "*This report is intended for use by qualified healthcare professionals as a clinical decision support tool. It does not constitute medical advice and should be interpreted in conjunction with comprehensive clinical evaluation.*",
            "",
        ]
        return "\n".join(lines)

    def save_report(self, report: str, patient_id: str, output_dir: str = None) -> str:
        """Save the report to a markdown file.

        Args:
            report: Complete report markdown string.
            patient_id: Patient identifier used in the filename. Path
                separators in it are replaced with "_" so the file always
                lands directly inside the output directory.
            output_dir: Output directory (config.OUTPUT_DIR if not provided).
                Created if it does not exist.

        Returns:
            Path to the saved report file.
        """
        if output_dir is None:
            output_dir = config.OUTPUT_DIR

        output_path = Path(output_dir)
        output_path.mkdir(parents=True, exist_ok=True)

        safe_id = str(patient_id).replace("/", "_").replace("\\", "_")
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filepath = output_path / f"microbiome_ici_report_{safe_id}_{timestamp}.md"

        # The report contains non-ASCII characters (e.g. en dashes), so write
        # UTF-8 explicitly instead of relying on the locale default.
        with open(filepath, "w", encoding="utf-8") as f:
            f.write(report)

        logger.info("Report saved to: %s", filepath)
        return str(filepath)

    def generate_and_save(self, patient_json_path: str, output_dir: str = None) -> str:
        """Complete JSON workflow: load data, generate report, save to file.

        Args:
            patient_json_path: Path to patient JSON file.
            output_dir: Optional output directory override.

        Returns:
            Path to the saved report file.

        Raises:
            KeyError: If the patient data has no ["patient"]["id"] entry.
        """
        patient_data = self.load_patient_data(patient_json_path)
        patient_id = patient_data["patient"]["id"]

        report = self.generate_full_report(patient_data)
        return self.save_report(report, patient_id, output_dir)

    def generate_and_save_from_ehr(
        self,
        ehr_path: str,
        output_dir: str = None,
        save_json_path: str = None,
    ) -> str:
        """Complete EHR workflow: extract JSON, generate report, save to file.

        Args:
            ehr_path: Path to the plain-text EHR report.
            output_dir: Optional output directory override.
            save_json_path: If provided, save the extracted patient JSON to
                this path so it can be inspected or reused without re-running
                extraction. Parent directories are created as needed.

        Returns:
            Path to the saved report markdown file.

        Raises:
            KeyError: If the extracted data has no ["patient"]["id"] entry.
        """
        patient_data = self.load_patient_data_from_ehr(ehr_path)
        patient_id = patient_data["patient"]["id"]

        if save_json_path:
            Path(save_json_path).parent.mkdir(parents=True, exist_ok=True)
            with open(save_json_path, "w", encoding="utf-8") as f:
                json.dump(patient_data, f, indent=2)
            logger.info("Extracted patient JSON saved to: %s", save_json_path)

        report = self.generate_full_report(patient_data)
        return self.save_report(report, patient_id, output_dir)
| |
|