""" Report assembler - combines sections into final markdown report """ import json import logging from datetime import datetime from pathlib import Path from typing import Dict from . import config from .section_generators import SectionGenerator logger = logging.getLogger(__name__) class ReportAssembler: """Assembles complete clinical report from individual sections. Supports two input modes: - JSON: load_patient_data() / generate_and_save() (existing path) - EHR: load_patient_data_from_ehr() / generate_and_save_from_ehr() (new path) """ def __init__(self): self.generator = SectionGenerator() def load_patient_data(self, json_path: str) -> Dict: """Load patient JSON data from file""" logger.info(f"Loading patient data from {json_path}") with open(json_path, 'r') as f: patient_data = json.load(f) return patient_data def load_patient_data_from_ehr(self, ehr_path: str) -> Dict: """Extract patient JSON from a raw EHR text file using MedGemma. Args: ehr_path: Path to the plain-text EHR report. Returns: Patient data dictionary matching the pipeline schema. """ # Imported here so the JSON-only code path has zero extra import cost from .ehr_extractor import EHRExtractor logger.info(f"Extracting patient data from EHR: {ehr_path}") extractor = EHRExtractor() return extractor.extract_from_file(ehr_path) def generate_full_report(self, patient_data: Dict) -> str: """ Generate complete clinical report Args: patient_data: Patient JSON dictionary Returns: Complete report as markdown string """ logger.info("Starting full report generation") report_sections = [] # Section 0: Preamble (always included, not LLM-generated) logger.info("Generating preamble") preamble = self.generator.generate_preamble(patient_data) report_sections.append(preamble) # Section 1: Microbiome Composition Profile section_1 = self.generator.generate_section_1(patient_data) if section_1: report_sections.append(section_1) # Section 2: Metabolite Landscape section_2 = self.generator.generate_section_2(patient_data) if section_2: report_sections.append(section_2) # Section 3: Drug-Microbiome Interaction Outlook section_3 = self.generator.generate_section_3(patient_data) if section_3: report_sections.append(section_3) # Section 4: Confounding Factors section_4 = self.generator.generate_section_4(patient_data) if section_4: report_sections.append(section_4) # Section 5: Intervention Considerations section_5 = self.generator.generate_section_5(patient_data) if section_5: report_sections.append(section_5) # Section 6: Data Quality & Limitations (always included) section_6 = self.generator.generate_section_6(patient_data) report_sections.append(section_6) # References section references = self._generate_references_section() report_sections.append(references) # Footer footer = self._generate_footer() report_sections.append(footer) # Combine all sections full_report = "\n".join(report_sections) logger.info("Report generation complete") return full_report def generate_full_report_streaming(self, patient_data: Dict): """ Generate the complete clinical report section by section, yielding the cumulative markdown string after each section completes. Designed for Gradio generator functions: each yield replaces the current content of the output gr.Markdown component, so the clinician sees the report grow in real time. Args: patient_data: Patient JSON dictionary. Yields: Tuple of (cumulative_report: str, status_message: str) after each section is appended. """ logger.info("Starting streaming report generation") accumulated = "" # ------------------------------------------------------------------ # Section 0: Preamble (no LLM — instant) # ------------------------------------------------------------------ logger.info("Generating preamble") preamble = self.generator.generate_preamble(patient_data) accumulated += preamble + "\n" yield accumulated, "⏳ Generating Section 1: Microbiome Composition Profile..." # ------------------------------------------------------------------ # Section 1: Microbiome Composition Profile # ------------------------------------------------------------------ logger.info("Generating section 1") section_1 = self.generator.generate_section_1(patient_data) if section_1: accumulated += section_1 + "\n" yield accumulated, "⏳ Generating Section 2: Metabolite Landscape..." # ------------------------------------------------------------------ # Section 2: Metabolite Landscape # ------------------------------------------------------------------ logger.info("Generating section 2") section_2 = self.generator.generate_section_2(patient_data) if section_2: accumulated += section_2 + "\n" yield accumulated, "⏳ Generating Section 3: Drug–Microbiome Interaction Outlook..." # ------------------------------------------------------------------ # Section 3: Drug–Microbiome Interaction Outlook # ------------------------------------------------------------------ logger.info("Generating section 3") section_3 = self.generator.generate_section_3(patient_data) if section_3: accumulated += section_3 + "\n" yield accumulated, "⏳ Generating Section 4: Confounding Factors..." # ------------------------------------------------------------------ # Section 4: Confounding Factors # ------------------------------------------------------------------ logger.info("Generating section 4") section_4 = self.generator.generate_section_4(patient_data) if section_4: accumulated += section_4 + "\n" yield accumulated, "⏳ Generating Section 5: Intervention Considerations..." # ------------------------------------------------------------------ # Section 5: Intervention Considerations # ------------------------------------------------------------------ logger.info("Generating section 5") section_5 = self.generator.generate_section_5(patient_data) if section_5: accumulated += section_5 + "\n" yield accumulated, "⏳ Generating Section 6: Data Quality & Limitations..." # ------------------------------------------------------------------ # Section 6: Data Quality & Limitations (always included) # ------------------------------------------------------------------ logger.info("Generating section 6") section_6 = self.generator.generate_section_6(patient_data) accumulated += section_6 + "\n" yield accumulated, "⏳ Compiling references and finalising report..." # ------------------------------------------------------------------ # References + Footer (no LLM — instant) # ------------------------------------------------------------------ logger.info("Generating references and footer") references = self._generate_references_section() footer = self._generate_footer() accumulated += references + footer logger.info("Streaming report generation complete") yield accumulated, "✅ Report complete" def _generate_references_section(self) -> str: """Generate references section from all citations and titles used""" # get_all_citations now returns List[tuple] i.e. [(citation, title), ...] references_data = self.generator.get_all_citations() if not references_data: return "" references = "## References\n\n" references += "The following peer-reviewed publications were cited in this report:\n\n" for i, (citation, title) in enumerate(references_data, 1): if title and title != citation: references += f"{i}. {citation}: {title}\n" else: references += f"{i}. {citation}\n" references += "\n" return references def _generate_footer(self) -> str: """Generate report footer with metadata""" timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") footer = f"""--- **Report Generated:** {timestamp} **Model:** MedGemma 1.5 4B **System:** Microbiome-Immunotherapy Clinical Decision Support v1.0 *This report is intended for use by qualified healthcare professionals as a clinical decision support tool. It does not constitute medical advice and should be interpreted in conjunction with comprehensive clinical evaluation.* """ return footer def save_report(self, report: str, patient_id: str, output_dir: str = None) -> str: """ Save report to markdown file Args: report: Complete report markdown string patient_id: Patient identifier for filename output_dir: Output directory (uses config default if not provided) Returns: Path to saved report file """ if output_dir is None: output_dir = config.OUTPUT_DIR # Create output directory if it doesn't exist output_path = Path(output_dir) output_path.mkdir(parents=True, exist_ok=True) # Generate filename timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") filename = f"microbiome_ici_report_{patient_id}_{timestamp}.md" filepath = output_path / filename # Save report with open(filepath, 'w') as f: f.write(report) logger.info(f"Report saved to: {filepath}") return str(filepath) def generate_and_save(self, patient_json_path: str, output_dir: str = None) -> str: """ Complete workflow: load data, generate report, save to file Args: patient_json_path: Path to patient JSON file output_dir: Optional output directory override Returns: Path to saved report file """ # Load patient data patient_data = self.load_patient_data(patient_json_path) patient_id = patient_data["patient"]["id"] # Generate report report = self.generate_full_report(patient_data) # Save report output_path = self.save_report(report, patient_id, output_dir) return output_path def generate_and_save_from_ehr( self, ehr_path: str, output_dir: str = None, save_json_path: str = None, ) -> str: """ Complete EHR workflow: extract JSON from EHR, generate report, save to file. Args: ehr_path: Path to the plain-text EHR report. output_dir: Optional output directory override. save_json_path: If provided, save the extracted patient JSON to this path so it can be inspected or reused without re-running extraction. Returns: Path to the saved report markdown file. """ # Step 1: Extract patient data from EHR patient_data = self.load_patient_data_from_ehr(ehr_path) patient_id = patient_data["patient"]["id"] # Step 2: Optionally save the extracted JSON if save_json_path: import json as _json from pathlib import Path as _Path _Path(save_json_path).parent.mkdir(parents=True, exist_ok=True) with open(save_json_path, "w", encoding="utf-8") as f: _json.dump(patient_data, f, indent=2) logger.info(f"Extracted patient JSON saved to: {save_json_path}") # Step 3: Generate report report = self.generate_full_report(patient_data) # Step 4: Save report output_path = self.save_report(report, patient_id, output_dir) return output_path