# Microbiome-Immunotherapy-CDS / src /report_assembler.py
# fierce74's picture
# Add application file
# 7529164
"""
Report assembler - combines sections into final markdown report
"""
import json
import logging
import re
from datetime import datetime
from pathlib import Path
from typing import Dict, Iterator, Optional, Tuple

from . import config
from .section_generators import SectionGenerator
logger = logging.getLogger(__name__)


class ReportAssembler:
    """Assembles the complete clinical report from individual sections.

    Supports two input modes:
    - JSON: load_patient_data() / generate_and_save() (existing path)
    - EHR: load_patient_data_from_ehr() / generate_and_save_from_ehr() (new path)

    Both the one-shot and the streaming report builders run the same section
    pipeline (see _iter_sections), so they produce the same final markdown.
    """

    def __init__(self):
        self.generator = SectionGenerator()

    # ------------------------------------------------------------------
    # Input loading
    # ------------------------------------------------------------------
    def load_patient_data(self, json_path: str) -> Dict:
        """Load patient JSON data from file.

        Args:
            json_path: Path to the patient JSON file.

        Returns:
            The parsed JSON document.
        """
        logger.info("Loading patient data from %s", json_path)
        # Explicit UTF-8: without it the decoding depends on the platform
        # locale and non-ASCII patient data can be misread.
        with open(json_path, "r", encoding="utf-8") as f:
            return json.load(f)

    def load_patient_data_from_ehr(self, ehr_path: str) -> Dict:
        """Extract patient JSON from a raw EHR text file using MedGemma.

        Args:
            ehr_path: Path to the plain-text EHR report.

        Returns:
            Patient data dictionary matching the pipeline schema.
        """
        # Imported here so the JSON-only code path has zero extra import cost
        from .ehr_extractor import EHRExtractor

        logger.info("Extracting patient data from EHR: %s", ehr_path)
        extractor = EHRExtractor()
        return extractor.extract_from_file(ehr_path)

    # ------------------------------------------------------------------
    # Report generation
    # ------------------------------------------------------------------
    def _iter_sections(self, patient_data: Dict) -> Iterator[Tuple[Optional[str], str]]:
        """Lazily run the section pipeline, one section per iteration.

        Each section is generated only when the iterator is advanced, so a
        streaming caller can publish progress between sections.

        Args:
            patient_data: Patient JSON dictionary.

        Yields:
            Tuple of (section_text, next_status). section_text is None when an
            optional section produced nothing and must be left out of the
            report; next_status is the progress message announcing the step
            that follows.
        """
        gen = self.generator
        # (log label, producer, always included?, status shown once it is done)
        steps = (
            # Section 0: Preamble (always included, not LLM-generated)
            ("preamble", gen.generate_preamble, True,
             "⏳ Generating Section 1: Microbiome Composition Profile..."),
            # Section 1: Microbiome Composition Profile
            ("section 1", gen.generate_section_1, False,
             "⏳ Generating Section 2: Metabolite Landscape..."),
            # Section 2: Metabolite Landscape
            ("section 2", gen.generate_section_2, False,
             "⏳ Generating Section 3: Drug–Microbiome Interaction Outlook..."),
            # Section 3: Drug-Microbiome Interaction Outlook
            ("section 3", gen.generate_section_3, False,
             "⏳ Generating Section 4: Confounding Factors..."),
            # Section 4: Confounding Factors
            ("section 4", gen.generate_section_4, False,
             "⏳ Generating Section 5: Intervention Considerations..."),
            # Section 5: Intervention Considerations
            ("section 5", gen.generate_section_5, False,
             "⏳ Generating Section 6: Data Quality & Limitations..."),
            # Section 6: Data Quality & Limitations (always included)
            ("section 6", gen.generate_section_6, True,
             "⏳ Compiling references and finalising report..."),
        )
        for label, produce, always_included, next_status in steps:
            logger.info("Generating %s", label)
            text = produce(patient_data)
            if always_included or text:
                yield text, next_status
            else:
                yield None, next_status

    def generate_full_report(self, patient_data: Dict) -> str:
        """Generate the complete clinical report.

        Args:
            patient_data: Patient JSON dictionary.

        Returns:
            Complete report as markdown string.
        """
        logger.info("Starting full report generation")
        report_sections = [
            text for text, _ in self._iter_sections(patient_data) if text is not None
        ]
        # References are compiled after the sections because they are read
        # back from the generator once all sections have been produced.
        report_sections.append(self._generate_references_section())
        report_sections.append(self._generate_footer())
        full_report = "\n".join(report_sections)
        logger.info("Report generation complete")
        return full_report

    def generate_full_report_streaming(self, patient_data: Dict) -> Iterator[Tuple[str, str]]:
        """Generate the report section by section, yielding cumulative markdown.

        Designed for Gradio generator functions: each yield replaces the current
        content of the output gr.Markdown component, so the clinician sees the
        report grow in real time.

        Args:
            patient_data: Patient JSON dictionary.

        Yields:
            Tuple of (cumulative_report: str, status_message: str) after each
            section is appended. The last cumulative_report is the same string
            generate_full_report() builds from the same section texts.
        """
        logger.info("Starting streaming report generation")
        parts = []
        for text, next_status in self._iter_sections(patient_data):
            if text is not None:
                parts.append(text)
            # A yield is emitted even when an optional section was skipped so
            # the status message still advances.
            yield "\n".join(parts) + "\n", next_status

        # References + Footer (no LLM — instant)
        logger.info("Generating references and footer")
        parts.append(self._generate_references_section())
        parts.append(self._generate_footer())
        logger.info("Streaming report generation complete")
        # Joined exactly like generate_full_report(): this keeps a newline
        # between the references and the footer, so the footer's leading
        # '---' never sits directly under a text line (Markdown would render
        # that line as a setext heading).
        yield "\n".join(parts), "✅ Report complete"

    def _generate_references_section(self) -> str:
        """Generate the references section from all citations and titles used.

        Returns:
            Markdown for the numbered reference list, or an empty string when
            the generator reports no citations.
        """
        # get_all_citations returns List[tuple] i.e. [(citation, title), ...]
        references_data = self.generator.get_all_citations()
        if not references_data:
            return ""
        entries = [
            # The title is only shown when it adds something to the citation.
            f"{i}. {citation}: {title}" if title and title != citation else f"{i}. {citation}"
            for i, (citation, title) in enumerate(references_data, 1)
        ]
        return (
            "## References\n\n"
            "The following peer-reviewed publications were cited in this report:\n\n"
            + "\n".join(entries)
            + "\n\n"
        )

    def _generate_footer(self) -> str:
        """Generate the report footer with generation metadata."""
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        return (
            "---\n"
            f"**Report Generated:** {timestamp}\n"
            "**Model:** MedGemma 1.5 4B\n"
            "**System:** Microbiome-Immunotherapy Clinical Decision Support v1.0\n"
            "*This report is intended for use by qualified healthcare professionals as a clinical decision support tool. It does not constitute medical advice and should be interpreted in conjunction with comprehensive clinical evaluation.*\n"
        )

    # ------------------------------------------------------------------
    # Persistence
    # ------------------------------------------------------------------
    def save_report(self, report: str, patient_id: str, output_dir: Optional[str] = None) -> str:
        """Save the report to a markdown file.

        Args:
            report: Complete report markdown string.
            patient_id: Patient identifier for the filename. Characters other
                than word characters, '.' and '-' are replaced with '_' so the
                identifier cannot introduce path separators.
            output_dir: Output directory (uses config default if not provided).

        Returns:
            Path to saved report file.
        """
        if output_dir is None:
            output_dir = config.OUTPUT_DIR
        # Create output directory if it doesn't exist
        output_path = Path(output_dir)
        output_path.mkdir(parents=True, exist_ok=True)

        # The identifier comes from input data; keep it to a single safe path
        # component ('/' or '..' would otherwise point outside output_dir).
        safe_id = re.sub(r"[^\w.-]+", "_", str(patient_id)).strip("._") or "unknown"
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filepath = output_path / f"microbiome_ici_report_{safe_id}_{timestamp}.md"

        # Explicit UTF-8: the report contains non-ASCII characters and the
        # locale default encoding may be unable to encode them.
        with open(filepath, "w", encoding="utf-8") as f:
            f.write(report)
        logger.info("Report saved to: %s", filepath)
        return str(filepath)

    def generate_and_save(self, patient_json_path: str, output_dir: Optional[str] = None) -> str:
        """Complete workflow: load data, generate report, save to file.

        Args:
            patient_json_path: Path to patient JSON file.
            output_dir: Optional output directory override.

        Returns:
            Path to saved report file.

        Raises:
            KeyError: If the patient data has no ["patient"]["id"] entry.
        """
        patient_data = self.load_patient_data(patient_json_path)
        # Read the id before generating so bad input fails before the
        # section generators run.
        patient_id = patient_data["patient"]["id"]
        report = self.generate_full_report(patient_data)
        return self.save_report(report, patient_id, output_dir)

    def generate_and_save_from_ehr(
        self,
        ehr_path: str,
        output_dir: Optional[str] = None,
        save_json_path: Optional[str] = None,
    ) -> str:
        """Complete EHR workflow: extract JSON from EHR, generate report, save to file.

        Args:
            ehr_path: Path to the plain-text EHR report.
            output_dir: Optional output directory override.
            save_json_path: If provided, save the extracted patient JSON to this path
                so it can be inspected or reused without re-running extraction.

        Returns:
            Path to the saved report markdown file.

        Raises:
            KeyError: If the extracted data has no ["patient"]["id"] entry.
        """
        # Step 1: Extract patient data from EHR
        patient_data = self.load_patient_data_from_ehr(ehr_path)

        # Step 2: Optionally save the extracted JSON. Done before the id is
        # read so an incomplete extraction is still written out for inspection.
        if save_json_path:
            json_path = Path(save_json_path)
            json_path.parent.mkdir(parents=True, exist_ok=True)
            with json_path.open("w", encoding="utf-8") as f:
                json.dump(patient_data, f, indent=2)
            logger.info("Extracted patient JSON saved to: %s", save_json_path)

        patient_id = patient_data["patient"]["id"]

        # Step 3: Generate report
        report = self.generate_full_report(patient_data)

        # Step 4: Save report
        return self.save_report(report, patient_id, output_dir)