Spaces:

fierce74
/

Microbiome-Immunotherapy-CDS

Running

App Files Files Community

Microbiome-Immunotherapy-CDS / src /report_assembler.py

fierce74

Add application file

7529164 15 days ago

raw

history blame contribute delete

12.9 kB

	"""
	Report assembler - combines sections into final markdown report
	"""

	import json
	import logging
	from datetime import datetime
	from pathlib import Path
	from typing import Dict, Iterator, Optional, Tuple

	from . import config
	from .section_generators import SectionGenerator

	logger = logging.getLogger(__name__)


	class ReportAssembler:
	    """Assemble the complete clinical report from individually generated sections.

	    Supports two input modes:
	    - JSON: load_patient_data() / generate_and_save() (existing path)
	    - EHR: load_patient_data_from_ehr() / generate_and_save_from_ehr() (new path)

	    The blocking (generate_full_report) and streaming
	    (generate_full_report_streaming) paths consume the same private section
	    pipeline, so the final streamed markdown is identical to the blocking
	    result (apart from the footer timestamp, which is taken at call time).
	    """

	    # Titles of the numbered sections, in report order. They are only used to
	    # build the progress messages shown while streaming.
	    _SECTION_TITLES = (
	        "Microbiome Composition Profile",
	        "Metabolite Landscape",
	        "Drug–Microbiome Interaction Outlook",
	        "Confounding Factors",
	        "Intervention Considerations",
	        "Data Quality & Limitations",
	    )

	    # Path separators and characters that are reserved in file names on
	    # common platforms; replaced before a patient id is used in a file name.
	    _UNSAFE_FILENAME_CHARS = frozenset('/\\:*?"<>|')

	    def __init__(self):
	        self.generator = SectionGenerator()

	    # ------------------------------------------------------------------
	    # Input loading
	    # ------------------------------------------------------------------

	    def load_patient_data(self, json_path: str) -> Dict:
	        """Load patient data from a UTF-8 encoded JSON file.

	        Args:
	            json_path: Path to the patient JSON file.

	        Returns:
	            The parsed JSON content.
	        """
	        logger.info("Loading patient data from %s", json_path)

	        # Explicit encoding: the platform default is not UTF-8 everywhere.
	        with open(json_path, "r", encoding="utf-8") as f:
	            return json.load(f)

	    def load_patient_data_from_ehr(self, ehr_path: str) -> Dict:
	        """Extract patient data from a raw EHR text file via EHRExtractor.

	        Args:
	            ehr_path: Path to the plain-text EHR report.

	        Returns:
	            Whatever EHRExtractor.extract_from_file() returns for the file
	            (expected to be a patient data dictionary matching the pipeline
	            schema).
	        """
	        # Imported here so the JSON-only code path has zero extra import cost
	        from .ehr_extractor import EHRExtractor

	        logger.info("Extracting patient data from EHR: %s", ehr_path)
	        return EHRExtractor().extract_from_file(ehr_path)

	    # ------------------------------------------------------------------
	    # Report generation
	    # ------------------------------------------------------------------

	    def _section_status(self, number: int) -> str:
	        """Return the progress message announcing section *number* (1-based)."""
	        title = self._SECTION_TITLES[number - 1]
	        return f"⏳ Generating Section {number}: {title}..."

	    def _iter_body_parts(
	        self, patient_data: Dict
	    ) -> Iterator[Tuple[Optional[str], str]]:
	        """Generate the preamble and sections 1-6 lazily, in report order.

	        Each step is only computed when the consumer asks for it, so a
	        streaming caller can display the previous status while the next
	        section is being generated.

	        Yields:
	            (part, next_status) pairs. ``part`` is the markdown for the step
	            that just finished, or None when an optional section (1-5)
	            produced nothing and must be left out of the report.
	            ``next_status`` describes the work that starts next.
	        """
	        steps = (
	            self.generator.generate_section_1,
	            self.generator.generate_section_2,
	            self.generator.generate_section_3,
	            self.generator.generate_section_4,
	            self.generator.generate_section_5,
	            self.generator.generate_section_6,
	        )
	        last = len(steps)

	        # Section 0: preamble (always included)
	        logger.info("Generating preamble")
	        yield self.generator.generate_preamble(patient_data), self._section_status(1)

	        for number, generate in enumerate(steps, start=1):
	            logger.info("Generating section %d", number)
	            text = generate(patient_data)
	            if number == last:
	                # The final section is always included, even when empty.
	                part = "" if text is None else text
	                status = "⏳ Compiling references and finalising report..."
	            else:
	                # Optional sections are skipped when the generator returns
	                # nothing (None or an empty string).
	                part = text if text else None
	                status = self._section_status(number + 1)
	            yield part, status

	    def generate_full_report(self, patient_data: Dict) -> str:
	        """Generate the complete clinical report in one call.

	        Args:
	            patient_data: Patient JSON dictionary.

	        Returns:
	            Complete report as a markdown string: preamble, the non-empty
	            sections 1-5, section 6, references and footer, joined by
	            newlines.
	        """
	        logger.info("Starting full report generation")

	        parts = [
	            part
	            for part, _next_status in self._iter_body_parts(patient_data)
	            if part is not None
	        ]
	        # The references entry is kept even when empty: the resulting blank
	        # line separates the footer's "---" rule from the preceding text.
	        parts.append(self._generate_references_section())
	        parts.append(self._generate_footer())

	        logger.info("Report generation complete")
	        return "\n".join(parts)

	    def generate_full_report_streaming(
	        self, patient_data: Dict
	    ) -> Iterator[Tuple[str, str]]:
	        """Generate the report section by section, yielding cumulative markdown.

	        Designed for Gradio generator functions: each yield replaces the current
	        content of the output gr.Markdown component, so the clinician sees the
	        report grow in real time.

	        Args:
	            patient_data: Patient JSON dictionary.

	        Yields:
	            Tuple of (cumulative_report: str, status_message: str) after the
	            preamble and after each section attempt. The last yield carries
	            the finished report, laid out exactly like the result of
	            generate_full_report().
	        """
	        logger.info("Starting streaming report generation")
	        accumulated = ""

	        for part, next_status in self._iter_body_parts(patient_data):
	            if part is not None:
	                accumulated += part + "\n"
	            # Yield even when a section was skipped so the status advances.
	            yield accumulated, next_status

	        # References + footer (no section generation involved)
	        logger.info("Generating references and footer")
	        references = self._generate_references_section()
	        footer = self._generate_footer()
	        # The explicit newline mirrors generate_full_report(); without it an
	        # empty references section would put the footer's "---" directly
	        # under the last section line, which markdown renders as a heading.
	        accumulated += references + "\n" + footer

	        logger.info("Streaming report generation complete")
	        yield accumulated, "✅ Report complete"

	    def _generate_references_section(self) -> str:
	        """Build the references section from the generator's citations.

	        Returns:
	            Markdown for the numbered reference list, or an empty string when
	            the generator reports no citations.
	        """
	        # get_all_citations returns a list of (citation, title) tuples
	        references_data = self.generator.get_all_citations()
	        if not references_data:
	            return ""

	        lines = [
	            "## References\n\n",
	            "The following peer-reviewed publications were cited in this report:\n\n",
	        ]
	        for index, (citation, title) in enumerate(references_data, 1):
	            # Only show the title when it adds information to the citation.
	            if title and title != citation:
	                lines.append(f"{index}. {citation}: {title}\n")
	            else:
	                lines.append(f"{index}. {citation}\n")
	        lines.append("\n")
	        return "".join(lines)

	    def _generate_footer(self) -> str:
	        """Build the report footer with generation metadata and disclaimer.

	        The footer is assembled from explicit lines rather than an indented
	        multi-line literal so that source indentation can never leak into the
	        markdown (leading whitespace would render the text as a code block).
	        """
	        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
	        footer_lines = (
	            "---",
	            "",
	            f"Report Generated: {timestamp}",
	            "Model: MedGemma 1.5 4B",
	            "System: Microbiome-Immunotherapy Clinical Decision Support v1.0",
	            "",
	            "This report is intended for use by qualified healthcare "
	            "professionals as a clinical decision support tool. It does not "
	            "constitute medical advice and should be interpreted in "
	            "conjunction with comprehensive clinical evaluation.",
	            "",  # trailing newline
	        )
	        return "\n".join(footer_lines)

	    # ------------------------------------------------------------------
	    # Persistence
	    # ------------------------------------------------------------------

	    @classmethod
	    def _safe_filename_part(cls, value) -> str:
	        """Return str(value) with path separators, reserved and control
	        characters replaced by underscores, so it stays one file name part."""
	        return "".join(
	            "_" if (ch in cls._UNSAFE_FILENAME_CHARS or ord(ch) < 32) else ch
	            for ch in str(value)
	        )

	    def save_report(
	        self, report: str, patient_id: str, output_dir: Optional[str] = None
	    ) -> str:
	        """Save the report to a timestamped, UTF-8 encoded markdown file.

	        Args:
	            report: Complete report markdown string.
	            patient_id: Patient identifier used in the file name. Characters
	                that cannot appear in a file name are replaced by "_".
	            output_dir: Output directory (config.OUTPUT_DIR if not provided).
	                It is created when missing.

	        Returns:
	            Path to the saved report file.
	        """
	        if output_dir is None:
	            output_dir = config.OUTPUT_DIR

	        output_path = Path(output_dir)
	        output_path.mkdir(parents=True, exist_ok=True)

	        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
	        safe_id = self._safe_filename_part(patient_id)
	        filepath = output_path / f"microbiome_ici_report_{safe_id}_{timestamp}.md"

	        # Reports may contain non-ASCII text; do not rely on the locale default.
	        with open(filepath, "w", encoding="utf-8") as f:
	            f.write(report)

	        logger.info("Report saved to: %s", filepath)
	        return str(filepath)

	    # ------------------------------------------------------------------
	    # End-to-end workflows
	    # ------------------------------------------------------------------

	    def generate_and_save(
	        self, patient_json_path: str, output_dir: Optional[str] = None
	    ) -> str:
	        """Complete JSON workflow: load data, generate report, save to file.

	        Args:
	            patient_json_path: Path to patient JSON file.
	            output_dir: Optional output directory override.

	        Returns:
	            Path to saved report file.

	        Raises:
	            KeyError: If the loaded data has no ["patient"]["id"] entry.
	        """
	        patient_data = self.load_patient_data(patient_json_path)
	        patient_id = patient_data["patient"]["id"]

	        report = self.generate_full_report(patient_data)
	        return self.save_report(report, patient_id, output_dir)

	    def generate_and_save_from_ehr(
	        self,
	        ehr_path: str,
	        output_dir: Optional[str] = None,
	        save_json_path: Optional[str] = None,
	    ) -> str:
	        """Complete EHR workflow: extract data, generate report, save to file.

	        Args:
	            ehr_path: Path to the plain-text EHR report.
	            output_dir: Optional output directory override.
	            save_json_path: If provided, save the extracted patient JSON to this
	                path so it can be inspected or reused without re-running
	                extraction.

	        Returns:
	            Path to the saved report markdown file.

	        Raises:
	            KeyError: If the extracted data has no ["patient"]["id"] entry.
	        """
	        # Step 1: Extract patient data from EHR
	        patient_data = self.load_patient_data_from_ehr(ehr_path)
	        patient_id = patient_data["patient"]["id"]

	        # Step 2: Optionally save the extracted JSON
	        if save_json_path:
	            json_target = Path(save_json_path)
	            json_target.parent.mkdir(parents=True, exist_ok=True)
	            with open(json_target, "w", encoding="utf-8") as f:
	                json.dump(patient_data, f, indent=2)
	            logger.info("Extracted patient JSON saved to: %s", save_json_path)

	        # Step 3: Generate report
	        report = self.generate_full_report(patient_data)

	        # Step 4: Save report
	        return self.save_report(report, patient_id, output_dir)