"""
Section generation functions for each report section.
"""

import logging
from typing import Dict, Optional, List

from .models import get_medgemma
from .rag import RAGRetriever
from .prompts import build_prompt
from . import config

# Module-level logger shared by every section generator in this file.
logger = logging.getLogger(__name__)


class SectionGenerator:
    """Generate the individual Markdown sections of the clinical report.

    The preamble is rendered directly from ``patient_data``.  Sections 1-5
    retrieve evidence chunks through ``self.rag``, build a prompt with
    ``build_prompt`` and ask ``self.llm`` for the section body; section 6
    formats its own prompt from the data-quality fields.  A section method
    returns ``None`` when the section is omitted.  The citation metadata of
    every retrieved chunk is accumulated in ``self.all_citations``.
    """

    # Fallback text used when no taxon has a positive abundance.
    _NO_TAXA_TEXT = "none detected above threshold"

    # (display label, key inside ``metabolites["scfa"]``) in output order.
    _SCFA_FIELDS = (
        ("Butyrate", "butyrate_uM"),
        ("Propionate", "propionate_uM"),
        ("Acetate", "acetate_uM"),
    )

    def __init__(self):
        """Create the LLM handle, the RAG retriever and an empty citation map."""
        self.llm = get_medgemma()
        self.rag = RAGRetriever()
        # Maps citation -> title for every chunk used by a generated section.
        self.all_citations = {}

    # ------------------------------------------------------------------
    # Private helpers shared by several sections
    # ------------------------------------------------------------------

    def _record_citations(self, chunks) -> None:
        """Store the (citation, title) pairs of *chunks* in ``all_citations``."""
        for citation, title in self.rag.get_unique_citation_metadata(chunks):
            self.all_citations[citation] = title

    def _run_llm(self, section_key: str, prompt: str):
        """Run the LLM on *prompt* with the token budget configured for the section."""
        return self.llm.generate(
            prompt,
            max_new_tokens=config.SECTION_MAX_NEW_TOKENS[section_key],
        )

    @staticmethod
    def _detected_taxa(key_bacteria: Dict) -> List[tuple]:
        """Return ``(display name, abundance)`` for taxa with abundance > 0.

        Keys are prettified ("some_taxon" -> "Some Taxon"); ``None`` and
        non-positive abundances are dropped; input order is preserved.
        """
        return [
            (taxon.replace("_", " ").title(), abundance)
            for taxon, abundance in key_bacteria.items()
            if abundance is not None and abundance > 0
        ]

    @staticmethod
    def _has_scfa(metabolites: Dict) -> bool:
        """Return True when at least one SCFA measurement is not ``None``."""
        return any(value is not None for value in metabolites["scfa"].values())

    # ------------------------------------------------------------------
    # Report sections
    # ------------------------------------------------------------------

    def generate_preamble(self, patient_data: Dict) -> str:
        """
        Generate Section 0: Clinical Preamble (auto-populated, no LLM).

        Args:
            patient_data: Mapping with "patient", "cancer", "immunotherapy"
                and "microbiome" entries; the fields read are the ones
                interpolated below.

        Returns:
            The preamble as Markdown text ending with a ``---`` rule.

        Raises:
            KeyError: If a required field is missing from ``patient_data``.
        """
        patient = patient_data["patient"]
        cancer = patient_data["cancer"]
        therapy = patient_data["immunotherapy"]
        microbiome = patient_data["microbiome"]
        biomarkers = cancer["biomarkers"]

        metastases_str = ", ".join(cancer["metastases"]) if cancer["metastases"] else "none"
        # Therapy type defaults to checkpoint inhibitors when not specified.
        therapy_type = therapy.get("therapy_type", "ICI")

        diagnosis = f"**Cancer Diagnosis:** {cancer['stage']} {cancer['type']}"
        if cancer.get("subtype"):
            diagnosis += f" ({cancer['subtype']})"

        lines = [
            "# Microbiome-Immunotherapy Clinical Report",
            "",
            f"**Patient ID:** {patient['id']}",
            f"**Age:** {patient['age']} years",
            f"**Gender:** {patient['gender']}",
            "",
            "## Clinical Context",
            "",
            diagnosis,
            f"**Primary Site:** {cancer['primary_site']}",
            f"**Metastases:** {metastases_str}",
            f"**Diagnosis Date:** {cancer['diagnosis_date']}",
            "",
            "**Tumor Biomarkers:**",
            f"- PD-L1 Expression: {biomarkers['pdl1_expression']}",
            f"- Tumor Mutational Burden (TMB): {biomarkers['tmb']}",
            f"- Microsatellite Instability (MSI): {biomarkers['msi_status']}",
            "",
            "## Planned Immunotherapy",
            "",
            f"**Therapy Type:** {therapy_type}",
            f"**Drug:** {therapy['drug_name']} ({therapy['drug_class']})",
            f"**Treatment Setting:** {therapy['treatment_setting']}",
            f"**Line of Therapy:** {therapy['line_of_therapy']}",
            f"**Planned Start Date:** {therapy['planned_start_date']}",
        ]

        # The ACT block is only shown for ACT therapy with non-empty details.
        if therapy_type == "ACT" and therapy.get("act_details"):
            act = therapy["act_details"]
            lines += [
                "",
                "**ACT Details:**",
                f"- ACT Type: {act.get('act_type', 'N/A')}",
                f"- Target Antigen: {act.get('target_antigen', 'N/A')}",
                f"- Cell Source: {act.get('cell_source', 'N/A')}",
                f"- Preconditioning Regimen: {act.get('preconditioning_regimen', 'N/A')}",
                f"- T-Cell Harvest Date: {act.get('t_cell_harvest_date', 'N/A')}",
                f"- Expected CRS Risk: {act.get('expected_crs_risk', 'N/A')}",
                f"- Expected Neurotoxicity Risk: {act.get('expected_neurotoxicity_risk', 'N/A')}",
            ]

        lines += [
            "",
            "## Microbiome Profile Overview",
            "",
            f"**Sample Date:** {microbiome['sample_date']}",
            f"**Sequencing Method:** {microbiome['sequencing_method']}",
            "",
            "This report summarizes gut microbiome findings relevant to "
            "anticipated immunotherapy response based on current evidence "
            "from peer-reviewed literature.",
            "",
            "---",
        ]
        return "\n".join(lines) + "\n"

    def generate_section_1(self, patient_data: Dict) -> Optional[str]:
        """
        Generate Section 1: Microbiome Diversity & Composition Profile.

        Returns:
            The section as Markdown, or ``None`` when no evidence chunks were
            retrieved for it.
        """
        logger.info("Generating Section 1: Microbiome Diversity & Composition Profile")

        chunks = self.rag.retrieve_for_section_1(patient_data)
        if not chunks:
            logger.warning("No evidence retrieved for Section 1, omitting section")
            return None

        self._record_citations(chunks)
        evidence = self.rag.format_chunks_for_llm(chunks)

        microbiome = patient_data["microbiome"]
        taxa = self._detected_taxa(microbiome["key_bacteria"])
        if taxa:
            detected_taxa_str = "\n".join(
                f"- {name}: {abundance}%" for name, abundance in taxa
            )
        else:
            detected_taxa_str = "None detected above threshold"

        diversity = microbiome["diversity"]
        prompt = build_prompt(
            "section_1",
            patient_data,
            evidence,
            cancer_stage=patient_data["cancer"]["stage"],
            shannon_index=diversity["shannon_index"],
            simpson_index=diversity["simpson_index"],
            observed_species=diversity["observed_species"],
            detected_taxa=detected_taxa_str,
        )

        content = self._run_llm("section_1", prompt)
        return f"## 1. Microbiome Diversity & Composition Profile\n\n{content}\n\n"

    def generate_section_2(self, patient_data: Dict) -> Optional[str]:
        """
        Generate Section 2: Metabolite Landscape.

        Returns:
            The section as Markdown, or ``None`` when the patient has no
            metabolite data or no evidence chunks were retrieved.
        """
        logger.info("Generating Section 2: Metabolite Landscape")

        metabolites = patient_data["microbiome"]["metabolites"]
        has_scfa = self._has_scfa(metabolites)
        has_metabolites = (
            has_scfa
            or metabolites["bile_acids_available"]
            or metabolites["tryptophan_metabolites_available"]
        )
        if not has_metabolites:
            logger.info("No metabolite data available, omitting Section 2")
            return None

        chunks = self.rag.retrieve_for_section_2(patient_data)
        if not chunks:
            logger.warning("No evidence retrieved for Section 2, omitting section")
            return None

        self._record_citations(chunks)
        evidence = self.rag.format_chunks_for_llm(chunks)

        metabolite_lines = []
        if has_scfa:
            metabolite_lines.append("**Short-Chain Fatty Acids:**")
            scfa = metabolites["scfa"]
            for label, key in self._SCFA_FIELDS:
                # .get(): a measurement that is absent is treated like one
                # that is None instead of raising KeyError.
                value = scfa.get(key)
                if value is not None:
                    metabolite_lines.append(f"- {label}: {value} μM")
        if metabolites["bile_acids_available"]:
            metabolite_lines.append("**Bile Acids:** Analysis available")
        if metabolites["tryptophan_metabolites_available"]:
            metabolite_lines.append("**Tryptophan Metabolites:** Analysis available")

        prompt = build_prompt(
            "section_2",
            patient_data,
            evidence,
            metabolite_data="\n".join(metabolite_lines),
        )

        content = self._run_llm("section_2", prompt)
        return f"## 2. Metabolite Landscape\n\n{content}\n\n"

    def generate_section_3(self, patient_data: Dict) -> Optional[str]:
        """
        Generate Section 3: Drug–Microbiome Interaction Outlook.

        Supports both ICI and ACT therapies: the therapy type (default
        "ICI") decides which extra fields are passed to the prompt and which
        section title is used.

        Returns:
            The section as Markdown, or ``None`` when no evidence chunks were
            retrieved for it.
        """
        logger.info("Generating Section 3: Drug–Microbiome Interaction Outlook")

        chunks = self.rag.retrieve_for_section_3(patient_data)
        if not chunks:
            logger.warning("No evidence retrieved for Section 3, omitting section")
            return None

        self._record_citations(chunks)
        evidence = self.rag.format_chunks_for_llm(chunks)

        microbiome = patient_data["microbiome"]
        diversity = microbiome["diversity"]
        immunotherapy = patient_data["immunotherapy"]

        # Five most abundant detected taxa (stable sort keeps input order on ties).
        top_taxa = sorted(
            self._detected_taxa(microbiome["key_bacteria"]),
            key=lambda item: item[1],
            reverse=True,
        )[:5]
        key_taxa_summary = (
            ", ".join(f"{name} ({abundance}%)" for name, abundance in top_taxa)
            or self._NO_TAXA_TEXT
        )

        metabolites = microbiome["metabolites"]
        metabolite_flags = []
        if self._has_scfa(metabolites):
            metabolite_flags.append("SCFAs measured")
        if metabolites["bile_acids_available"]:
            metabolite_flags.append("bile acids available")
        if metabolites["tryptophan_metabolites_available"]:
            metabolite_flags.append("tryptophan metabolites available")
        metabolite_summary = ", ".join(metabolite_flags) if metabolite_flags else "limited metabolite data"

        # Fields common to both therapy types.
        prompt_fields = {
            "cancer_stage": patient_data["cancer"]["stage"],
            "line_of_therapy": immunotherapy["line_of_therapy"],
            "shannon_index": diversity["shannon_index"],
            "simpson_index": diversity["simpson_index"],
            "key_taxa_summary": key_taxa_summary,
            "metabolite_summary": metabolite_summary,
        }

        if immunotherapy.get("therapy_type", "ICI") == "ACT":
            # `or {}` also covers an explicit null "act_details" entry.
            act_details = immunotherapy.get("act_details") or {}
            prompt_fields.update(
                act_type=act_details.get("act_type", "CAR-T"),
                target_antigen=act_details.get("target_antigen", "CD19"),
                cell_source=act_details.get("cell_source", "autologous"),
                crs_risk=act_details.get("expected_crs_risk", "unknown"),
                neurotoxicity_risk=act_details.get("expected_neurotoxicity_risk", "unknown"),
            )
            section_title = "3. Microbiome–ACT Interaction Outlook"
        else:
            biomarkers = patient_data["cancer"]["biomarkers"]
            prompt_fields.update(
                pdl1=biomarkers["pdl1_expression"],
                tmb=biomarkers["tmb"],
                msi=biomarkers["msi_status"],
            )
            section_title = "3. Drug–Microbiome Interaction Outlook"

        prompt = build_prompt("section_3", patient_data, evidence, **prompt_fields)

        content = self._run_llm("section_3", prompt)
        return f"## {section_title}\n\n{content}\n\n"

    def generate_section_4(self, patient_data: Dict) -> Optional[str]:
        """
        Generate Section 4: Confounding Factors.

        Confounders considered: recent antibiotics, current PPI use, prior
        chemotherapy, prior immunotherapy and comorbidities.

        Returns:
            The section as Markdown, or ``None`` when the patient has none of
            these confounders or no evidence chunks were retrieved.
        """
        logger.info("Generating Section 4: Confounding Factors")

        meds = patient_data["medications"]
        prior = patient_data["prior_treatments"]
        # Truthiness instead of len(): a null "comorbidities" entry counts as
        # "no comorbidities" rather than raising TypeError.
        comorbidities = patient_data.get("comorbidities")

        has_confounders = (
            meds["antibiotic_history"]["recent_antibiotics"]
            or meds["ppi_use"]["currently_on_ppi"]
            or prior["chemotherapy"]["received"]
            or prior["prior_immunotherapy"]["received"]
            or bool(comorbidities)
        )
        if not has_confounders:
            logger.info("No confounding factors present, omitting Section 4")
            return None

        chunks = self.rag.retrieve_for_section_4(patient_data)
        if not chunks:
            logger.warning("No evidence retrieved for Section 4, omitting section")
            return None

        self._record_citations(chunks)
        evidence = self.rag.format_chunks_for_llm(chunks)

        confounding_lines = []

        antibiotics = meds["antibiotic_history"]
        if antibiotics["recent_antibiotics"]:
            confounding_lines.append("**Antibiotic Exposure:**")
            confounding_lines.extend(
                f"- {exp['antibiotic_name']} ({exp['antibiotic_class']}): "
                f"{exp['start_date']} to {exp['end_date']} "
                f"({exp['days_before_ici']} days before ICI start)"
                for exp in antibiotics["exposures"]
            )

        ppi = meds["ppi_use"]
        if ppi["currently_on_ppi"]:
            confounding_lines.append("**Proton Pump Inhibitor Use:**")
            confounding_lines.append(
                f"- {ppi['ppi_name']}, duration: {ppi['duration_months']} months"
            )

        chemo = prior["chemotherapy"]
        if chemo["received"]:
            confounding_lines.append("**Prior Chemotherapy:**")
            confounding_lines.append(f"- Regimens: {', '.join(chemo['regimens'])}")
            confounding_lines.append(f"- Response: {chemo['response']}")

        prior_ici = prior["prior_immunotherapy"]
        if prior_ici["received"]:
            confounding_lines.append("**Prior Immunotherapy:**")
            confounding_lines.append(f"- Drugs: {', '.join(prior_ici['drugs'])}")
            confounding_lines.append(f"- Response: {prior_ici['response']}")

        if comorbidities:
            confounding_lines.append(f"**Comorbidities:** {', '.join(comorbidities)}")

        prompt = build_prompt(
            "section_4",
            patient_data,
            evidence,
            confounding_data="\n".join(confounding_lines),
        )

        content = self._run_llm("section_4", prompt)
        return f"## 4. Confounding Factors\n\n{content}\n\n"

    def generate_section_5(self, patient_data: Dict) -> Optional[str]:
        """
        Generate Section 5: Microbiota-Modulation Intervention Considerations.

        ``retrieve_for_section_5`` is used as a mapping of intervention name
        to chunk list; the "diet" and "probiotics" entries are rendered into
        the evidence text, and citations are recorded for every entry.

        Returns:
            The section as Markdown, or ``None`` when no chunks were
            retrieved for any intervention.
        """
        logger.info("Generating Section 5: Microbiota-Modulation Intervention Considerations")

        intervention_chunks = self.rag.retrieve_for_section_5(patient_data)

        if not any(intervention_chunks.values()):
            logger.warning("No intervention evidence retrieved for Section 5, omitting section")
            return None

        for chunks in intervention_chunks.values():
            self._record_citations(chunks)

        evidence_str = "".join(
            [
                "## Diet and Prebiotics Evidence\n\n",
                self.rag.format_chunks_for_llm(intervention_chunks.get("diet", [])),
                "\n\n## Probiotics Evidence\n\n",
                self.rag.format_chunks_for_llm(intervention_chunks.get("probiotics", [])),
            ]
        )

        # Top five taxa by abundance, matching how section 3 picks its taxa
        # (previously the first five in dict order were used).
        top_taxa = sorted(
            self._detected_taxa(patient_data["microbiome"]["key_bacteria"]),
            key=lambda item: item[1],
            reverse=True,
        )[:5]
        taxa_names = ", ".join(name for name, _ in top_taxa) or self._NO_TAXA_TEXT
        microbiome_summary = f"Detected taxa: {taxa_names}"

        prompt = build_prompt(
            "section_5",
            patient_data,
            evidence_str,
            microbiome_summary=microbiome_summary,
        )

        content = self._run_llm("section_5", prompt)
        return f"## 5. Microbiota-Modulation Intervention Considerations\n\n{content}\n\n"

    def generate_section_6(self, patient_data: Dict) -> str:
        """
        Generate Section 6: Data Quality & Interpretive Limitations.

        No evidence retrieval is done here: the prompt is built from the
        data-quality fields of the microbiome record, and the fixed caveats
        text is placed in front of the generated content.

        Returns:
            The section as Markdown.
        """
        # Kept as a function-scope import, as in the original implementation.
        from .prompts import SECTION_6_FIXED_CAVEATS, SECTION_6_PROMPT

        logger.info("Generating Section 6: Data Quality & Interpretive Limitations")

        microbiome = patient_data["microbiome"]
        data_quality = microbiome["data_quality"]
        metabolites = microbiome["metabolites"]

        dq_lines = [
            f"**Sequencing Method:** {microbiome['sequencing_method']}",
            f"**Data Completeness:** {data_quality['completeness']}",
            f"**Data Source:** {data_quality['source']}",
        ]

        limitations = data_quality.get("limitations")
        if limitations:
            dq_lines.append(f"**Limitations:** {', '.join(limitations)}")

        missing_metabolites = []
        if not self._has_scfa(metabolites):
            missing_metabolites.append("Short-chain fatty acids")
        if not metabolites["bile_acids_available"]:
            missing_metabolites.append("Bile acids")
        if not metabolites["tryptophan_metabolites_available"]:
            missing_metabolites.append("Tryptophan metabolites")
        if missing_metabolites:
            dq_lines.append(f"**Missing Metabolite Data:** {', '.join(missing_metabolites)}")

        prompt = SECTION_6_PROMPT.format(data_quality="\n".join(dq_lines))

        content = self._run_llm("section_6", prompt)

        full_content = f"{SECTION_6_FIXED_CAVEATS}\n\n{content}"
        return f"## 6. Data Quality & Interpretive Limitations\n\n{full_content}\n\n"

    def get_all_citations(self) -> List[tuple]:
        """Return sorted list of all unique (citation, title) tuples used in the report."""
        return sorted(self.all_citations.items())