# Preformu / report / generator.py
# Repository page residue: "Kevinshh's picture"
# Commit message: "Upload generator.py"
# Commit: 45aaa43 (verified)
"""
Report Generation Module.
This module handles the generation of professional HTML reports from
normalized analysis results. It also supports PDF export.
Design Philosophy:
- Reports should look professionally designed
- All content should be bilingual (CN/EN) where appropriate
- No mention of "AI", "model", or "auto-generated"
- Clear distinction between data-backed and assumed conclusions
"""
import os
from typing import Optional, Dict, Any
from pathlib import Path
from datetime import datetime
from jinja2 import Environment, FileSystemLoader, select_autoescape
from schemas.canonical_schema import AnalysisResult, RiskLevel
class ReportGenerator:
    """
    Generates professional reports from normalized analysis results.

    This class:
    1. Loads the HTML template
    2. Prepares data for rendering
    3. Generates HTML output
    4. Optionally exports to PDF
    """

    # Template rendered by generate_html(); looked up inside template_dir.
    _TEMPLATE_NAME = "report_template.html"

    # RiskLevel -> CSS class. Shared by the chart calculation and the
    # 'risk_class' Jinja2 filter so the two can never drift apart.
    _RISK_CLASSES = {
        RiskLevel.NONE: "low",
        RiskLevel.LOW: "low",
        RiskLevel.MEDIUM: "medium",
        RiskLevel.HIGH: "high",
    }

    # RiskLevel -> bar width (percent). Shared by the chart calculation and
    # the 'risk_width' Jinja2 filter. Also used as the severity ranking.
    _RISK_WIDTHS = {
        RiskLevel.NONE: 15,
        RiskLevel.LOW: 30,
        RiskLevel.MEDIUM: 60,
        RiskLevel.HIGH: 90,
    }

    # Placeholder chart values for categories no interaction reports on.
    _DEFAULT_RISK_DATA = {
        "maillard": {"class": "low", "width": 20},
        "hygroscopicity": {"class": "low", "width": 30},
        "chemisorption": {"class": "low", "width": 25},
        "oxidation": {"class": "low", "width": 20},
        "hydrolysis": {"class": "low", "width": 20},
    }

    # Chinese reaction-type name -> chart key. There is no entry producing
    # "hygroscopicity", so that bar always shows its placeholder value.
    _CHART_KEY_BY_REACTION = {
        "美拉德反应": "maillard",
        "氧化反应": "oxidation",
        "水解反应": "hydrolysis",
        "吸附作用": "chemisorption",
    }

    # Chart key -> prefix of the "<prefix>_risk_class" / "<prefix>_risk_width"
    # variables passed to the template.
    _CONTEXT_PREFIX_BY_CHART_KEY = {
        "maillard": "maillard",
        "hygroscopicity": "hygro",
        "chemisorption": "chem",
        "oxidation": "oxid",
        "hydrolysis": "hydro",
    }

    # Chinese -> English names for common excipients.
    _EXCIPIENT_NAMES_EN = {
        "无水磷酸氢钙": "DCP Anhydrous",
        "磷酸氢钙": "Dibasic Calcium Phosphate",
        "乳糖": "Lactose",
        "微晶纤维素": "Microcrystalline Cellulose (MCC)",
        "硬脂酸镁": "Magnesium Stearate",
        "淀粉": "Starch",
        "甘露醇": "Mannitol",
        "交联羧甲纤维素钠": "Croscarmellose Sodium",
    }

    def __init__(self, template_dir: Optional[str] = None):
        """
        Initialize the report generator.

        Args:
            template_dir: Directory containing report templates.
                Defaults to the "templates" folder located one level
                above this file's directory.
        """
        if template_dir is None:
            # Default to templates directory relative to this file
            template_dir = str(Path(__file__).parent.parent / "templates")
        self.template_dir = template_dir

        # Autoescaping is enabled for HTML/XML templates so values taken
        # from the analysis result are escaped when rendered.
        self.env = Environment(
            loader=FileSystemLoader(template_dir),
            autoescape=select_autoescape(['html', 'xml']),
        )

        # Custom filters available to the templates
        self.env.filters['risk_class'] = self._risk_to_css_class
        self.env.filters['risk_width'] = self._risk_to_width

    def generate_html(self, result: AnalysisResult) -> str:
        """
        Generate HTML report from analysis result.

        Args:
            result: The normalized AnalysisResult

        Returns:
            Complete HTML string
        """
        template = self.env.get_template(self._TEMPLATE_NAME)
        return template.render(**self._prepare_context(result))

    def save_html(
        self,
        result: AnalysisResult,
        output_path: str
    ) -> str:
        """
        Generate and save HTML report to file.

        Missing parent directories of ``output_path`` are created.

        Args:
            result: The normalized AnalysisResult
            output_path: Path to save the HTML file

        Returns:
            Path to the saved file
        """
        html = self.generate_html(result)
        # Create the target folder first so a fresh output directory
        # does not make the write fail.
        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(html)
        return output_path

    def generate_pdf(
        self,
        result: AnalysisResult,
        output_path: str
    ) -> Optional[str]:
        """
        Generate PDF report from analysis result.

        PDF export is best-effort: every failure is reported on stdout and
        signalled by a ``None`` return value instead of an exception.

        Args:
            result: The normalized AnalysisResult
            output_path: Path to save the PDF file

        Returns:
            Path to the saved PDF file, or None if failed
        """
        # Import separately so only a genuinely missing/unloadable
        # weasyprint is reported as such (not errors raised while rendering).
        try:
            from weasyprint import HTML
        except ImportError:
            print("Warning: weasyprint not installed. PDF generation disabled.")
            return None
        except OSError as e:
            # weasyprint is installed but failed to load, typically because
            # a native library it depends on is missing from the system.
            print(f"Warning: weasyprint could not be loaded ({e}). PDF generation disabled.")
            return None

        try:
            html = self.generate_html(result)
            Path(output_path).parent.mkdir(parents=True, exist_ok=True)
            # base_url lets relative references in the HTML resolve
            # against the template directory.
            HTML(string=html, base_url=self.template_dir).write_pdf(output_path)
        except Exception as e:
            print(f"Error generating PDF: {e}")
            return None
        return output_path

    def _prepare_context(self, result: AnalysisResult) -> Dict[str, Any]:
        """
        Prepare the template context from AnalysisResult.

        Copies the result's fields into a flat dict and adds, for each risk
        chart category, a ``<prefix>_risk_class`` and ``<prefix>_risk_width``
        entry (prefixes: maillard, hygro, chem, oxid, hydro).
        """
        context: Dict[str, Any] = {
            # Report metadata
            "report_id": result.report_id,
            "date": result.date,
            # API information
            "api_name": result.api_name,
            "api_smiles": result.api_smiles,
            "structure_image": None,  # TODO: Implement structure rendering
            # Reactive groups
            "reactive_groups": result.reactive_groups,
            # Physicochemical properties
            "physicochemical": result.physicochemical,
            # Excipient information
            "excipient_name": result.excipient_name,
            "excipient_name_en": self._get_english_excipient_name(
                result.excipient_name
            ),
            "excipient_profile": result.excipient_profile,
            # Interactions
            "interactions": result.interactions,
            # Formulation strategies
            "formulation_strategies": result.formulation_strategies,
            # Disclaimer content
            "assumptions": result.assumptions,
            "limitations": result.limitations,
        }

        # Risk chart data
        risk_levels = self._calculate_risk_chart_data(result)
        for chart_key, prefix in self._CONTEXT_PREFIX_BY_CHART_KEY.items():
            defaults = self._DEFAULT_RISK_DATA[chart_key]
            # Fall back to the shared defaults if an overriding
            # implementation returns an incomplete mapping.
            entry = risk_levels.get(chart_key) or defaults
            context[f"{prefix}_risk_class"] = entry.get("class", defaults["class"])
            context[f"{prefix}_risk_width"] = entry.get("width", defaults["width"])
        return context

    def _calculate_risk_chart_data(
        self,
        result: AnalysisResult
    ) -> Dict[str, Dict[str, Any]]:
        """
        Calculate risk chart visualization data from interactions.

        Each interaction whose Chinese reaction-type name is a known chart
        category replaces that category's placeholder with the CSS class and
        width of its risk level. When several interactions map to the same
        category, the most severe one (widest bar) is kept, so the chart
        does not depend on the order of the interactions. Risk levels not
        present in the mapping tables are shown as "low" with width 30.

        Returns:
            Dict with the keys maillard, hygroscopicity, chemisorption,
            oxidation and hydrolysis, each holding {"class", "width"}.
        """
        # Fresh copies so callers can never mutate the class-level defaults.
        risk_data = {
            key: dict(entry) for key, entry in self._DEFAULT_RISK_DATA.items()
        }

        # Widest bar recorded so far per chart key (from real interactions
        # only; placeholders are always replaced by the first real value).
        worst_width: Dict[str, int] = {}

        for interaction in result.interactions or []:
            chart_key = self._CHART_KEY_BY_REACTION.get(
                interaction.reaction_type.cn
            )
            if chart_key is None:
                continue
            level = interaction.risk_level
            width = self._RISK_WIDTHS.get(level, 30)
            if chart_key in worst_width and width <= worst_width[chart_key]:
                # An equally or more severe interaction is already recorded.
                continue
            worst_width[chart_key] = width
            risk_data[chart_key] = {
                "class": self._RISK_CLASSES.get(level, "low"),
                "width": width,
            }
        return risk_data

    def _get_english_excipient_name(self, cn_name: str) -> str:
        """Get English name for common excipients.

        Names without a known translation are returned unchanged.
        """
        return self._EXCIPIENT_NAMES_EN.get(cn_name, cn_name)

    @staticmethod
    def _risk_to_css_class(risk_level: RiskLevel) -> str:
        """Convert RiskLevel to CSS class name ("medium" if unrecognized)."""
        return ReportGenerator._RISK_CLASSES.get(risk_level, "medium")

    @staticmethod
    def _risk_to_width(risk_level: RiskLevel) -> int:
        """Convert RiskLevel to percentage width for charts (50 if unrecognized)."""
        return ReportGenerator._RISK_WIDTHS.get(risk_level, 50)
def create_sample_report() -> str:
    """
    Render a demonstration report from a fixed, hard-coded AnalysisResult.

    The sample describes one compound (given by SMILES) paired with the
    excipient anhydrous dibasic calcium phosphate. It is rendered with a
    ReportGenerator created with no arguments, i.e. using the default
    template directory.

    Returns:
        HTML string of the sample report
    """
    from schemas.canonical_schema import (
        BilingualText,
        ReactiveGroup,
        PhysicochemicalProperties,
        ExcipientProfile,
        InteractionMechanism,
        FormulationStrategy,
        ImpurityProfile,
        PropertyType,
        ConfidenceLevel,
    )

    def bilingual(cn, en):
        # Shorthand for the CN/EN text pairs used throughout the sample.
        return BilingualText(cn=cn, en=en)

    # --- Reactive groups of the API -------------------------------------
    primary_amine = ReactiveGroup(
        name=bilingual("伯胺基团", "Primary Amine"),
        property_type=PropertyType.BASIC,
        potential_reactions=[
            bilingual("美拉德反应", "Maillard"),
            bilingual("氧化脱氨", "Oxidation"),
        ],
    )
    thioether = ReactiveGroup(
        name=bilingual("硫醚基团", "Thioether"),
        property_type=PropertyType.NEUTRAL,
        potential_reactions=[
            bilingual("氧化成亚砜", "Sulfoxide"),
            bilingual("氧化成砜", "Sulfone"),
        ],
    )

    # --- Physicochemical properties -------------------------------------
    physicochemical = PhysicochemicalProperties(
        acidity_basicity=bilingual("碱性", "Basic"),
        logp=3.5,
        h_bond_donors=2,
        h_bond_acceptors=6,
    )

    # --- Excipient profile ----------------------------------------------
    excipient_profile = ExcipientProfile(
        name=bilingual("无水磷酸氢钙", "DCP Anhydrous"),
        formula="CaHPO₄",
        key_properties=[
            "微环境pH约为6.5-7.5",
            "低吸湿性(<1% at 90% RH)",
            "适合直接压片工艺",
        ],
        impurity_profile=ImpurityProfile(
            fe_ppm=10.58,
            mn_ppm=1.18,
        ),
    )

    # --- Interaction mechanisms -----------------------------------------
    # Columns: CN name, EN name, risk level, mechanism, expert notes, confidence
    interaction_rows = [
        (
            "美拉德反应",
            "Maillard Reaction",
            RiskLevel.NONE,
            "DCP不含还原糖或醛基,不具备美拉德反应条件。",
            "无需担心此反应途径",
            ConfidenceLevel.HIGH,
        ),
        (
            "氧化反应",
            "Oxidation",
            RiskLevel.MEDIUM,
            "API含硫醚基团(-S-)易富电子,DCP中的微量金属离子(Fe²⁺, Cu²⁺)可在固态下充当催化剂,通过电子转移机制加速硫醚氧化为亚砜。",
            "需关注DCP批次中金属离子含量,建议选择Low Metal Grade规格",
            ConfidenceLevel.MEDIUM,
        ),
        (
            "酸碱反应",
            "Acid-Base",
            RiskLevel.LOW,
            "API为碱性,处于DCP微环境pH中性(6.5-7.5)时稳定。",
            "两者酸碱性质相容,但需控制制剂微环境",
            ConfidenceLevel.HIGH,
        ),
        (
            "吸附作用",
            "Adsorption",
            RiskLevel.LOW,
            "DCP比表面积较小,对药物的吸附能力有限。",
            "常规制剂工艺下影响可控",
            ConfidenceLevel.MEDIUM,
        ),
    ]
    interactions = [
        InteractionMechanism(
            reaction_type=bilingual(cn, en),
            risk_level=level,
            mechanism_analysis=analysis,
            expert_notes=notes,
            confidence=confidence,
        )
        for cn, en, level, analysis, notes, confidence in interaction_rows
    ]

    # --- Formulation strategies -----------------------------------------
    strategy_rows = [
        (
            "辅料选择优化",
            "鉴于-S-的氧化敏感性,建议采购\"Low Metal Grade\"(低金属级)的无水磷酸氢钙。",
        ),
        (
            "稳定剂添加",
            "建议在处方筛选中考察0.05%-0.1% EDTA二钠(作为金属离子螯合剂)对相关杂质增长的控制效果。",
        ),
        (
            "工艺考量",
            "该API结构较大,建议应用DCP无水物进行Direct Compression(直接压片工艺),避免湿法制粒过程因API的碱性导致凝胶与酯类辅料发生API与酶或酸碱相关的副反应。",
        ),
    ]
    formulation_strategies = [
        FormulationStrategy(title=title, description=description)
        for title, description in strategy_rows
    ]

    # --- Assemble the sample result and render it -----------------------
    sample = AnalysisResult(
        report_id="PRE-2025-X89",
        date="2025-12-28",
        api_name="Compound C12CC3...",
        api_smiles="C12CC3(CCN(C4=NC=C(SC5C=CN=C(N)C=5Cl)N=C4)CC3)[C@H](N)C1=CC=CN=2",
        excipient_name="无水磷酸氢钙",
        reactive_groups=[primary_amine, thioether],
        physicochemical=physicochemical,
        excipient_profile=excipient_profile,
        interactions=interactions,
        formulation_strategies=formulation_strategies,
        assumptions=[
            "分析基于SMILES结构推断",
            "假设正常制剂工艺条件",
        ],
        limitations=[
            "具体批次数据需COA确认",
            "相容性结论需稳定性试验(Stress Testing)验证",
        ],
    )
    return ReportGenerator().generate_html(sample)