Spaces:

Kevinshh
/

Preformu

Sleeping

App Files Files Community

Preformu / report /generator.py

Kevinshh

Upload generator.py

45aaa43 verified 4 months ago

raw

history blame contribute delete

15.1 kB

	"""
	Report Generation Module.

	This module handles the generation of professional HTML reports from
	normalized analysis results. It also supports PDF export.

	Design Philosophy:
	- Reports should look professionally designed
	- All content should be bilingual (CN/EN) where appropriate
	- No mention of "AI", "model", or "auto-generated"
	- Clear distinction between data-backed and assumed conclusions
	"""

	import os
	from typing import Optional, Dict, Any
	from pathlib import Path
	from datetime import datetime

	from jinja2 import Environment, FileSystemLoader, select_autoescape

	from schemas.canonical_schema import AnalysisResult, RiskLevel


	class ReportGenerator:
	    """
	    Render normalized analysis results as HTML (and optionally PDF) reports.

	    The generator:
	    1. Builds a Jinja2 environment over the template directory
	    2. Flattens an AnalysisResult into a template context
	    3. Renders ``report_template.html`` to an HTML string
	    4. Optionally writes the HTML to disk or converts it to PDF
	    """

	    # Template file looked up in the template directory.
	    _TEMPLATE_NAME = "report_template.html"

	    # Chart rows shown when no interaction supplies a value for them.
	    # Single source of truth for both the chart data and the context
	    # fallbacks, so the two can never disagree.
	    _CHART_DEFAULTS = {
	        "maillard": {"class": "low", "width": 20},
	        "hygroscopicity": {"class": "low", "width": 30},
	        "chemisorption": {"class": "low", "width": 25},
	        "oxidation": {"class": "low", "width": 20},
	        "hydrolysis": {"class": "low", "width": 20},
	    }

	    # Chart key -> prefix of the "<prefix>_risk_class" / "<prefix>_risk_width"
	    # variables exposed to the template.
	    _CHART_CONTEXT_PREFIXES = {
	        "maillard": "maillard",
	        "hygroscopicity": "hygro",
	        "chemisorption": "chem",
	        "oxidation": "oxid",
	        "hydrolysis": "hydro",
	    }

	    # Chinese reaction-type name -> chart key. Reaction types that are not
	    # listed here do not appear in the chart.
	    _REACTION_TO_CHART_KEY = {
	        "美拉德反应": "maillard",
	        "氧化反应": "oxidation",
	        "水解反应": "hydrolysis",
	        "吸附作用": "chemisorption",
	    }

	    # Chinese excipient name -> English display name.
	    _EXCIPIENT_NAMES_EN = {
	        "无水磷酸氢钙": "DCP Anhydrous",
	        "磷酸氢钙": "Dibasic Calcium Phosphate",
	        "乳糖": "Lactose",
	        "微晶纤维素": "Microcrystalline Cellulose (MCC)",
	        "硬脂酸镁": "Magnesium Stearate",
	        "淀粉": "Starch",
	        "甘露醇": "Mannitol",
	        "交联羧甲纤维素钠": "Croscarmellose Sodium",
	    }

	    def __init__(self, template_dir: Optional[str] = None):
	        """
	        Initialize the report generator.

	        Args:
	            template_dir: Directory containing report templates.
	                Defaults to the ``templates`` folder that sits next to
	                this file's parent directory.
	        """
	        if template_dir is None:
	            # <this file>/../../templates
	            template_dir = str(Path(__file__).parent.parent / "templates")

	        self.template_dir = template_dir

	        # Autoescaping is enabled for .html/.xml templates.
	        self.env = Environment(
	            loader=FileSystemLoader(template_dir),
	            autoescape=select_autoescape(['html', 'xml']),
	        )

	        # Custom filters available inside templates.
	        self.env.filters['risk_class'] = self._risk_to_css_class
	        self.env.filters['risk_width'] = self._risk_to_width

	    def generate_html(self, result: AnalysisResult) -> str:
	        """
	        Generate the HTML report for an analysis result.

	        Args:
	            result: The normalized AnalysisResult

	        Returns:
	            Complete HTML string
	        """
	        template = self.env.get_template(self._TEMPLATE_NAME)
	        return template.render(**self._prepare_context(result))

	    def save_html(
	        self,
	        result: AnalysisResult,
	        output_path: str
	    ) -> str:
	        """
	        Generate the HTML report and write it to a UTF-8 file.

	        Args:
	            result: The normalized AnalysisResult
	            output_path: Path to save the HTML file

	        Returns:
	            Path to the saved file (``output_path`` unchanged)
	        """
	        html = self.generate_html(result)

	        with open(output_path, 'w', encoding='utf-8') as f:
	            f.write(html)

	        return output_path

	    def generate_pdf(
	        self,
	        result: AnalysisResult,
	        output_path: str
	    ) -> Optional[str]:
	        """
	        Generate a PDF report from an analysis result.

	        This is best-effort: failures are reported on stdout and signalled
	        with a ``None`` return instead of an exception.

	        Args:
	            result: The normalized AnalysisResult
	            output_path: Path to save the PDF file

	        Returns:
	            Path to the saved PDF file, or None if weasyprint is missing
	            or rendering failed
	        """
	        try:
	            # Imported lazily so the module works without weasyprint. The
	            # import stays inside the broad try because it may fail with
	            # errors other than ImportError.
	            from weasyprint import HTML

	            html = self.generate_html(result)

	            # base_url is passed so relative URLs in the HTML are resolved
	            # against the template directory.
	            HTML(string=html, base_url=self.template_dir).write_pdf(output_path)

	            return output_path

	        except ImportError:
	            print("Warning: weasyprint not installed. PDF generation disabled.")
	            return None
	        except Exception as e:
	            print(f"Error generating PDF: {e}")
	            return None

	    def _prepare_context(self, result: AnalysisResult) -> Dict[str, Any]:
	        """
	        Flatten an AnalysisResult into the variables the template expects.

	        Args:
	            result: The normalized AnalysisResult

	        Returns:
	            Mapping of template variable names to values, including one
	            ``<prefix>_risk_class`` / ``<prefix>_risk_width`` pair per
	            chart row.
	        """
	        chart = self._calculate_risk_chart_data(result)
	        excipient_name_en = self._get_english_excipient_name(
	            result.excipient_name
	        )

	        context: Dict[str, Any] = {
	            # Report metadata
	            "report_id": result.report_id,
	            "date": result.date,

	            # API information
	            "api_name": result.api_name,
	            "api_smiles": result.api_smiles,
	            "structure_image": None,  # TODO: Implement structure rendering

	            # Reactive groups
	            "reactive_groups": result.reactive_groups,

	            # Physicochemical properties
	            "physicochemical": result.physicochemical,

	            # Excipient information
	            "excipient_name": result.excipient_name,
	            "excipient_name_en": excipient_name_en,
	            "excipient_profile": result.excipient_profile,

	            # Interactions
	            "interactions": result.interactions,

	            # Formulation strategies
	            "formulation_strategies": result.formulation_strategies,
	        }

	        # Risk chart data. The fallbacks only matter if a chart row or one
	        # of its fields is missing; they come from the same defaults table
	        # the chart itself starts from.
	        for chart_key, prefix in self._CHART_CONTEXT_PREFIXES.items():
	            fallback = self._CHART_DEFAULTS[chart_key]
	            entry = chart.get(chart_key, fallback)
	            context[f"{prefix}_risk_class"] = entry.get("class", fallback["class"])
	            context[f"{prefix}_risk_width"] = entry.get("width", fallback["width"])

	        # Disclaimer content
	        context["assumptions"] = result.assumptions
	        context["limitations"] = result.limitations

	        return context

	    def _calculate_risk_chart_data(
	        self,
	        result: AnalysisResult
	    ) -> Dict[str, Dict[str, Any]]:
	        """
	        Calculate risk chart visualization data from interactions.

	        Each chart row starts from its default. Interactions whose Chinese
	        reaction-type name is a known chart row replace that default. When
	        several interactions map to the same row, the most severe one
	        (largest bar width) is kept so the chart never under-reports risk;
	        on a tie the later interaction wins.

	        Args:
	            result: The normalized AnalysisResult

	        Returns:
	            Mapping of chart key to ``{"class": <css class>, "width": <int>}``
	        """
	        # Copy the defaults so the class-level table is never mutated.
	        chart = {key: dict(entry) for key, entry in self._CHART_DEFAULTS.items()}

	        # Widest bar charted so far per row, counting interactions only:
	        # the first matching interaction always replaces the default.
	        charted_width: Dict[str, int] = {}

	        for interaction in result.interactions:
	            chart_key = self._REACTION_TO_CHART_KEY.get(
	                interaction.reaction_type.cn
	            )
	            if chart_key is None:
	                continue

	            # Unknown risk levels are charted as low / 30 here.
	            width = self._risk_to_width(interaction.risk_level, default=30)
	            if width < charted_width.get(chart_key, -1):
	                continue  # a more severe interaction is already charted

	            charted_width[chart_key] = width
	            chart[chart_key] = {
	                "class": self._risk_to_css_class(
	                    interaction.risk_level, default="low"
	                ),
	                "width": width,
	            }

	        return chart

	    def _get_english_excipient_name(self, cn_name: str) -> str:
	        """
	        Get the English name for common excipients.

	        Args:
	            cn_name: Chinese excipient name

	        Returns:
	            The English name, or ``cn_name`` unchanged when it is not in
	            the translation table
	        """
	        return self._EXCIPIENT_NAMES_EN.get(cn_name, cn_name)

	    @staticmethod
	    def _risk_to_css_class(risk_level: RiskLevel, default: str = "medium") -> str:
	        """
	        Convert a RiskLevel to a CSS class name.

	        Args:
	            risk_level: Risk level to convert
	            default: Class returned for values that are not a known level

	        Returns:
	            "low", "medium" or "high" (NONE is shown as "low")
	        """
	        mapping = {
	            RiskLevel.NONE: "low",
	            RiskLevel.LOW: "low",
	            RiskLevel.MEDIUM: "medium",
	            RiskLevel.HIGH: "high",
	        }
	        return mapping.get(risk_level, default)

	    @staticmethod
	    def _risk_to_width(risk_level: RiskLevel, default: int = 50) -> int:
	        """
	        Convert a RiskLevel to a percentage width for charts.

	        Args:
	            risk_level: Risk level to convert
	            default: Width returned for values that are not a known level

	        Returns:
	            Bar width in percent
	        """
	        mapping = {
	            RiskLevel.NONE: 15,
	            RiskLevel.LOW: 30,
	            RiskLevel.MEDIUM: 60,
	            RiskLevel.HIGH: 90,
	        }
	        return mapping.get(risk_level, default)


	def create_sample_report() -> str:
	    """
	    Build a demonstration report from hard-coded sample data.

	    The sample describes one API paired with anhydrous dibasic calcium
	    phosphate and is rendered with a default-configured ReportGenerator.

	    Returns:
	        HTML string of the sample report
	    """
	    from schemas.canonical_schema import (
	        BilingualText,
	        ReactiveGroup,
	        PhysicochemicalProperties,
	        ExcipientProfile,
	        InteractionMechanism,
	        FormulationStrategy,
	        ImpurityProfile,
	        PropertyType,
	        ConfidenceLevel,
	    )

	    # Reactive functional groups found on the API.
	    primary_amine = ReactiveGroup(
	        name=BilingualText(cn="伯胺基团", en="Primary Amine"),
	        property_type=PropertyType.BASIC,
	        potential_reactions=[
	            BilingualText(cn="美拉德反应", en="Maillard"),
	            BilingualText(cn="氧化脱氨", en="Oxidation"),
	        ],
	    )
	    thioether = ReactiveGroup(
	        name=BilingualText(cn="硫醚基团", en="Thioether"),
	        property_type=PropertyType.NEUTRAL,
	        potential_reactions=[
	            BilingualText(cn="氧化成亚砜", en="Sulfoxide"),
	            BilingualText(cn="氧化成砜", en="Sulfone"),
	        ],
	    )

	    api_properties = PhysicochemicalProperties(
	        acidity_basicity=BilingualText(cn="碱性", en="Basic"),
	        logp=3.5,
	        h_bond_donors=2,
	        h_bond_acceptors=6,
	    )

	    dcp_profile = ExcipientProfile(
	        name=BilingualText(cn="无水磷酸氢钙", en="DCP Anhydrous"),
	        formula="CaHPO₄",
	        key_properties=[
	            "微环境pH约为6.5-7.5",
	            "低吸湿性(<1% at 90% RH)",
	            "适合直接压片工艺",
	        ],
	        impurity_profile=ImpurityProfile(
	            fe_ppm=10.58,
	            mn_ppm=1.18,
	        ),
	    )

	    # One row per interaction:
	    # (cn name, en name, risk level, mechanism analysis, expert notes, confidence)
	    interaction_rows = [
	        (
	            "美拉德反应", "Maillard Reaction", RiskLevel.NONE,
	            "DCP不含还原糖或醛基，不具备美拉德反应条件。",
	            "无需担心此反应途径",
	            ConfidenceLevel.HIGH,
	        ),
	        (
	            "氧化反应", "Oxidation", RiskLevel.MEDIUM,
	            "API含硫醚基团(-S-)易富电子，DCP中的微量金属离子(Fe²⁺, Cu²⁺)可在固态下充当催化剂，通过电子转移机制加速硫醚氧化为亚砜。",
	            "需关注DCP批次中金属离子含量，建议选择Low Metal Grade规格",
	            ConfidenceLevel.MEDIUM,
	        ),
	        (
	            "酸碱反应", "Acid-Base", RiskLevel.LOW,
	            "API为碱性，处于DCP微环境pH中性(6.5-7.5)时稳定。",
	            "两者酸碱性质相容，但需控制制剂微环境",
	            ConfidenceLevel.HIGH,
	        ),
	        (
	            "吸附作用", "Adsorption", RiskLevel.LOW,
	            "DCP比表面积较小，对药物的吸附能力有限。",
	            "常规制剂工艺下影响可控",
	            ConfidenceLevel.MEDIUM,
	        ),
	    ]
	    sample_interactions = [
	        InteractionMechanism(
	            reaction_type=BilingualText(cn=cn_label, en=en_label),
	            risk_level=level,
	            mechanism_analysis=analysis,
	            expert_notes=notes,
	            confidence=confidence,
	        )
	        for cn_label, en_label, level, analysis, notes, confidence in interaction_rows
	    ]

	    # One row per formulation strategy: (title, description)
	    strategy_rows = [
	        (
	            "辅料选择优化",
	            "鉴于-S-的氧化敏感性，建议采购\"Low Metal Grade\"(低金属级)的无水磷酸氢钙。",
	        ),
	        (
	            "稳定剂添加",
	            "建议在处方筛选中考察0.05%-0.1% EDTA二钠(作为金属离子螯合剂)对相关杂质增长的控制效果。",
	        ),
	        (
	            "工艺考量",
	            "该API结构较大，建议应用DCP无水物进行Direct Compression(直接压片工艺)，避免湿法制粒过程因API的碱性导致凝胶与酯类辅料发生API与酶或酸碱相关的副反应。",
	        ),
	    ]
	    sample_strategies = [
	        FormulationStrategy(title=title, description=description)
	        for title, description in strategy_rows
	    ]

	    sample_result = AnalysisResult(
	        report_id="PRE-2025-X89",
	        date="2025-12-28",
	        api_name="Compound C12CC3...",
	        api_smiles="C12CC3(CCN(C4=NC=C(SC5C=CN=C(N)C=5Cl)N=C4)CC3)[C@H](N)C1=CC=CN=2",
	        excipient_name="无水磷酸氢钙",
	        reactive_groups=[primary_amine, thioether],
	        physicochemical=api_properties,
	        excipient_profile=dcp_profile,
	        interactions=sample_interactions,
	        formulation_strategies=sample_strategies,
	        assumptions=[
	            "分析基于SMILES结构推断",
	            "假设正常制剂工艺条件",
	        ],
	        limitations=[
	            "具体批次数据需COA确认",
	            "相容性结论需稳定性试验(Stress Testing)验证",
	        ],
	    )

	    return ReportGenerator().generate_html(sample_result)