Kevinshh committed on
Commit
45aaa43
·
verified ·
1 Parent(s): 213ee82

Upload generator.py

Browse files
Files changed (1) hide show
  1. report/generator.py +410 -0
report/generator.py ADDED
@@ -0,0 +1,410 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Report Generation Module.
3
+
4
+ This module handles the generation of professional HTML reports from
5
+ normalized analysis results. It also supports PDF export.
6
+
7
+ Design Philosophy:
8
+ - Reports should look professionally designed
9
+ - All content should be bilingual (CN/EN) where appropriate
10
+ - No mention of "AI", "model", or "auto-generated"
11
+ - Clear distinction between data-backed and assumed conclusions
12
+ """
13
+
14
+ import os
15
+ from typing import Optional, Dict, Any
16
+ from pathlib import Path
17
+ from datetime import datetime
18
+
19
+ from jinja2 import Environment, FileSystemLoader, select_autoescape
20
+
21
+ from schemas.canonical_schema import AnalysisResult, RiskLevel
22
+
23
+
24
class ReportGenerator:
    """
    Generates professional reports from normalized analysis results.

    This class:
    1. Loads the HTML template
    2. Prepares data for rendering
    3. Generates HTML output
    4. Optionally exports to PDF
    """

    def __init__(self, template_dir: Optional[str] = None):
        """
        Initialize the report generator.

        Args:
            template_dir: Directory containing report templates.
                Defaults to the ``templates`` folder located one level
                above the directory that holds this file.
        """
        if template_dir is None:
            # Default to templates directory relative to this file
            template_dir = str(Path(__file__).parent.parent / "templates")

        self.template_dir = template_dir

        # Initialize Jinja2 environment; autoescaping is enabled for
        # HTML/XML templates.
        self.env = Environment(
            loader=FileSystemLoader(template_dir),
            autoescape=select_autoescape(['html', 'xml']),
        )

        # Register the custom filters under the names templates use.
        self.env.filters['risk_class'] = self._risk_to_css_class
        self.env.filters['risk_width'] = self._risk_to_width

    def generate_html(self, result: AnalysisResult) -> str:
        """
        Generate HTML report from analysis result.

        Args:
            result: The normalized AnalysisResult

        Returns:
            Complete HTML string
        """
        template = self.env.get_template("report_template.html")
        context = self._prepare_context(result)
        return template.render(**context)

    def save_html(
        self,
        result: AnalysisResult,
        output_path: str
    ) -> str:
        """
        Generate and save HTML report to file.

        Missing parent directories of ``output_path`` are created.

        Args:
            result: The normalized AnalysisResult
            output_path: Path to save the HTML file

        Returns:
            Path to the saved file (the ``output_path`` argument, unchanged)
        """
        # Render first so a rendering failure leaves no directories behind.
        html = self.generate_html(result)

        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(html)

        return output_path

    def generate_pdf(
        self,
        result: AnalysisResult,
        output_path: str
    ) -> Optional[str]:
        """
        Generate PDF report from analysis result.

        This is a best-effort operation: failures are reported on stdout
        and signalled by a ``None`` return value instead of an exception.
        Missing parent directories of ``output_path`` are created.

        Args:
            result: The normalized AnalysisResult
            output_path: Path to save the PDF file

        Returns:
            Path to the saved PDF file, or None if failed
        """
        try:
            # Imported lazily so the module stays usable without weasyprint.
            from weasyprint import HTML

            html = self.generate_html(result)

            Path(output_path).parent.mkdir(parents=True, exist_ok=True)

            # base_url is the template directory so relative references
            # in the rendered HTML are resolved against it.
            HTML(string=html, base_url=self.template_dir).write_pdf(output_path)

            return output_path

        except ImportError:
            print("Warning: weasyprint not installed. PDF generation disabled.")
            return None
        except Exception as e:
            print(f"Error generating PDF: {e}")
            return None

    def _prepare_context(self, result: AnalysisResult) -> Dict[str, Any]:
        """
        Prepare the template context from AnalysisResult.

        This transforms the structured data into template-friendly format.

        Args:
            result: The normalized AnalysisResult

        Returns:
            Mapping of template variable names to values
        """
        # Calculate risk levels for the chart
        risk_levels = self._calculate_risk_chart_data(result)

        context: Dict[str, Any] = {
            # Report metadata
            "report_id": result.report_id,
            "date": result.date,

            # API information
            "api_name": result.api_name,
            "api_smiles": result.api_smiles,
            "structure_image": None,  # TODO: Implement structure rendering

            # Reactive groups
            "reactive_groups": result.reactive_groups,

            # Physicochemical properties
            "physicochemical": result.physicochemical,

            # Excipient information
            "excipient_name": result.excipient_name,
            "excipient_name_en": self._get_english_excipient_name(
                result.excipient_name
            ),
            "excipient_profile": result.excipient_profile,

            # Interactions
            "interactions": result.interactions,

            # Formulation strategies
            "formulation_strategies": result.formulation_strategies,

            # Disclaimer content
            "assumptions": result.assumptions,
            "limitations": result.limitations,
        }

        # Risk chart data: (chart key, template prefix, fallback class,
        # fallback width). The fallbacks only apply when a chart key is
        # missing from risk_levels (e.g. an overriding subclass).
        chart_slots = (
            ("maillard", "maillard", "low", 20),
            ("hygroscopicity", "hygro", "low", 20),
            ("chemisorption", "chem", "low", 20),
            ("oxidation", "oxid", "medium", 60),
            ("hydrolysis", "hydro", "low", 20),
        )
        for chart_key, prefix, fallback_class, fallback_width in chart_slots:
            entry = risk_levels.get(chart_key, {})
            context[f"{prefix}_risk_class"] = entry.get("class", fallback_class)
            context[f"{prefix}_risk_width"] = entry.get("width", fallback_width)

        return context

    def _calculate_risk_chart_data(
        self,
        result: AnalysisResult
    ) -> Dict[str, Dict[str, Any]]:
        """
        Calculate risk chart visualization data from interactions.

        Maps interaction types to their risk levels and visual widths.
        Interactions whose Chinese reaction-type name has no chart key
        are ignored; when several interactions map to the same key, the
        last one in ``result.interactions`` wins.

        Args:
            result: The normalized AnalysisResult

        Returns:
            Mapping of chart key to ``{"class": str, "width": int}``
        """
        # Default values shown when no interaction covers a category
        risk_data: Dict[str, Dict[str, Any]] = {
            "maillard": {"class": "low", "width": 20},
            "hygroscopicity": {"class": "low", "width": 30},
            "chemisorption": {"class": "low", "width": 25},
            "oxidation": {"class": "low", "width": 20},
            "hydrolysis": {"class": "low", "width": 20},
        }

        # Map interaction types (Chinese names) to chart keys
        type_mapping = {
            "美拉德反应": "maillard",
            "氧化反应": "oxidation",
            "水解反应": "hydrolysis",
            "吸附作用": "chemisorption",
        }

        # Update from actual interactions
        for interaction in result.interactions:
            chart_key = type_mapping.get(interaction.reaction_type.cn)
            if not chart_key:
                continue

            level = interaction.risk_level
            # Reuse the filter helpers so there is a single risk table;
            # the chart keeps its own fallbacks ("low" / 30).
            risk_data[chart_key] = {
                "class": self._risk_to_css_class(level, default="low"),
                "width": self._risk_to_width(level, default=30),
            }

        return risk_data

    def _get_english_excipient_name(self, cn_name: str) -> str:
        """
        Get English name for common excipients.

        Args:
            cn_name: Chinese excipient name

        Returns:
            The English name, or ``cn_name`` itself when no translation
            is known
        """
        translations = {
            "无水磷酸氢钙": "DCP Anhydrous",
            "磷酸氢钙": "Dibasic Calcium Phosphate",
            "乳糖": "Lactose",
            "微晶纤维素": "Microcrystalline Cellulose (MCC)",
            "硬脂酸镁": "Magnesium Stearate",
            "淀粉": "Starch",
            "甘露醇": "Mannitol",
            "交联羧甲纤维素钠": "Croscarmellose Sodium",
        }
        return translations.get(cn_name, cn_name)

    @staticmethod
    def _risk_to_css_class(risk_level: RiskLevel, default: str = "medium") -> str:
        """
        Convert RiskLevel to CSS class name.

        Args:
            risk_level: The risk level to convert
            default: Class returned for a value not in the table

        Returns:
            One of "low", "medium", "high", or ``default``
        """
        mapping = {
            RiskLevel.NONE: "low",
            RiskLevel.LOW: "low",
            RiskLevel.MEDIUM: "medium",
            RiskLevel.HIGH: "high",
        }
        return mapping.get(risk_level, default)

    @staticmethod
    def _risk_to_width(risk_level: RiskLevel, default: int = 50) -> int:
        """
        Convert RiskLevel to percentage width for charts.

        Args:
            risk_level: The risk level to convert
            default: Width returned for a value not in the table

        Returns:
            Bar width as a percentage
        """
        mapping = {
            RiskLevel.NONE: 15,
            RiskLevel.LOW: 30,
            RiskLevel.MEDIUM: 60,
            RiskLevel.HIGH: 90,
        }
        return mapping.get(risk_level, default)
283
+
284
+
285
def create_sample_report() -> str:
    """
    Create a sample report for testing/demonstration.

    Builds a hard-coded AnalysisResult and renders it with a
    ReportGenerator that uses the default template directory.

    Returns:
        HTML string of the sample report
    """
    # Imported locally: these schema types are only needed for the sample.
    from schemas.canonical_schema import (
        BilingualText,
        ReactiveGroup,
        PhysicochemicalProperties,
        ExcipientProfile,
        InteractionMechanism,
        FormulationStrategy,
        ImpurityProfile,
        PropertyType,
        ConfidenceLevel,
    )

    # Create sample data
    result = AnalysisResult(
        report_id="PRE-2025-X89",
        date="2025-12-28",
        api_name="Compound C12CC3...",
        api_smiles="C12CC3(CCN(C4=NC=C(SC5C=CN=C(N)C=5Cl)N=C4)CC3)[C@H](N)C1=CC=CN=2",
        excipient_name="无水磷酸氢钙",

        reactive_groups=[
            ReactiveGroup(
                name=BilingualText(cn="伯胺基团", en="Primary Amine"),
                property_type=PropertyType.BASIC,
                potential_reactions=[
                    BilingualText(cn="美拉德反应", en="Maillard"),
                    BilingualText(cn="氧化脱氨", en="Oxidation"),
                ],
            ),
            ReactiveGroup(
                name=BilingualText(cn="硫醚基团", en="Thioether"),
                property_type=PropertyType.NEUTRAL,
                potential_reactions=[
                    BilingualText(cn="氧化成亚砜", en="Sulfoxide"),
                    BilingualText(cn="氧化成砜", en="Sulfone"),
                ],
            ),
        ],

        physicochemical=PhysicochemicalProperties(
            acidity_basicity=BilingualText(cn="碱性", en="Basic"),
            logp=3.5,
            h_bond_donors=2,
            h_bond_acceptors=6,
        ),

        excipient_profile=ExcipientProfile(
            name=BilingualText(cn="无水磷酸氢钙", en="DCP Anhydrous"),
            formula="CaHPO₄",
            key_properties=[
                "微环境pH约为6.5-7.5",
                "低吸湿性(<1% at 90% RH)",
                "适合直接压片工艺",
            ],
            impurity_profile=ImpurityProfile(
                fe_ppm=10.58,
                mn_ppm=1.18,
            ),
        ),

        interactions=[
            InteractionMechanism(
                reaction_type=BilingualText(cn="美拉德反应", en="Maillard Reaction"),
                risk_level=RiskLevel.NONE,
                mechanism_analysis="DCP不含还原糖或醛基,不具备美拉德反应条件。",
                expert_notes="无需担心此反应途径",
                confidence=ConfidenceLevel.HIGH,
            ),
            InteractionMechanism(
                reaction_type=BilingualText(cn="氧化反应", en="Oxidation"),
                risk_level=RiskLevel.MEDIUM,
                mechanism_analysis="API含硫醚基团(-S-)易富电子,DCP中的微量金属离子(Fe²⁺, Cu²⁺)可在固态下充当催化剂,通过电子转移机制加速硫醚氧化为亚砜。",
                expert_notes="需关注DCP批次中金属离子含量,建议选择Low Metal Grade规格",
                confidence=ConfidenceLevel.MEDIUM,
            ),
            InteractionMechanism(
                reaction_type=BilingualText(cn="酸碱反应", en="Acid-Base"),
                risk_level=RiskLevel.LOW,
                mechanism_analysis="API为碱性,处于DCP微环境pH中性(6.5-7.5)时稳定。",
                expert_notes="两者酸碱性质相容,但需控制制剂微环境",
                confidence=ConfidenceLevel.HIGH,
            ),
            InteractionMechanism(
                reaction_type=BilingualText(cn="吸附作用", en="Adsorption"),
                risk_level=RiskLevel.LOW,
                mechanism_analysis="DCP比表面积较小,对药物的吸附能力有限。",
                expert_notes="常规制剂工艺下影响可控",
                confidence=ConfidenceLevel.MEDIUM,
            ),
        ],

        formulation_strategies=[
            FormulationStrategy(
                title="辅料选择优化",
                description="鉴于-S-的氧化敏感性,建议采购\"Low Metal Grade\"(低金属级)的无水磷酸氢钙。",
            ),
            FormulationStrategy(
                title="稳定剂添加",
                description="建议在处方筛选中考察0.05%-0.1% EDTA二钠(作为金属离子螯合剂)对相关杂质增长的控制效果。",
            ),
            FormulationStrategy(
                title="工艺考量",
                description="该API结构较大,建议应用DCP无水物进行Direct Compression(直接压片工艺),避免湿法制粒过程因API的碱性导致凝胶与酯类辅料发生API与酶或酸碱相关的副反应。",
            ),
        ],

        assumptions=[
            "分析基于SMILES结构推断",
            "假设正常制剂工艺条件",
        ],

        limitations=[
            "具体批次数据需COA确认",
            # NOTE(review): the source text had a mis-encoded character here;
            # "结" is the presumed original — confirm against the upstream file.
            "相容性结论需稳定性试验(Stress Testing)验证",
        ],
    )

    generator = ReportGenerator()
    return generator.generate_html(result)