Spaces:

Kevinshh
/

Preformu

Sleeping

App Files Files Community

Kevinshh committed on Jan 2

Commit

214ccb1

verified ·

1 Parent(s): b328949

Upload professional_analyzer.py

Browse files

Files changed (1) hide show

layers/professional_analyzer.py +1126 -0

layers/professional_analyzer.py ADDED Viewed

	@@ -0,0 +1,1126 @@

+"""
+Professional Compatibility Analyzer.
+This module implements the dual-phase analysis approach:
+Phase 1: Deep chemical reasoning (structure-bound analysis)
+Phase 2: Professional report writing (CMC documentation style)
+This replaces the simple template-filling approach with genuine
+expert-level pharmaceutical analysis.
+"""
+import asyncio
+from typing import Optional, Dict, Any, Tuple
+from datetime import datetime
+import os
+from prompts.professional_prompts import ProfessionalPrompts
+from layers.model_invoker import ModelInvoker
+from utils.molecule_renderer import MoleculeRenderer
+from utils.data_parser import DataParser
+from prompts.stability_modeling import StabilityModelingPrompts
+class ProfessionalAnalyzer:
+    """
+    Dual-phase professional compatibility analyzer.
+    Phase 1: Deep reasoning - Chemical mechanism analysis
+    Phase 2: Report writing - CMC documentation formatting
+    """
+    def __init__(self, model_invoker: Optional[ModelInvoker] = None):
+        """
+        Initialize the analyzer.
+        Args:
+            model_invoker: Model invoker instance. Creates new one if not provided.
+        """
+        self.model_invoker = model_invoker or ModelInvoker()
+        self.renderer = MoleculeRenderer()
+    def analyze_stability_advanced(
+        self,
+        goal: str,
+        file_paths: list,
+        api_info: str = "",
+        excipient_info: str = "",
+        progress_callback=None,
+    ) -> Dict[str, Any]:
+        """
+        Advanced stability analysis with scientific modeling (4-Phase Pipeline).
+        This method implements quantitative prediction based on:
+        - Degradation kinetics modeling (Zero-order/First-order)
+        - Arrhenius acceleration factor
+        - Statistical inference (R², SE, 95% CI)
+        - Trend transfer across formulations
+        Args:
+            goal: User's analysis goal
+            file_paths: List of file paths to analyze
+            api_info: Optional API background info
+            excipient_info: Optional excipient info
+            progress_callback: Progress callback
+        Returns:
+            Result dictionary with quantitative predictions
+        """
+        result = {
+            "success": False,
+            "error": None,
+            "report_output": "",
+            "phase1_data": "",
+            "phase2_kinetics": "",
+            "phase3_arrhenius": "",
+            "phase4_predictions": "",
+            "report_id": self._generate_report_id(),
+            "date": datetime.now().strftime("%Y-%m-%d"),
+        }
+        try:
+            # Parse Data Files
+            if progress_callback:
+                progress_callback(0.05, "正在解析数据文件...")
+            data_contents = []
+            for fp in file_paths:
+                content = DataParser.parse_file(fp)
+                data_contents.append(f"--- File: {os.path.basename(fp)} ---\n{content}\n")
+            full_data_content = "\n".join(data_contents)
+            if not full_data_content.strip():
+                result["error"] = "未能从上传文件中解析出有效数据"
+                return result
+            # ==================================================================
+            # Phase 1: Data Extraction & Validation
+            # ==================================================================
+            if progress_callback:
+                progress_callback(0.15, "Phase 1/4: 数据提取与验证...")
+            sys1, usr1 = StabilityModelingPrompts.get_phase1_prompt(
+                document_content=full_data_content,
+                analysis_goal=goal
+            )
+            resp1 = self.model_invoker.invoke(sys1, usr1, temperature=0.2)
+            if not resp1.success:
+                result["error"] = f"Phase 1 failed: {resp1.error}"
+                return result
+            result["phase1_data"] = resp1.content
+            # ==================================================================
+            # Phase 2: Kinetics Modeling
+            # ==================================================================
+            if progress_callback:
+                progress_callback(0.35, "Phase 2/4: 动力学建模 (计算k, R²)...")
+            sys2, usr2 = StabilityModelingPrompts.get_phase2_prompt(
+                extracted_data=resp1.content,
+                analysis_goal=goal
+            )
+            resp2 = self.model_invoker.invoke(sys2, usr2, temperature=0.1)
+            if not resp2.success:
+                result["error"] = f"Phase 2 failed: {resp2.error}"
+                return result
+            result["phase2_kinetics"] = resp2.content
+            # ==================================================================
+            # Phase 3: Arrhenius \u0026 Trend Transfer
+            # ==================================================================
+            if progress_callback:
+                progress_callback(0.60, "Phase 3/4: Arrhenius 计算与趋势迁移...")
+            sys3, usr3 = StabilityModelingPrompts.get_phase3_prompt(
+                kinetics_results=resp2.content,
+                analysis_goal=goal
+            )
+            resp3 = self.model_invoker.invoke(sys3, usr3, temperature=0.1)
+            if not resp3.success:
+                result["error"] = f"Phase 3 failed: {resp3.error}"
+                return result
+            result["phase3_arrhenius"] = resp3.content
+            # ==================================================================
+            # Phase 4: Prediction \u0026 Uncertainty
+            # ==================================================================
+            if progress_callback:
+                progress_callback(0.85, "Phase 4/4: 定量预测与不确定性评估...")
+            sys4, usr4 = StabilityModelingPrompts.get_phase4_prompt(
+                arrhenius_results=resp3.content,
+                original_data=resp1.content,  # Pass extracted data for visualization
+                analysis_goal=goal
+            )
+            resp4 = self.model_invoker.invoke(sys4, usr4, temperature=0.1)
+            if not resp4.success:
+                result["error"] = f"Phase 4 failed: {resp4.error}"
+                # Fallback to Phase 3 results
+                result["report_output"] = resp3.content
+            else:
+                result["phase4_predictions"] = resp4.content
+                result["report_output"] = resp4.content
+                result["success"] = True
+            if progress_callback:
+                progress_callback(1.0, "科学建模完成!")
+        except Exception as e:
+            result["error"] = str(e)
+            import traceback
+            traceback.print_exc()
+        return result
+    def format_stability_report(
+        self,
+        analysis_result: Dict[str, Any],
+        goal: str,
+    ) -> str:
+        """
+        Format the stability analysis result into HTML.
+        Args:
+            analysis_result: Result from analyze_stability() or analyze_stability_advanced()
+            goal: Analysis goal
+        Returns:
+            HTML string
+        """
+        import re
+        # Clean the report output
+        report_text = analysis_result.get("report_output", "")
+        # Basic markdown cleaning
+        report_text = re.sub(r'\*\*([^*]+)\*\*', r'<b>\1</b>', report_text)
+        report_text = re.sub(r'#{1,6}\s*(.*)', r'<h3>\1</h3>', report_text)
+        # Convert newlines to paragraphs
+        paras = [p.strip() for p in report_text.split('\n') if p.strip()]
+        content_html = ""
+        for p in paras:
+            if p.startswith('<h3>'):
+               content_html += p
+            else:
+               content_html += f'<p>{p}</p>'
+        html = f'''<!DOCTYPE html>
+<html lang="zh-CN">
+<head>
+    <meta charset="UTF-8">
+    <style>
+        body {{ font-family: 'Segoe UI', Arial, sans-serif; line-height: 1.6; color: #333; }}
+        .header {{ border-bottom: 2px solid #003366; padding-bottom: 10px; margin-bottom: 20px; }}
+        .title {{ font-size: 24px; color: #003366; font-weight: bold; }}
+        .meta {{ background: #f5f7fa; padding: 10px; border-radius: 4px; font-size: 12px; margin-bottom: 20px; }}
+        .content {{ background: white; padding: 20px; border: 1px solid #e0e4e8; border-radius: 8px; }}
+        h3 {{ color: #0066cc; margin-top: 20px; border-bottom: 1px solid #eee; padding-bottom: 5px; }}
+        p {{ margin-bottom: 10px; text-align: justify; }}
+    </style>
+</head>
+<body>
+    <div class="header">
+        <div class="title">药物稳定性研究报告</div>
+        <div style="color:#666;font-size:14px;">Pharmaceutical Stability Analysis Report</div>
+    </div>
+    <div class="meta">
+        <b>REPORT ID:</b> {analysis_result.get("report_id", "N/A")} |
+        <b>DATE:</b> {analysis_result.get("date", "N/A")} |
+        <b>GOAL:</b> {goal}
+    </div>
+    <div class="content">
+        {content_html}
+    </div>
+    <div style="margin-top:20px;font-size:10px;color:#999;text-align:center;">
+        Generated by Drug Stability Skill | AI-Assisted Data Analysis
+    </div>
+</body>
+</html>'''
+        return html
+    def analyze_stability(
+        self,
+        goal: str,
+        file_paths: list,
+        api_info: str = "",
+        excipient_info: str = "",
+        progress_callback=None,
+    ) -> Dict[str, Any]:
+        """
+        Run the stability data analysis pipeline.
+        Args:
+            goal: User's analysis goal
+            file_paths: List of file paths to analyze
+            api_info: Optional API background info
+            excipient_info: Optional excipient info
+            progress_callback: Progress callback
+        Returns:
+            Result dictionary
+        """
+        result = {
+            "success": False,
+            "error": None,
+            "report_output": "",
+            "analysis_summary": "",
+            "data_content": "",
+            "report_id": self._generate_report_id(),
+            "date": datetime.now().strftime("%Y-%m-%d"),
+        }
+        try:
+            # Step 1: Parse Data Files
+            if progress_callback:
+                progress_callback(0.1, "正在解析上传的数据文件...")
+            data_contents = []
+            for fp in file_paths:
+                content = DataParser.parse_file(fp)
+                data_contents.append(f"--- File: {fp.split(os.sep)[-1]} ---\n{content}\n")
+            full_data_content = "\n".join(data_contents)
+            result["data_content"] = full_data_content
+            if not full_data_content.strip():
+                result["error"] = "未能从上传文件中解析出有效数据"
+                return result
+            # Step 2: Phase 1 - Statistical Analysis
+            if progress_callback:
+                progress_callback(0.3, "Phase 1: 统计评估与趋势分析...")
+            stat_system, stat_user = ProfessionalPrompts.get_stability_data_prompt(
+                goal=goal,
+                data_content=full_data_content,
+                api_info=api_info,
+                excipient_info=excipient_info
+            )
+            stat_response = self.model_invoker.invoke(stat_system, stat_user, temperature=0.3)
+            if not stat_response.success:
+                result["error"] = f"Statistical analysis failed: {stat_response.error}"
+                return result
+            result["analysis_summary"] = stat_response.content
+            # Step 3: Phase 2 - Report Generation
+            if progress_callback:
+                progress_callback(0.7, "Phase 2: 撰写稳定性总结报告...")
+            # Use a snippet of data for context (first 2000 chars)
+            data_snippet = full_data_content[:2000] + "..." if len(full_data_content) > 2000 else full_data_content
+            rep_system, rep_user = ProfessionalPrompts.get_stability_report_prompt(
+                goal=goal,
+                analysis_summary=stat_response.content,
+                data_snippet=data_snippet
+            )
+            rep_response = self.model_invoker.invoke(rep_system, rep_user, temperature=0.2)
+            if not rep_response.success:
+                result["report_output"] = stat_response.content  # Fallback
+                result["error"] = f"Report generation failed: {rep_response.error}"
+            else:
+                result["report_output"] = rep_response.content
+                result["success"] = True
+            if progress_callback:
+                progress_callback(1.0, "分析完成!")
+        except Exception as e:
+            result["error"] = str(e)
+            import traceback
+            traceback.print_exc()
+        return result
+    def analyze(
+        self,
+        smiles: str,
+        excipient_name: str,
+        api_name: Optional[str] = None,
+        excipient_type: str = "填充剂",
+        excipient_properties: str = "",
+        progress_callback=None,
+    ) -> Dict[str, Any]:
+        """
+        Run the full dual-phase analysis.
+        Args:
+            smiles: API SMILES notation
+            excipient_name: Excipient name
+            api_name: Optional API name (uses SMILES if not provided)
+            excipient_type: Type of excipient (filler, binder, etc.)
+            excipient_properties: Known properties of the excipient
+            progress_callback: Optional callback for progress updates
+        Returns:
+            Dictionary containing:
+            - reasoning_output: Raw reasoning from Phase 1
+            - report_output: Formatted report from Phase 2
+            - structure_image: Data URI for molecular structure
+            - properties: Calculated molecular properties
+            - functional_groups: Identified reactive groups
+            - report_id: Generated report ID
+            - date: Analysis date
+        """
+        result = {
+            "success": False,
+            "error": None,
+            "smiles": smiles,  # Save SMILES for HTML report
+            "reasoning_output": "",
+            "report_output": "",
+            "structure_image": None,
+            "properties": {},
+            "functional_groups": [],
+            "functional_groups_summary": "",
+            "report_id": self._generate_report_id(),
+            "date": datetime.now().strftime("%Y-%m-%d"),
+        }
+        api_name = api_name or smiles[:50]
+        try:
+            # Step 1: Molecular analysis with RDKit
+            if progress_callback:
+                progress_callback(0.1, "正在解析分子结构...")
+            props = self.renderer.calculate_properties(smiles)
+            if props:
+                result["properties"] = props
+            else:
+                # Use placeholder values if RDKit not available
+                props = {"molecular_weight": 0, "logp": 0, "hbd": 0, "hba": 0}
+            # Step 2: Identify functional groups
+            if progress_callback:
+                progress_callback(0.15, "正在识别反应活性基团...")
+            functional_groups = self.renderer.identify_functional_groups(smiles)
+            result["functional_groups"] = functional_groups
+            groups_summary = self.renderer.get_functional_groups_summary(smiles)
+            result["functional_groups_summary"] = groups_summary
+            # Step 3: Generate structure image
+            if progress_callback:
+                progress_callback(0.2, "正在生成分子结构图...")
+            structure_image = self.renderer.get_data_uri(smiles)
+            result["structure_image"] = structure_image
+            # Step 4: Phase 1 - Deep Reasoning
+            if progress_callback:
+                progress_callback(0.3, "Phase 1: 深度机理推理中...")
+            reasoning_system, reasoning_user = ProfessionalPrompts.get_reasoning_prompt(
+                smiles=smiles,
+                functional_groups=groups_summary,
+                mw=props.get("molecular_weight", 0),
+                logp=props.get("logp", 0),
+                hbd=props.get("hbd", 0),
+                hba=props.get("hba", 0),
+                excipient_name=excipient_name,
+                excipient_type=excipient_type,
+                excipient_properties=excipient_properties,
+            )
+            reasoning_response = self.model_invoker.invoke(
+                reasoning_system,
+                reasoning_user,
+                temperature=0.3,
+            )
+            if not reasoning_response.success:
+                result["error"] = f"Phase 1 failed: {reasoning_response.error}"
+                return result
+            result["reasoning_output"] = reasoning_response.content
+            # Step 5: Phase 2 - Professional Report Writing
+            if progress_callback:
+                progress_callback(0.7, "Phase 2: 专业报告撰写中...")
+            writing_system, writing_user = ProfessionalPrompts.get_writing_prompt(
+                api_name=api_name,
+                excipient_name=excipient_name,
+                report_id=result["report_id"],
+                date=result["date"],
+                reasoning_content=reasoning_response.content,
+            )
+            writing_response = self.model_invoker.invoke(
+                writing_system,
+                writing_user,
+                temperature=0.2,  # Lower temperature for more consistent formatting
+            )
+            if not writing_response.success:
+                # Fall back to reasoning output if writing phase fails
+                result["report_output"] = reasoning_response.content
+                result["error"] = f"Phase 2 failed (using raw reasoning): {writing_response.error}"
+            else:
+                result["report_output"] = writing_response.content
+            if progress_callback:
+                progress_callback(1.0, "分析完成!")
+            result["success"] = True
+        except Exception as e:
+            result["error"] = str(e)
+            import traceback
+            traceback.print_exc()
+        return result
+    def _generate_report_id(self) -> str:
+        """Generate a unique report ID."""
+        import random
+        timestamp = datetime.now().strftime("%Y%m%d")
+        seq = f"{random.randint(100, 999)}"
+        return f"PRE-{timestamp}-{seq}"
+    def format_html_report(
+        self,
+        analysis_result: Dict[str, Any],
+        api_name: str,
+        excipient_name: str,
+    ) -> str:
+        """
+        Format the analysis result into a professional HTML report.
+        This creates a clean, professional report suitable for pharmaceutical R&D.
+        Incorporates QbD visualization principles.
+        Args:
+            analysis_result: Result from analyze() method
+            api_name: API name
+            excipient_name: Excipient name
+        Returns:
+            HTML string
+        """
+        import re
+        # Clean the report output - remove any residual Markdown
+        report_text = analysis_result.get("report_output", "")
+        report_text = self._clean_markdown(report_text)
+        # Parse report sections for better formatting
+        sections_html = self._parse_report_sections_qbd(report_text)
+        # Build structure section
+        structure_img = analysis_result.get("structure_image", "")
+        smiles = analysis_result.get("smiles", api_name)
+        # Build functional groups HTML with reactions
+        groups = analysis_result.get("functional_groups", [])
+        groups_html = self._build_functional_groups_html(groups)
+        # Build properties table with enhanced info
+        props = analysis_result.get("properties", {})
+        props_html = self._build_properties_html(props)
+        # Build Risk Assessment Matrix (New QbD Component)
+        risk_matrix_html = self._build_risk_matrix(groups)
+        html = f'''<!DOCTYPE html>
+<html lang="zh-CN">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>药物-辅料相容性评估报告 | {api_name}</title>
+    <style>
+        :root {{
+            --primary-color: #003366; /* Deep pharmaceutical blue */
+            --secondary-color: #0066cc;
+            --accent-color: #0099ff;
+            --text-primary: #333333;
+            --text-secondary: #666666;
+            --bg-light: #f5f7fa;
+            --border-color: #e0e4e8;
+            --risk-none: #28a745;
+            --risk-low: #17a2b8;
+            --risk-medium: #ffc107;
+            --risk-high: #dc3545;
+        }}
+        * {{ margin: 0; padding: 0; box-sizing: border-box; }}
+        body {{
+            font-family: 'Segoe UI', 'Microsoft YaHei', Arial, sans-serif;
+            line-height: 1.6;
+            color: var(--text-primary);
+            background: #ffffff;
+            font-size: 14px; /* Optimized for A4 reading */
+        }}
+        /* A4 Page Setup mimicking proper PDF layout */
+        .report-page {{
+            width: 210mm;
+            min-height: 297mm;
+            margin: 0 auto;
+            background: white;
+            padding: 20mm;
+            box-shadow: 0 0 10px rgba(0,0,0,0.1);
+        }}
+        .report-header {{
+            border-bottom: 3px solid var(--primary-color);
+            padding-bottom: 15px;
+            margin-bottom: 25px;
+        }}
+        .report-title {{
+            font-size: 24px;
+            font-weight: bold;
+            color: var(--primary-color);
+            text-transform: uppercase;
+            letter-spacing: 1px;
+        }}
+        .report-subtitle {{
+            font-size: 14px;
+            color: var(--text-secondary);
+            margin-top: 5px;
+            font-weight: 500;
+        }}
+        .report-meta-grid {{
+            display: grid;
+            grid-template-columns: repeat(4, 1fr);
+            gap: 15px;
+            margin-top: 15px;
+            background: var(--bg-light);
+            padding: 10px;
+            border-radius: 4px;
+            font-size: 12px;
+        }}
+        .meta-label {{ font-weight: bold; color: var(--secondary-color); display: block; }}
+        /* Section Styling */
+        .section {{ margin-bottom: 25px; page-break-inside: avoid; }}
+        .section-title {{
+            font-size: 16px;
+            font-weight: bold;
+            color: var(--primary-color);
+            margin-bottom: 15px;
+            padding-bottom: 5px;
+            border-bottom: 2px solid var(--border-color);
+            display: flex;
+            align-items: center;
+        }}
+        .section-number {{
+            background: var(--primary-color);
+            color: white;
+            width: 24px;
+            height: 24px;
+            border-radius: 50%;
+            display: inline-flex;
+            align-items: center;
+            justify-content: center;
+            margin-right: 10px;
+            font-size: 12px;
+        }}
+        /* QbD Components */
+        .qbd-grid {{
+            display: grid;
+            grid-template-columns: 2fr 1fr;
+            gap: 20px;
+        }}
+        .structure-card {{
+            border: 1px solid var(--border-color);
+            border-radius: 8px;
+            padding: 15px;
+            text-align: center;
+            background: white;
+        }}
+        .structure-image {{ max-width: 100%; max-height: 180px; object-fit: contain; }}
+        .risk-matrix-container {{
+            border: 1px solid var(--border-color);
+            border-radius: 8px;
+            padding: 15px;
+            background: white;
+        }}
+        .risk-badge {{
+            display: inline-block;
+            padding: 3px 8px;
+            border-radius: 4px;
+            font-size: 11px;
+            font-weight: bold;
+            color: white;
+            margin-right: 5px;
+        }}
+        .bg-high {{ background-color: var(--risk-high); }}
+        .bg-medium {{ background-color: var(--risk-medium); text-color: #333; }}
+        .bg-low {{ background-color: var(--risk-low); }}
+        .bg-none {{ background-color: var(--risk-none); }}
+        /* Action Checklist Style for Control Strategy */
+        .action-list {{ list-style: none; }}
+        .action-item {{
+            margin-bottom: 8px;
+            padding: 8px 12px;
+            border-left: 3px solid transparent;
+            background: #fcfcfc;
+            border: 1px solid #eee;
+            border-radius: 4px;
+        }}
+        .action-must {{ border-left-color: var(--risk-high); background: #fff5f5; }}
+        .action-suggest {{ border-left-color: var(--risk-medium); background: #fffbf0; }}
+        .action-check {{ border-left-color: var(--risk-low); background: #f0f8ff; }}
+        .highlight-tag {{
+            font-size: 10px;
+            text-transform: uppercase;
+            padding: 2px 4px;
+            border-radius: 2px;
+            margin-right: 8px;
+            color: white;
+            font-weight: bold;
+        }}
+        /* Print Optimization */
+        @media print {{
+            body {{ background: white; }}
+            .report-page {{
+                width: 100%;
+                margin: 0;
+                padding: 0;
+                box-shadow: none;
+            }}
+            .section {{ break-inside: avoid; }}
+            h2 {{ break-after: avoid; }}
+        }}
+        .properties-table, .risk-table {{ width: 100%; border-collapse: collapse; font-size: 12px; }}
+        .properties-table th {{ text-align: left; color: var(--text-secondary); width: 40%; padding: 6px; }}
+        .properties-table td {{ padding: 6px; font-weight: 500; }}
+        /* Functional Group Cards - QbD Style */
+        .group-card {{
+            background: #f8f9fa;
+            border: 1px solid var(--border-color);
+            border-radius: 6px;
+            padding: 12px;
+            margin-bottom: 10px;
+        }}
+        .group-header {{
+            display: flex;
+            justify-content: space-between;
+            align-items: center;
+            margin-bottom: 5px;
+        }}
+        .group-name {{
+            font-weight: 600;
+            color: var(--primary-color);
+            font-size: 13px;
+        }}
+        .group-property {{
+            font-size: 10px;
+            padding: 2px 6px;
+            border-radius: 3px;
+            font-weight: 500;
+        }}
+        .property-acidic {{ background: #ffe0e0; color: #c62828; }}
+        .property-basic {{ background: #e0f0ff; color: #1565c0; }}
+        .property-neutral {{ background: #e8e8e8; color: #666; }}
+        /* Reaction Tags - Overflow Prevention */
+        .group-reactions {{
+            display: flex;
+            flex-wrap: wrap;
+            gap: 4px;
+            margin-top: 8px;
+        }}
+        .reaction-tag {{
+            display: inline-block;
+            background: var(--secondary-color);
+            color: white;
+            padding: 2px 8px;
+            border-radius: 3px;
+            font-size: 10px;
+            white-space: nowrap;
+        }}
+        /* Section Content - Prevent Overflow */
+        .section-content {{
+            word-wrap: break-word;
+            overflow-wrap: break-word;
+        }}
+        p {{ margin-bottom: 10px; text-align: justify; word-wrap: break-word; }}
+    </style>
+</head>
+<body>
+    <div class="report-page">
+        <header class="report-header">
+            <h1 class="report-title">药物-辅料相容性评估报告</h1>
+            <p class="report-subtitle">Pharmaceutical Product Development - Compatibility Assessment Report</p>
+            <div class="report-meta-grid">
+                <div><span class="meta-label">REPORT ID</span>{analysis_result["report_id"]}</div>
+                <div><span class="meta-label">DATE</span>{analysis_result["date"]}</div>
+                <div><span class="meta-label">API</span>{api_name}</div>
+                <div><span class="meta-label">EXCIPIENT</span>{excipient_name}</div>
+            </div>
+        </header>
+        <!-- Section 1: Molecule & Risks (QbD Visuals) -->
+        <section class="section">
+            <div class="section-title">
+                <span class="section-number">01</span>
+                API结构特征与关键质量属性 (CQA Analysis)
+            </div>
+            <div class="qbd-grid">
+                <!-- Left: Structure & Properties -->
+                <div class="structure-card">
+                    {f'<img src="{structure_img}" alt="Molecular Structure" class="structure-image">' if structure_img else '<div style="padding:40px;color:#999;">Structure Generating...</div>'}
+                    <div style="font-family:monospace;font-size:10px;color:#666;margin-top:10px;word-break:break-all;">{smiles}</div>
+                    <div style="margin-top:15px;text-align:left;">
+                        {props_html}
+                    </div>
+                </div>
+                <!-- Right: Risk Matrix Summary -->
+                <div class="risk-matrix-container">
+                    <h4 style="color:var(--primary-color);margin-bottom:10px;border-bottom:1px solid #eee;padding-bottom:5px;">反应活性概览</h4>
+                    {groups_html}
+                    {risk_matrix_html}
+                </div>
+            </div>
+        </section>
+        <!-- Section 2: Excipient Analysis -->
+        {sections_html}
+        <footer style="margin-top:40px;border-top:1px solid #eee;padding-top:10px;font-size:10px;color:#999;text-align:center;">
+            Confidential - Pharmaceutical R&D Use Only | Generated by Drug Stability Skill
+        </footer>
+    </div>
+</body>
+</html>'''
+        return html
+    def _build_functional_groups_html(self, groups: list) -> str:
+        """Build HTML for functional groups with proper reaction tags."""
+        if not groups:
+            return '<div class="group-card"><div class="group-name">未检测到特征官能团</div></div>'
+        html_parts = []
+        for g in groups:
+            # Determine property class
+            prop_type = g.get("property_type", "中性")
+            if "酸" in prop_type:
+                prop_class = "property-acidic"
+            elif "碱" in prop_type:
+                prop_class = "property-basic"
+            else:
+                prop_class = "property-neutral"
+            # Build reaction tags
+            reactions = g.get("potential_reactions", [])
+            if reactions:
+                reactions_html = "".join([
+                    f'<span class="reaction-tag">{r}</span>' for r in reactions
+                ])
+            else:
+                reactions_html = '<span style="color:#999;font-size:11px;">暂无特定反应风险</span>'
+            count_badge = f' ×{g["count"]}' if g.get("count", 1) > 1 else ""
+            html_parts.append(f'''
+            <div class="group-card">
+                <div class="group-header">
+                    <span class="group-name">{g["name_cn"]}{count_badge}</span>
+                    <span class="group-property {prop_class}">{prop_type}</span>
+                </div>
+                <div style="font-size:12px;color:#666;margin-bottom:8px;">({g["name_en"]})</div>
+                <div class="group-reactions">{reactions_html}</div>
+            </div>
+            ''')
+        return "".join(html_parts)
+    def _build_properties_html(self, props: dict) -> str:
+        """Build enhanced properties table HTML."""
+        mw = props.get("molecular_weight", "-")
+        logp = props.get("logp", "-")
+        hbd = props.get("hbd", "-")
+        hba = props.get("hba", "-")
+        tpsa = props.get("tpsa", "-")
+        # Add lipophilicity assessment
+        lipophilicity = "-"
+        if isinstance(logp, (int, float)):
+            if logp < 1:
+                lipophilicity = "亲水性"
+            elif logp < 3:
+                lipophilicity = "中等亲脂性"
+            elif logp < 5:
+                lipophilicity = "亲脂性"
+            else:
+                lipophilicity = "高亲脂性"
+        return f'''
+        <table class="properties-table">
+            <tr><th>分子量 (MW)</th><td>{mw} g/mol</td></tr>
+            <tr><th>LogP</th><td>{logp} ({lipophilicity})</td></tr>
+            <tr><th>氢键供体 (HBD)</th><td>{hbd}</td></tr>
+            <tr><th>氢键受体 (HBA)</th><td>{hba}</td></tr>
+            <tr><th>TPSA</th><td>{tpsa} Å²</td></tr>
+        </table>
+        '''
+    def _parse_report_sections_qbd(self, text: str) -> str:
+        """Parse report text into QbD-styled sections with action checklists."""
+        import re
+        sections = re.split(r'\n(?=[一二三四五六七八九十]、)', text)
+        html_parts = []
+        for section in sections:
+            section = section.strip()
+            if not section: continue
+            lines = section.split('\n', 1)
+            title_raw = lines[0].strip()
+            content = lines[1].strip() if len(lines) > 1 else ""
+            # Extract section number (Chinese)
+            match = re.match(r'([一二三四��六七八九十])、(.*)', title_raw)
+            if match:
+                sec_num_cn = match.group(1)
+                title = match.group(2)
+                # Map Chinese numeral to int for display
+                cn_map = {'一':1, '二':2, '三':3, '四':4, '五':5, '六':6}
+                sec_num = f"{cn_map.get(sec_num_cn, 0):02d}"
+            else:
+                sec_num = "00"
+                title = title_raw
+            # Special handling for "Control Strategy" section
+            if "控制" in title or "策略" in title or "建议" in title:
+                content_html = self._format_control_strategy(content)
+            else:
+                # Normal paragraph formatting
+                paras = [p.strip() for p in content.split('\n') if p.strip()]
+                content_html = ""
+                for p in paras:
+                    if re.match(r'^[0-9]+\.[0-9]+', p):
+                         content_html += f'<h4 style="color:var(--secondary-color);margin:12px 0 5px 0;font-size:13px;">{p}</h4>'
+                    else:
+                        content_html += f'<p>{p}</p>'
+            html_parts.append(f'''
+            <section class="section">
+                <div class="section-title">
+                    <span class="section-number">{sec_num}</span>
+                    {title}
+                </div>
+                <div class="section-content">
+                    {content_html}
+                </div>
+            </section>
+            ''')
+        return "".join(html_parts)
+    def _format_control_strategy(self, text: str) -> str:
+        """Format control strategy as an action checklist."""
+        lines = [l.strip() for l in text.split('\n') if l.strip()]
+        html = '<ul class="action-list">'
+        for line in lines:
+            if line.startswith('[必须]'):
+                cls = 'action-must'
+                tag = '<span class="highlight-tag bg-high">必须 (Critical)</span>'
+                content = line.replace('[必须]', '').strip()
+            elif line.startswith('[建议]'):
+                cls = 'action-suggest'
+                tag = '<span class="highlight-tag bg-medium">建议 (Recommended)</span>'
+                content = line.replace('[建议]', '').strip()
+            elif line.startswith('[考察]'):
+                cls = 'action-check'
+                tag = '<span class="highlight-tag bg-low">考察 (Investigation)</span>'
+                content = line.replace('[考察]', '').strip()
+            else:
+                cls = 'action-item'
+                tag = ''
+                content = line
+            html += f'<li class="action-item {cls}">{tag}{content}</li>'
+        html += '</ul>'
+        return html
+    def _build_risk_matrix(self, groups: list) -> str:
+        """Build a comprehensive QbD-style visual risk assessment matrix."""
+        # Categorize risks by type
+        risk_summary = {
+            "oxidation": {"count": 0, "severity": "medium", "label": "氧化风险"},
+            "hydrolysis": {"count": 0, "severity": "medium", "label": "水解风险"},
+            "maillard": {"count": 0, "severity": "high", "label": "美拉德反应"},
+            "acid_base": {"count": 0, "severity": "low", "label": "酸碱反应"},
+            "adsorption": {"count": 0, "severity": "low", "label": "吸附作用"},
+        }
+        for g in groups:
+            for r in g.get("potential_reactions", []):
+                r_lower = r.lower()
+                if "氧化" in r or "oxidation" in r_lower:
+                    risk_summary["oxidation"]["count"] += 1
+                if "水解" in r or "hydrolysis" in r_lower:
+                    risk_summary["hydrolysis"]["count"] += 1
+                if "美拉德" in r or "maillard" in r_lower or "schiff" in r_lower:
+                    risk_summary["maillard"]["count"] += 1
+                if "酸" in r or "碱" in r or "acid" in r_lower or "base" in r_lower:
+                    risk_summary["acid_base"]["count"] += 1
+                if "吸附" in r or "adsorption" in r_lower:
+                    risk_summary["adsorption"]["count"] += 1
+        # Build risk bars
+        risk_bars_html = ""
+        for key, data in risk_summary.items():
+            if data["count"] > 0:
+                # Determine color based on severity
+                if data["severity"] == "high":
+                    color = "var(--risk-high)"
+                    width = min(data["count"] * 40, 100)
+                elif data["severity"] == "medium":
+                    color = "var(--risk-medium)"
+                    width = min(data["count"] * 30, 100)
+                else:
+                    color = "var(--risk-low)"
+                    width = min(data["count"] * 20, 100)
+                risk_bars_html += f'''
+                <div style="margin-bottom:8px;">
+                    <div style="display:flex;justify-content:space-between;font-size:11px;margin-bottom:3px;">
+                        <span>{data["label"]}</span>
+                        <span style="color:{color};font-weight:bold;">×{data["count"]}</span>
+                    </div>
+                    <div style="background:#eee;height:6px;border-radius:3px;overflow:hidden;">
+                        <div style="width:{width}%;background:{color};height:100%;"></div>
+                    </div>
+                </div>
+                '''
+        # Calculate overall risk score
+        total_risks = sum(d["count"] for d in risk_summary.values())
+        high_risk_count = sum(d["count"] for d in risk_summary.values() if d["severity"] == "high" and d["count"] > 0)
+        if high_risk_count > 0:
+            overall_risk = "HIGH"
+            risk_color = "var(--risk-high)"
+            risk_icon = "⚠️"
+        elif total_risks > 2:
+            overall_risk = "MEDIUM"
+            risk_color = "var(--risk-medium)"
+            risk_icon = "⚡"
+        elif total_risks > 0:
+            overall_risk = "LOW"
+            risk_color = "var(--risk-low)"
+            risk_icon = "✓"
+        else:
+            overall_risk = "MINIMAL"
+            risk_color = "var(--risk-none)"
+            risk_icon = "✓"
+        return f'''
+        <div style="margin-top:15px;">
+            <div style="background:{risk_color};color:white;padding:8px 12px;border-radius:4px;margin-bottom:12px;text-align:center;">
+                <span style="font-size:16px;">{risk_icon}</span>
+                <span style="font-weight:bold;margin-left:5px;">综合风险等级: {overall_risk}</span>
+            </div>
+            <div style="font-size:11px;color:#666;margin-bottom:10px;">
+                <div style="display:flex;justify-content:space-between;margin-bottom:3px;">
+                    <span>检测到活性基团:</span> <b>{len(groups)}</b>
+                </div>
+                <div style="display:flex;justify-content:space-between;">
+                    <span>潜在反应类型:</span> <b>{total_risks}</b>
+                </div>
+            </div>
+            <div style="border-top:1px solid #eee;padding-top:10px;">
+                <div style="font-size:11px;font-weight:bold;color:var(--primary-color);margin-bottom:8px;">风险分布</div>
+                {risk_bars_html if risk_bars_html else '<div style="color:#999;font-size:11px;">暂无高风险反应</div>'}
+            </div>
+        </div>
+        '''
+    def _clean_markdown(self, text: str) -> str:
+        """Remove any residual Markdown formatting from text."""
+        import re
+        # Remove bold markers (** and __)
+        text = re.sub(r'\*\*([^*]+)\*\*', r'\1', text)
+        text = re.sub(r'__([^_]+)__', r'\1', text)
+        # Remove italic markers (* and _) - be careful not to break chemistry notation
+        text = re.sub(r'(?<!\w)\*([^*\n]+)\*(?!\w)', r'\1', text)
+        text = re.sub(r'(?<!\w)_([^_\n]+)_(?!\w)', r'\1', text)
+        # Remove strikethrough
+        text = re.sub(r'~~([^~]+)~~', r'\1', text)
+        # Remove headers (but preserve Chinese numeral headers)
+        text = re.sub(r'^#{1,6}\s*', '', text, flags=re.MULTILINE)
+        # Remove list markers (-, *, +) at start of line
+        text = re.sub(r'^\s*[-*+]\s+', '• ', text, flags=re.MULTILINE)
+        # Remove numbered list markers and replace with proper formatting
+        text = re.sub(r'^\s*(\d+)\.\s+', r'\1. ', text, flags=re.MULTILINE)
+        # Remove inline code backticks
+        text = re.sub(r'`([^`]+)`', r'\1', text)
+        # Remove blockquotes
+        text = re.sub(r'^>\s*', '', text, flags=re.MULTILINE)
+        # Remove horizontal rules
+        text = re.sub(r'^[-*_]{3,}\s*$', '', text, flags=re.MULTILINE)
+        # Remove link formatting [text](url) -> text
+        text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', text)
+        # Remove image formatting ![alt](url)
+        text = re.sub(r'!\[([^\]]*)\]\([^)]+\)', '', text)
+        # Clean up any remaining stray asterisks that are formatting (not chemistry)
+        # This targets isolated asterisks not part of chemical structures
+        text = re.sub(r'(?<![a-zA-Z])\*(?![a-zA-Z*])', '', text)
+        # Clean up multiple newlines
+        text = re.sub(r'\n{3,}', '\n\n', text)
+        # Clean up extra spaces
+        text = re.sub(r'  +', ' ', text)
+        return text.strip()
+# Shared module-level instance, built once when this module is imported.
+# No model invoker is passed, so __init__ creates its own ModelInvoker.
+professional_analyzer = ProfessionalAnalyzer()