| """
|
| Molecular Structure Renderer.
|
|
|
| This module provides molecular structure visualization using RDKit:
|
| - 2D structure rendering
|
| - Morgan fingerprint visualization
|
| - Molecular property calculation
|
|
|
| For web-based 3D visualization, we also support 3Dmol.js integration.
|
| """
|
|
|
| import io
|
| import base64
|
| from typing import Optional, Dict, Any, Tuple, List
|
| from dataclasses import dataclass
|
|
|
|
|
@dataclass
class MoleculeInfo:
    """
    Record describing a single molecule.

    Only ``smiles`` is mandatory. Every other field defaults to ``None`` and
    is meant to be filled in afterwards by whoever computes it.
    """

    # --- identity -----------------------------------------------------------
    smiles: str                                    # SMILES notation as supplied
    name: Optional[str] = None                     # optional display name

    # --- computed descriptors -----------------------------------------------
    molecular_weight: Optional[float] = None
    logp: Optional[float] = None
    hbd: Optional[int] = None                      # hydrogen-bond donor count
    hba: Optional[int] = None                      # hydrogen-bond acceptor count
    tpsa: Optional[float] = None
    rotatable_bonds: Optional[int] = None

    # --- rendered artefacts -------------------------------------------------
    structure_2d_svg: Optional[str] = None         # SVG markup of the 2D depiction
    structure_2d_png_base64: Optional[str] = None  # base64-encoded PNG of the 2D depiction
    fingerprint_svg: Optional[str] = None          # SVG of the fingerprint visualization
|
|
|
|
|
class MoleculeRenderer:
    """
    Renders molecular structures using RDKit.

    Provides:
    - 2D structure images (SVG and PNG)
    - Morgan fingerprint bit visualization
    - Basic property calculations
    - SMARTS-based functional group identification

    RDKit is imported lazily inside each method so that this class can be
    used (in a degraded mode) when RDKit is not installed: every method then
    returns its "nothing available" value (``None``, ``[]`` or a bare
    ``MoleculeInfo``).
    """

    # Labels that sometimes precede a pasted SMILES string.
    _SMILES_PREFIXES = ('SMILES:', 'smiles:', 'SMILES=', 'smiles=')

    # Full-width / CJK punctuation mapped to the ASCII characters SMILES uses.
    # Written as escapes so the table cannot silently degrade into no-op
    # ASCII-to-ASCII replacements if the file is re-encoded or normalised.
    _PUNCTUATION_FIXES = str.maketrans({
        '\uff08': '(',  # FULLWIDTH LEFT PARENTHESIS
        '\uff09': ')',  # FULLWIDTH RIGHT PARENTHESIS
        '\u3010': '[',  # LEFT BLACK LENTICULAR BRACKET
        '\u3011': ']',  # RIGHT BLACK LENTICULAR BRACKET
        '\uff1d': '=',  # FULLWIDTH EQUALS SIGN
        '\uff03': '#',  # FULLWIDTH NUMBER SIGN
    })

    # SMARTS definitions of the functional groups reported by
    # identify_functional_groups(). Kept at class level so the table is built
    # once instead of on every call. The name/property/reaction strings are
    # emitted to callers verbatim.
    _FUNCTIONAL_GROUP_PATTERNS: Dict[str, Dict[str, Any]] = {
        # --- amines ---------------------------------------------------------
        "primary_amine": {
            "smarts": "[NX3H2;!$([NX3H2]-C=O)]",
            "name_cn": "伯胺基团",
            "name_en": "Primary Amine",
            "property_type": "碱性",
            "reactions": ["美拉德反应(Maillard Reaction)", "氧化脱氨(Oxidative Deamination)", "席夫碱形成(Schiff Base)"],
        },
        "secondary_amine": {
            "smarts": "[NX3H1;!$([NX3H1]-C=O)]([#6])([#6])",
            "name_cn": "仲胺基团",
            "name_en": "Secondary Amine",
            "property_type": "碱性",
            "reactions": ["美拉德反应(Maillard Reaction)", "N-氧化(N-Oxidation)"],
        },
        "tertiary_amine": {
            "smarts": "[NX3H0;!$([NX3]-C=O)]([#6])([#6])([#6])",
            "name_cn": "叔胺基团",
            "name_en": "Tertiary Amine",
            "property_type": "碱性",
            "reactions": ["N-氧化(N-Oxidation)"],
        },
        # --- sulfur ---------------------------------------------------------
        "thiol": {
            "smarts": "[SH]",
            "name_cn": "巯基",
            "name_en": "Thiol",
            "property_type": "中性/弱酸性",
            "reactions": ["氧化成二硫键(Disulfide Formation)", "金属配位(Metal Coordination)"],
        },
        "thioether": {
            "smarts": "[#6][SX2][#6]",
            "name_cn": "硫醚基团",
            "name_en": "Thioether",
            "property_type": "中性",
            "reactions": ["氧化成亚砜(Sulfoxide Formation)", "氧化成砜(Sulfone Formation)"],
        },
        # --- hydroxyl -------------------------------------------------------
        "phenol": {
            "smarts": "[OX2H][c]",
            "name_cn": "酚羟基",
            "name_en": "Phenolic Hydroxyl",
            "property_type": "弱酸性",
            "reactions": ["氧化(Oxidation)", "光氧化(Photooxidation)", "醌形成(Quinone Formation)"],
        },
        "alcohol": {
            "smarts": "[OX2H][CX4]",
            "name_cn": "醇羟基",
            "name_en": "Aliphatic Hydroxyl",
            "property_type": "中性",
            "reactions": ["脱水(Dehydration)", "酯化(Esterification)"],
        },
        # --- carbonyl -------------------------------------------------------
        "aldehyde": {
            "smarts": "[CX3H1](=O)[#6]",
            "name_cn": "醛基",
            "name_en": "Aldehyde",
            "property_type": "中性/亲电",
            "reactions": ["美拉德反应(Maillard Reaction)", "氧化成羧酸(Oxidation to Carboxylic Acid)"],
        },
        "ketone": {
            "smarts": "[CX3](=O)([#6])[#6]",
            "name_cn": "酮基",
            "name_en": "Ketone",
            "property_type": "中性",
            "reactions": ["还原(Reduction)", "缩合反应(Condensation)"],
        },
        # --- acid derivatives -----------------------------------------------
        "carboxylic_acid": {
            "smarts": "[CX3](=O)[OX2H]",
            "name_cn": "羧基",
            "name_en": "Carboxylic Acid",
            "property_type": "酸性",
            "reactions": ["盐形成(Salt Formation)", "酰胺化(Amidation)"],
        },
        "ester": {
            "smarts": "[CX3](=O)[OX2][#6]",
            "name_cn": "酯基",
            "name_en": "Ester",
            "property_type": "中性",
            "reactions": ["水解(Hydrolysis)", "转酯化(Transesterification)"],
        },
        "amide": {
            "smarts": "[CX3](=O)[NX3]",
            "name_cn": "酰胺基",
            "name_en": "Amide",
            "property_type": "中性",
            "reactions": ["水解(Hydrolysis)"],
        },
        # NOTE(review): as written this SMARTS closes a four-membered ring, so
        # it looks like it only matches beta-lactones -- confirm whether
        # larger lactone rings should be covered as well.
        "lactone": {
            "smarts": "[#6]1~[#6]~[#6](=O)~[OX2]~1",
            "name_cn": "内酯环",
            "name_en": "Lactone",
            "property_type": "中性",
            "reactions": ["开环水解(Ring-opening Hydrolysis)"],
        },
        # --- heteroaromatics ------------------------------------------------
        "pyridine": {
            "smarts": "c1ccncc1",
            "name_cn": "吡啶环",
            "name_en": "Pyridine",
            "property_type": "碱性",
            "reactions": ["N-氧化(N-Oxidation)", "质子化(Protonation)"],
        },
        "imidazole": {
            "smarts": "c1cnc[nH]1",
            "name_cn": "咪唑环",
            "name_en": "Imidazole",
            "property_type": "碱性/两性",
            "reactions": ["N-氧化(N-Oxidation)", "金属配位(Metal Coordination)"],
        },
        # --- other reactive sites -------------------------------------------
        "nitrile": {
            "smarts": "[CX2]#N",
            "name_cn": "氰基",
            "name_en": "Nitrile",
            "property_type": "中性",
            "reactions": ["水解成酰胺/羧酸(Hydrolysis)"],
        },
        "allylic": {
            "smarts": "[CX4][CX3]=[CX3]",
            "name_cn": "烯丙位",
            "name_en": "Allylic Position",
            "property_type": "中性",
            "reactions": ["自氧化(Autoxidation)"],
        },
        "benzylic": {
            "smarts": "[CX4H2]c",
            "name_cn": "苄位",
            "name_en": "Benzylic Position",
            "property_type": "中性",
            "reactions": ["自氧化(Autoxidation)"],
        },
    }

    def __init__(self):
        """Initialize the renderer and check RDKit availability."""
        self._rdkit_available = self._check_rdkit()

    def _check_rdkit(self) -> bool:
        """
        Check if RDKit is available.

        Returns:
            True when ``rdkit.Chem`` and ``rdkit.Chem.Draw`` can be imported;
            otherwise prints an installation hint and returns False.
        """
        try:
            # Imported only to probe availability; the names are not used.
            from rdkit import Chem  # noqa: F401
            from rdkit.Chem import Draw  # noqa: F401
        except ImportError:
            print("Warning: RDKit not installed. Molecular rendering disabled.")
            print("Install with: pip install rdkit")
            return False
        return True

    @property
    def is_available(self) -> bool:
        """Check if rendering is available."""
        return self._rdkit_available

    def parse_smiles(self, smiles: str) -> Optional[Any]:
        """
        Parse SMILES string to RDKit molecule object with enhanced error handling.

        The input is normalized first, then these strategies are tried in
        order until one yields a molecule:

        1. Standard parsing.
        2. Parsing without sanitization followed by an explicit sanitize.
        3. Parsing with stereochemistry markers stripped.
        4. For dot-separated input (salts and mixtures), parsing only the
           fragment with the longest SMILES text.

        Args:
            smiles: SMILES notation

        Returns:
            RDKit Mol object or None if invalid (also None when RDKit is
            unavailable or the input is empty / not a string)
        """
        if not self._rdkit_available:
            return None

        if not isinstance(smiles, str) or not smiles.strip():
            return None

        from rdkit import Chem

        clean_smiles = self._normalize_smiles(smiles)
        if not clean_smiles:
            # Nothing left after cleaning (e.g. the input was only "SMILES:").
            return None

        # Strategy 1: standard parse.
        mol = Chem.MolFromSmiles(clean_smiles)
        if mol is not None:
            return mol

        # Strategy 2: parse unsanitized, then sanitize explicitly.
        try:
            mol = Chem.MolFromSmiles(clean_smiles, sanitize=False)
            if mol is not None:
                Chem.SanitizeMol(mol)
                return mol
        except Exception:
            # Best-effort fallback: move on to the next strategy.
            pass

        # Strategy 3: drop stereochemistry markers and retry.
        try:
            stripped_smiles = self._strip_stereochemistry(clean_smiles)
            mol = Chem.MolFromSmiles(stripped_smiles)
            if mol is not None:
                return mol
        except Exception:
            # Best-effort fallback: move on to the next strategy.
            pass

        # Strategy 4: salts / mixtures -- keep the longest fragment only.
        if '.' in clean_smiles:
            largest = max(clean_smiles.split('.'), key=len)
            mol = Chem.MolFromSmiles(largest)
            if mol is not None:
                return mol

        return None

    def _normalize_smiles(self, smiles: str) -> str:
        """
        Normalize SMILES string by cleaning common issues.

        Steps: URL-decode (when that is safe), remove all whitespace, strip
        leading ``SMILES:`` / ``SMILES=`` style labels, and convert full-width
        punctuation to ASCII.

        Args:
            smiles: Raw SMILES text

        Returns:
            Cleaned SMILES text (may be empty)
        """
        import urllib.parse

        if '%' in smiles:
            decoded = urllib.parse.unquote(smiles)
            # '%' is also the SMILES marker for two-digit ring closures, and
            # e.g. "%10" percent-decodes to a control character. Accept the
            # decoded form only when it contains no control characters and no
            # U+FFFD decoding failures; otherwise treat '%' as SMILES syntax.
            # Known limitation: closures that decode to printable characters
            # (e.g. "%23" -> "#") cannot be told apart from URL encoding.
            looks_url_encoded = '\ufffd' not in decoded and all(
                ch.isprintable() or ch.isspace() for ch in decoded
            )
            if looks_url_encoded:
                smiles = decoded

        # SMILES never contains whitespace; drop all of it.
        smiles = "".join(smiles.split())

        for prefix in self._SMILES_PREFIXES:
            if smiles.startswith(prefix):
                smiles = smiles[len(prefix):]

        return smiles.translate(self._PUNCTUATION_FIXES)

    def _strip_stereochemistry(self, smiles: str) -> str:
        """
        Strip stereochemistry from SMILES for fallback parsing.

        Removes chirality markers (``@`` / ``@@``) and the directional bond
        markers ``/`` and ``\\``.

        Args:
            smiles: SMILES notation

        Returns:
            SMILES text without stereo markers
        """
        import re

        without_chirality = re.sub(r'@+', '', smiles)
        # Character class matches a single '/' or a single backslash.
        return re.sub(r'[/\\]', '', without_chirality)

    def render_2d_svg(
        self,
        smiles: str,
        width: int = 400,
        height: int = 300,
        highlight_atoms: Optional[List[int]] = None,
    ) -> Optional[str]:
        """
        Render 2D structure as SVG.

        Args:
            smiles: SMILES notation
            width: Image width
            height: Image height
            highlight_atoms: Optional list of atom indices to highlight

        Returns:
            SVG string, or None when RDKit is unavailable or the SMILES
            cannot be parsed
        """
        if not self._rdkit_available:
            return None

        from rdkit.Chem.Draw import rdMolDraw2D

        mol = self.parse_smiles(smiles)
        if mol is None:
            return None

        drawer = rdMolDraw2D.MolDraw2DSVG(width, height)

        opts = drawer.drawOptions()
        opts.addStereoAnnotation = True
        opts.addAtomIndices = False

        # An empty list is treated the same as "no highlighting".
        if highlight_atoms:
            drawer.DrawMolecule(mol, highlightAtoms=highlight_atoms)
        else:
            drawer.DrawMolecule(mol)

        drawer.FinishDrawing()
        return drawer.GetDrawingText()

    def render_2d_png_base64(
        self,
        smiles: str,
        width: int = 400,
        height: int = 300,
    ) -> Optional[str]:
        """
        Render 2D structure as PNG and return base64 encoded string.

        Args:
            smiles: SMILES notation
            width: Image width
            height: Image height

        Returns:
            Base64 encoded PNG string, or None when RDKit is unavailable or
            the SMILES cannot be parsed
        """
        if not self._rdkit_available:
            return None

        from rdkit.Chem import Draw

        mol = self.parse_smiles(smiles)
        if mol is None:
            return None

        img = Draw.MolToImage(mol, size=(width, height))

        buffer = io.BytesIO()
        img.save(buffer, format='PNG')
        return base64.b64encode(buffer.getvalue()).decode('utf-8')

    def get_data_uri(self, smiles: str, width: int = 400, height: int = 300) -> Optional[str]:
        """
        Get a data URI for embedding molecule image in HTML.

        Args:
            smiles: SMILES notation
            width: Image width
            height: Image height

        Returns:
            ``data:image/png;base64,...`` string, or None when no PNG could
            be rendered
        """
        png_base64 = self.render_2d_png_base64(smiles, width, height)
        if png_base64:
            return f"data:image/png;base64,{png_base64}"
        return None

    def calculate_morgan_fingerprint(
        self,
        smiles: str,
        radius: int = 2,
        n_bits: int = 2048,
    ) -> Optional[List[int]]:
        """
        Calculate Morgan fingerprint (circular fingerprint).

        Args:
            smiles: SMILES notation
            radius: Fingerprint radius
            n_bits: Number of bits

        Returns:
            List of on-bit indices, or None when RDKit is unavailable or the
            SMILES cannot be parsed
        """
        if not self._rdkit_available:
            return None

        from rdkit.Chem import AllChem

        mol = self.parse_smiles(smiles)
        if mol is None:
            return None

        fp = AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits=n_bits)
        return list(fp.GetOnBits())

    def render_fingerprint_bits(
        self,
        smiles: str,
        radius: int = 2,
        highlight_bits: Optional[List[int]] = None,
    ) -> Optional[str]:
        """
        Render Morgan fingerprint bit visualization as SVG.

        Highlights the center atoms of the environments that set the
        requested fingerprint bits. Without ``highlight_bits`` (or when none
        of them is set for this molecule) the plain 2D structure is drawn.

        Args:
            smiles: SMILES notation
            radius: Morgan fingerprint radius
            highlight_bits: Specific bits to highlight

        Returns:
            SVG string, or None when RDKit is unavailable or the SMILES
            cannot be parsed
        """
        if not self._rdkit_available:
            return None

        mol = self.parse_smiles(smiles)
        if mol is None:
            return None

        if not highlight_bits:
            # The fingerprint is only needed to map bits to atoms.
            return self.render_2d_svg(smiles, highlight_atoms=None)

        from rdkit.Chem import AllChem

        # Filled by RDKit as {bit: ((center_atom, radius), ...)}.
        bit_info: Dict[int, Any] = {}
        AllChem.GetMorganFingerprintAsBitVect(mol, radius, bitInfo=bit_info)

        center_atoms = {
            center_atom
            for bit in highlight_bits
            for center_atom, _env_radius in bit_info.get(bit, ())
        }
        return self.render_2d_svg(smiles, highlight_atoms=sorted(center_atoms))

    def calculate_properties(self, smiles: str) -> Optional[Dict[str, Any]]:
        """
        Calculate basic molecular properties.

        Args:
            smiles: SMILES notation

        Returns:
            Dictionary with keys ``molecular_weight``, ``logp``, ``hbd``,
            ``hba``, ``tpsa``, ``rotatable_bonds``, ``num_atoms``,
            ``num_heavy_atoms``, ``num_rings`` and ``fraction_sp3`` (float
            values rounded to two decimals), or None when RDKit is
            unavailable or the SMILES cannot be parsed
        """
        if not self._rdkit_available:
            return None

        from rdkit.Chem import Descriptors, Lipinski

        mol = self.parse_smiles(smiles)
        if mol is None:
            return None

        return {
            "molecular_weight": round(Descriptors.MolWt(mol), 2),
            "logp": round(Descriptors.MolLogP(mol), 2),
            "hbd": Lipinski.NumHDonors(mol),
            "hba": Lipinski.NumHAcceptors(mol),
            "tpsa": round(Descriptors.TPSA(mol), 2),
            "rotatable_bonds": Lipinski.NumRotatableBonds(mol),
            "num_atoms": mol.GetNumAtoms(),
            "num_heavy_atoms": Lipinski.HeavyAtomCount(mol),
            "num_rings": Lipinski.RingCount(mol),
            "fraction_sp3": round(Lipinski.FractionCSP3(mol), 2),
        }

    def get_molecule_info(self, smiles: str, name: Optional[str] = None) -> "MoleculeInfo":
        """
        Get comprehensive molecule information including rendered images.

        Args:
            smiles: SMILES notation
            name: Optional molecule name

        Returns:
            MoleculeInfo object with all available data; only ``smiles`` and
            ``name`` are set when RDKit is unavailable
        """
        info = MoleculeInfo(smiles=smiles, name=name)

        if not self._rdkit_available:
            return info

        props = self.calculate_properties(smiles)
        if props:
            info.molecular_weight = props["molecular_weight"]
            info.logp = props["logp"]
            info.hbd = props["hbd"]
            info.hba = props["hba"]
            info.tpsa = props["tpsa"]
            info.rotatable_bonds = props["rotatable_bonds"]

        info.structure_2d_svg = self.render_2d_svg(smiles)
        info.structure_2d_png_base64 = self.render_2d_png_base64(smiles)

        return info

    def identify_functional_groups(self, smiles: str) -> List[Dict[str, Any]]:
        """
        Identify reactive functional groups in a molecule using SMARTS patterns.

        This is crucial for compatibility analysis as it identifies
        potential reactive sites in the API molecule.

        Args:
            smiles: SMILES notation

        Returns:
            One dictionary per matched group with keys ``id``, ``name_cn``,
            ``name_en``, ``property_type``, ``potential_reactions``,
            ``count`` and ``atom_indices``. Empty when RDKit is unavailable,
            the SMILES cannot be parsed, or nothing matches.
        """
        if not self._rdkit_available:
            return []

        from rdkit import Chem

        mol = self.parse_smiles(smiles)
        if mol is None:
            return []

        identified_groups = []
        for group_id, group_info in self._FUNCTIONAL_GROUP_PATTERNS.items():
            pattern = Chem.MolFromSmarts(group_info["smarts"])
            if pattern is None:
                # Unparseable SMARTS: skip this group rather than fail.
                continue

            matches = mol.GetSubstructMatches(pattern)
            if not matches:
                continue

            identified_groups.append({
                "id": group_id,
                "name_cn": group_info["name_cn"],
                "name_en": group_info["name_en"],
                "property_type": group_info["property_type"],
                # Copy so callers cannot mutate the shared class-level table.
                "potential_reactions": list(group_info["reactions"]),
                "count": len(matches),
                "atom_indices": [list(m) for m in matches],
            })

        return identified_groups

    def get_functional_groups_summary(self, smiles: str) -> str:
        """
        Get a formatted text summary of identified functional groups.

        Args:
            smiles: SMILES notation

        Returns:
            Formatted string for use in prompts: one entry per group joined
            with ``;``, or a fixed fallback message when no group was found
        """
        groups = self.identify_functional_groups(smiles)

        if not groups:
            return "未能识别到特征官能团,请人工确认分子结构"

        lines = []
        for g in groups:
            # The multiplicity suffix is shown only for repeated groups.
            count_str = f"×{g['count']}" if g['count'] > 1 else ""
            lines.append(f"{g['name_cn']}({g['name_en']}){count_str} - {g['property_type']}")

        return ";".join(lines)
|
|
|
|
|
def _js_string_literal(value: str) -> str:
    """Return *value* as a JavaScript string literal that is safe inside ``<script>``."""
    import json

    # json.dumps yields a double-quoted literal with quotes, backslashes and
    # control characters escaped. '<', '>' and '&' are additionally escaped
    # so the value can never terminate the enclosing <script> element.
    literal = json.dumps(str(value))
    return (
        literal.replace("<", "\\u003c")
        .replace(">", "\\u003e")
        .replace("&", "\\u0026")
    )


def get_3dmol_script(smiles: str, container_id: str = "mol3d") -> str:
    """
    Generate JavaScript for 3Dmol.js visualization.

    This returns a script that can be embedded in HTML to show
    an interactive 3D molecular viewer.

    Both arguments are emitted as escaped JavaScript string literals.
    SMILES routinely contains backslashes (the cis/trans bond marker), which
    a raw quoted literal would turn into JS escape sequences, and unescaped
    quotes or ``</script>`` would allow script injection.

    Args:
        smiles: SMILES notation
        container_id: HTML container element ID

    Returns:
        ``<script>`` element markup as a string
    """
    smiles_js = _js_string_literal(smiles)
    container_js = _js_string_literal(container_id)

    # NOTE(review): confirm that the deployed 3Dmol.js build accepts the
    # 'smi' format passed to addModel below.
    return f"""
<script>
(function() {{
    let viewer = $3Dmol.createViewer(document.getElementById({container_js}), {{
        backgroundColor: 'white'
    }});

    // Use PubChem to get 3D structure from SMILES
    // Alternatively, generate conformer with RDKit
    let smiles = {smiles_js};

    // Add molecule from SMILES (requires 3Dmol.js SmilesParser)
    viewer.addModel(smiles, 'smi');
    viewer.setStyle({{}}, {{stick: {{}}}});
    viewer.zoomTo();
    viewer.render();
}})();
</script>
"""
|
|
|
|
|
def get_3dmol_html(
    smiles: str,
    width: int = 400,
    height: int = 300,
) -> str:
    """
    Build the HTML fragment for a 3Dmol.js viewer.

    The fragment consists of a sized wrapper ``div`` around the ``mol3d``
    viewer element, the 3Dmol.js loader tag, and the viewer script produced
    by ``get_3dmol_script``.

    Args:
        smiles: SMILES notation
        width: Viewer width in pixels
        height: Viewer height in pixels

    Returns:
        HTML fragment as a string
    """
    viewer_script = get_3dmol_script(smiles, 'mol3d')
    wrapper_style = f"width: {width}px; height: {height}px; position: relative;"

    # Leading and trailing empty entries give the fragment its surrounding
    # newlines once joined.
    fragment_lines = [
        "",
        f'<div id="mol3d-container" style="{wrapper_style}">',
        '<div id="mol3d" style="width: 100%; height: 100%;"></div>',
        "</div>",
        '<script src="https://3dmol.org/build/3Dmol-min.js"></script>',
        viewer_script,
        "",
    ]
    return "\n".join(fragment_lines)
|
|
|
|
|
|
|
# Shared module-level renderer instance, created at import time.
# Constructing it runs the RDKit availability check, which prints an
# installation hint to stdout when RDKit cannot be imported.
renderer = MoleculeRenderer()
|
|
|