Spaces:
Sleeping
Sleeping
| import io | |
| import json | |
| import re | |
| from collections import Counter | |
| import pandas as pd | |
| import streamlit as st | |
| from rdkit import Chem, RDLogger | |
| from rdkit.Chem import AllChem, Descriptors, Draw, rdMolDescriptors | |
| from streamlit.components.v1 import html | |
| from rdkit.Chem import Lipinski, Crippen | |
| from rdkit.Chem.rdMolDescriptors import CalcTPSA, CalcExactMolWt, CalcFractionCSP3, CalcNumRings, CalcNumAromaticRings | |
| from src.ui_style import apply_global_style, render_page_header | |
# Turn off RDKit's "rdApp.*" log channels.
RDLogger.DisableLog("rdApp.*")
# Page-level configuration, then the shared styling and header helpers
# imported from src.ui_style.
st.set_page_config(page_title="Molecular View", layout="wide")
apply_global_style()
render_page_header(
    title="Molecular Structure View",
    subtitle="Inspect 2D and 3D polymer structures and review repeat-unit descriptors.",
    badge="Molecular View",
)
| # ------------------------- | |
| # Polymer-safe helpers | |
| # ------------------------- | |
def smiles_polymer_as_is(smiles: str) -> Chem.Mol | None:
    """Parse a polymer SMILES for 2D use, leaving '*' attachment points untouched.

    Returns None for empty/blank input or when parsing raises; otherwise
    returns the result of ``Chem.MolFromSmiles`` on the stripped string.
    """
    text = smiles.strip() if smiles else ""
    if text == "":
        return None
    try:
        parsed = Chem.MolFromSmiles(text, sanitize=True)
    except Exception:
        return None
    return parsed
def smiles_for_calc_and_3d(smiles: str) -> Chem.Mol | None:
    """Build the hydrogen-capped mol used for calculations and 3D embedding.

    Every attachment point is replaced by an explicit hydrogen so RDKit can
    sanitize and embed the fragment. This is only a minimal capping to get a
    chemically valid fragment; it does not model real end groups.

    Both spellings of an attachment point are handled:
      - bare wildcard:       ``*``
      - bracketed wildcard:  ``[*]``, ``[*:1]``, ``[1*]`` ...

    Returns None for empty/blank input or when parsing raises; otherwise
    returns the result of ``Chem.MolFromSmiles`` on the capped string.
    """
    s = (smiles or "").strip()
    if not s:
        return None
    # Rewrite bracketed wildcards as a whole first. A plain text replacement
    # of '*' would turn '[*]' into '[[H]]', which is not valid SMILES.
    s_cap = re.sub(r"\[[^\[\]]*\*[^\[\]]*\]", "[H]", s)
    # Whatever '*' is left is a bare wildcard atom.
    s_cap = s_cap.replace("*", "[H]")
    try:
        return Chem.MolFromSmiles(s_cap, sanitize=True)
    except Exception:
        return None
def count_attachment_points(mol_poly_as_is: Chem.Mol) -> int:
    """Return how many atoms of the as-is polymer mol have the symbol '*'."""
    total = 0
    for atom in mol_poly_as_is.GetAtoms():
        if atom.GetSymbol() == "*":
            total += 1
    return total
def count_atoms_excluding_attachment_points(mol_poly_as_is: Chem.Mol) -> int:
    """Return the number of atoms in the as-is polymer mol whose symbol is not '*'."""
    real_atoms = [
        atom for atom in mol_poly_as_is.GetAtoms() if atom.GetSymbol() != "*"
    ]
    return len(real_atoms)
def parse_formula(formula: str) -> dict[str, int]:
    """Turn a formula string into element counts.

    Example: 'C2H4Cl' -> {'C': 2, 'H': 4, 'Cl': 1}. A symbol without a
    trailing number counts once; repeated symbols are summed.
    """
    totals: dict[str, int] = {}
    for match in re.finditer(r"([A-Z][a-z]?)(\d*)", formula):
        symbol, digits = match.group(1), match.group(2)
        amount = int(digits) if digits else 1
        totals[symbol] = totals.get(symbol, 0) + amount
    return totals
def format_formula(counts: dict[str, int]) -> str:
    """Format element counts as a formula string in Hill order.

    Hill order:
      - if carbon is present: C first, then H, then every other element
        alphabetically;
      - if carbon is absent: every element, hydrogen included, alphabetically
        (e.g. 'ClH', 'BH3N').

    Elements with a non-positive count are left out, and a count of 1 is
    written without a number. Returns '' when nothing remains.
    """
    present = {elem: n for elem, n in counts.items() if n > 0}
    if "C" in present:
        ordered = ["C"]
        if "H" in present:
            ordered.append("H")
        ordered.extend(sorted(e for e in present if e not in {"C", "H"}))
    else:
        # No carbon: hydrogen gets no special position.
        ordered = sorted(present)
    return "".join(
        elem if present[elem] == 1 else f"{elem}{present[elem]}"
        for elem in ordered
    )
def repeat_unit_info_from_polymer_smiles(poly_smiles: str) -> dict | None:
    """Derive repeat-unit formula, molar weight and atom count from a polymer SMILES.

    The SMILES is parsed twice: once as-is (to count '*' attachment points)
    and once hydrogen-capped (to get a formula and weight from RDKit). The
    capping hydrogens are then subtracted again, one per attachment point.

    Returns None if either parse fails; otherwise a dict with the keys
    'repeat_formula', 'repeat_mw', 'atoms_repeat', 'n_attach' and 'mol_cap'.
    """
    as_is = smiles_polymer_as_is(poly_smiles)
    if as_is is None:
        return None
    attach_count = count_attachment_points(as_is)

    capped = smiles_for_calc_and_3d(poly_smiles)
    if capped is None:
        return None

    # Work on a copy with explicit hydrogens so the formula lists every H.
    capped_with_h = Chem.AddHs(Chem.Mol(capped))
    formula_capped = rdMolDescriptors.CalcMolFormula(capped_with_h)
    mw_capped = Descriptors.MolWt(capped_with_h)

    # Remove the capping hydrogens from the element counts.
    element_counts = parse_formula(formula_capped)
    hydrogens_left = element_counts.get("H", 0) - attach_count
    element_counts["H"] = hydrogens_left
    if hydrogens_left <= 0:
        element_counts.pop("H", None)

    result = {
        "repeat_formula": format_formula(element_counts),
        # 1.008 is subtracted once per capping hydrogen.
        "repeat_mw": mw_capped - (attach_count * 1.008),
        # Atom count includes hydrogens.
        "atoms_repeat": sum(element_counts.values()),
        "n_attach": attach_count,
        "mol_cap": capped,
    }
    return result
| # ------------------------- | |
| # 3D generation + viewer | |
| # ------------------------- | |
def build_3d_molblock(mol: Chem.Mol) -> tuple[str | None, int]:
    """Embed a 3D conformer of *mol* and return it as a MOL block (best-effort).

    Works on a copy of *mol* with explicit hydrogens added, so the embedded
    geometry, the force-field clean-up and the exported MOL block all contain
    the hydrogen atoms.

    Returns:
        ``(mol_block, atom_count)`` on success, where ``atom_count`` is the
        number of atoms in the embedded mol (hydrogens included);
        ``(None, 0)`` when embedding fails or any RDKit call raises.
    """
    try:
        # Copy first so the caller's mol is never modified.
        m3d = Chem.AddHs(Chem.Mol(mol))
        params = AllChem.ETKDGv2()
        params.randomSeed = 7  # fixed seed
        status = AllChem.EmbedMolecule(m3d, params)
        if status != 0:
            # Second attempt starting from random coordinates.
            params.useRandomCoords = True
            status = AllChem.EmbedMolecule(m3d, params)
        if status != 0:
            return None, 0
        try:
            AllChem.UFFOptimizeMolecule(m3d, maxIters=200)
        except Exception:
            # Optimisation is optional: keep the raw embedded geometry.
            pass
        return Chem.MolToMolBlock(m3d), m3d.GetNumAtoms()
    except Exception:
        return None, 0
def render_3d_viewer(mol_block: str, height: int, style: str, background: str, viewer_id: str) -> None:
    """Render a 3Dmol.js viewer for *mol_block* through the Streamlit html component.

    Args:
        mol_block: MOL block text handed to the viewer as a "mol" model.
        height: Height of the viewer div in pixels; the component itself is
            56 px taller to leave room for the button row.
        style: UI label of the drawing style; unknown labels fall back to "Stick".
        background: UI label of the background; unknown labels fall back to white.
        viewer_id: Suffix used to build the ids of the viewer div and its buttons.
    """
    # UI label -> 3Dmol.js style spec (JavaScript object literal).
    style_specs = {
        "Stick": "{stick:{radius:0.18}}",
        "Sphere": "{sphere:{scale:0.30}}",
        "Line": "{line:{linewidth:2.0}}",
    }
    # UI label -> CSS colour.
    bg_colors = {"White": "white", "Light Gray": "#f3f5f8", "Black": "black"}

    container_id = f"viewer3d_{viewer_id}"
    reset_id = f"reset3d_{viewer_id}"
    download_id = f"download3d_{viewer_id}"

    style_spec = style_specs.get(style, style_specs["Stick"])
    bg_color = bg_colors.get(background, "white")
    # JSON-encode the MOL block so it is a valid JavaScript string literal.
    model_literal = json.dumps(mol_block)

    page = f"""
<div style="display:flex; gap:8px; margin:0 0 10px 0;">
<button id="{reset_id}" style="padding:7px 12px; border:1px solid #ccd3df; border-radius:8px; background:#f5f7fb; cursor:pointer; font-weight:600;">
Reset View
</button>
<button id="{download_id}" style="padding:7px 12px; border:1px solid #2c7be5; border-radius:8px; background:#2c7be5; color:white; cursor:pointer; font-weight:700;">
Download 3D PNG
</button>
</div>
<div id="{container_id}" style="width:100%; height:{height}px; border:1px solid #d8dde6; border-radius:10px;"></div>
<script src="https://3Dmol.org/build/3Dmol-min.js"></script>
<script>
const viewer = $3Dmol.createViewer("{container_id}", {{backgroundColor: "{bg_color}"}});
viewer.addModel({model_literal}, "mol");
viewer.setStyle({{}}, {style_spec});
viewer.zoomTo();
viewer.render();
document.getElementById("{reset_id}").onclick = function() {{
viewer.zoomTo();
viewer.render();
}};
document.getElementById("{download_id}").onclick = function() {{
const uri = viewer.pngURI();
const a = document.createElement("a");
a.href = uri;
a.download = "molecule_3d.png";
a.click();
}};
</script>
"""
    html(page, height=height + 56)
| # ------------------------- | |
| # Legend with colors + H included | |
| # ------------------------- | |
def render_element_legend_with_colors(mol: Chem.Mol, include_hydrogens: bool = True) -> None:
    """Draw an element legend: one row per element with a coloured dot and its count.

    Args:
        mol: Molecule whose atoms are counted.
        include_hydrogens: When True, atoms are counted on a copy of *mol*
            with hydrogens added via ``Chem.AddHs``; otherwise on *mol* itself.

    Atoms with the symbol '*' are never listed. If no element remains, a short
    caption is shown instead of the table.
    """
    palette = {
        "H": "#d4dfef",
        "C": "#909090",
        "N": "#3050f8",
        "O": "#ff0d0d",
        "F": "#90e050",
        "Cl": "#1ff01f",
        "Br": "#a62929",
        "I": "#940094",
        "S": "#ffff30",
        "P": "#ff8000",
        "*": "#b0b7c3",
    }
    column_widths = [0.8, 1.2, 1.0]

    if include_hydrogens:
        counted_mol = Chem.AddHs(Chem.Mol(mol))
    else:
        counted_mol = mol

    # Tally symbols, skipping '*' attachment points.
    tally = Counter(
        symbol
        for symbol in (atom.GetSymbol() for atom in counted_mol.GetAtoms())
        if symbol != "*"
    )

    st.markdown("**Element Legend**")
    if len(tally) == 0:
        st.caption("No element legend available.")
        return

    head_color, head_elem, head_count = st.columns(column_widths)
    head_color.markdown("**Color**")
    head_elem.markdown("**Element**")
    head_count.markdown("**Count**")
    st.markdown("---")

    for symbol in sorted(tally):
        amount = tally[symbol]
        # Elements without a palette entry get the neutral grey.
        dot_color = palette.get(symbol, "#b0b7c3")
        col_color, col_elem, col_count = st.columns(column_widths)
        col_color.markdown(
            f"""
<span style="width:14px; height:14px; border-radius:50%; background:{dot_color};
border:1px solid #9ca8ba; display:inline-block;"></span>
""",
            unsafe_allow_html=True,
        )
        col_elem.write(symbol)
        col_count.write(int(amount))
| # ------------------------- | |
| # Session state | |
| # ------------------------- | |
# Make sure the session-state slot for the last accepted SMILES exists.
if "mv_active_smiles" not in st.session_state:
    st.session_state["mv_active_smiles"] = ""
# -------------------------
# Inputs
# -------------------------
smiles_input = st.text_area(
    "Enter polymer SMILES",
    height=110,
    placeholder="Paste polymer SMILES here...",
)
# Edge length in pixels of the square 2D image.
img_size = st.selectbox("2D image size", options=[500,1000,2000], index=0)
# The typed SMILES only becomes the active one when the button is pressed
# and the string parses; otherwise the previously stored value is kept.
if st.button("Render structure", type="primary"):
    s = (smiles_input or "").strip()
    mol_try = smiles_polymer_as_is(s)
    if mol_try is None:
        st.error("Invalid SMILES. Please check the string.")
    else:
        st.session_state["mv_active_smiles"] = s
active_smiles = st.session_state.get("mv_active_smiles", "").strip()
# Nothing accepted yet: stop the script here without showing a message.
if not active_smiles:
    #st.info("Enter a SMILES string and click `Render structure`.")
    st.stop()
# Re-parse the stored SMILES for the 2D drawing.
mol_2d = smiles_polymer_as_is(active_smiles)
if mol_2d is None:
    st.error("Stored SMILES is invalid. Please render again.")
    st.stop()
# Repeat-unit formula / weight / atom count plus the capped mol.
info = repeat_unit_info_from_polymer_smiles(active_smiles)
if info is None:
    st.error("Could not compute repeat-unit info from this polymer SMILES.")
    st.stop()
mol_cap = info["mol_cap"]  # capped ( '*' -> [H] ) mol, no '*'
# Build 3D molblock once (used for 3D viewer and MOL download)
# NOTE(review): atoms_3d is not read anywhere in the visible part of the file.
mol_block_3d, atoms_3d = build_3d_molblock(mol_cap)
| # ------------------------- | |
| # Top: 2D + info + MOL download below table | |
| # ------------------------- | |
# Two columns: 2D drawing on the left (wider), info table on the right.
top_left, top_right = st.columns([2.2, 1.2])
with top_left:
    st.markdown("2D Structure ")
    # Drawn from the as-is mol, so '*' attachment points are part of the picture.
    img = Draw.MolToImage(mol_2d, size=(img_size, img_size))
    st.image(img, width="content")
    # Serialise the same image to PNG bytes in memory for the download button.
    png_buf = io.BytesIO()
    img.save(png_buf, format="PNG")
    st.download_button(
        "Download 2D PNG",
        data=png_buf.getvalue(),
        file_name="molecule_2d.png",
        mime="image/png",
    )
with top_right:
    st.markdown("Molecule Information ")
    # Values come from repeat_unit_info_from_polymer_smiles (capping H removed).
    info_df = pd.DataFrame(
        {
            "Property": ["Formula", "Molar Weight", "Atoms"],
            "Value": [
                info["repeat_formula"],
                f"{info['repeat_mw']:.2f} g/mol",
                str(info["atoms_repeat"]),
            ],
        }
    )
    # Number the rows from 1 instead of 0.
    info_df.index = range(1, len(info_df) + 1)
    st.table(info_df)
    # MOL download *below the table*
    if mol_block_3d is not None:
        st.download_button(
            "Download .mol File",
            data=mol_block_3d.encode("utf-8"),
            file_name="repeat_unit_3d.mol",
            mime="chemical/x-mdl-molfile",
        )
    else:
        st.caption("MOL download unavailable (3D embedding failed).")
# Horizontal rule between the top and bottom rows.
st.markdown("---")
| # ------------------------- | |
| # Bottom: 3D + controls + legend (with H + colored dots) | |
| # ------------------------- | |
# Two columns: 3D viewer on the left (wider), controls and legend on the right.
bottom_left, bottom_right = st.columns([2.2, 1.2])
with bottom_left:
    st.markdown("**3D Structure**")
    if mol_block_3d is None:
        st.info("3D structure could not be generated for this polymer SMILES.")
    else:
        # The style/background widgets are created further down (right column),
        # so their values are read from session state, with defaults for the
        # case where the keys are not set yet.
        style_3d = st.session_state.get("mv_style_3d", "Stick")
        bg_3d = st.session_state.get("mv_bg_3d", "White")
        # Numeric suffix for the viewer's element ids, derived from the
        # SMILES and the two display settings.
        viewer_key = f"{abs(hash((active_smiles, style_3d, bg_3d))) % 10**8}"
        render_3d_viewer(
            mol_block=mol_block_3d,
            height=max(360, 200 + 60),  # evaluates to 360
            style=style_3d,
            background=bg_3d,
            viewer_id=viewer_key,
        )
with bottom_right:
    st.markdown("**3D Visualization Controls**")
    st.selectbox("Style", options=["Stick", "Sphere", "Line"], key="mv_style_3d")
    st.selectbox("Background", options=["White", "Light Gray", "Black"], key="mv_bg_3d")
    # Legend: include hydrogens + colored dots
    # Use capped mol (no '*') for clean element counting
    # NOTE(review): placing the legend inside this column is inferred from the
    # section comment above; confirm against the original indentation.
    render_element_legend_with_colors(mol_cap, include_hydrogens=True)