# POLYMER-PROPERTY / pages/3_Molecular_View.py
# Last update by sobinalosious92: "Update pages/3_Molecular_View.py" (commit db2e997, verified)
import io
import json
import re
from collections import Counter
import pandas as pd
import streamlit as st
from rdkit import Chem, RDLogger
from rdkit.Chem import AllChem, Descriptors, Draw, rdMolDescriptors
from streamlit.components.v1 import html
from rdkit.Chem import Lipinski, Crippen
from rdkit.Chem.rdMolDescriptors import CalcTPSA, CalcExactMolWt, CalcFractionCSP3, CalcNumRings, CalcNumAromaticRings
from src.ui_style import apply_global_style, render_page_header
# Turn off RDKit logging for every "rdApp.*" channel.
RDLogger.DisableLog("rdApp.*")
# Page-level Streamlit settings: browser-tab title and wide layout.
st.set_page_config(page_title="Molecular View", layout="wide")
# Project-local helpers from src.ui_style; presumably they inject shared CSS
# and draw the page banner -- confirm in src/ui_style.py.
apply_global_style()
render_page_header(
title="Molecular Structure View",
subtitle="Inspect 2D and 3D polymer structures and review repeat-unit descriptors.",
badge="Molecular View",
)
# -------------------------
# Polymer-safe helpers
# -------------------------
def smiles_polymer_as_is(smiles: str) -> Chem.Mol | None:
    """Parse a polymer SMILES for 2D use, leaving '*' attachment points in place.

    Returns None for empty/blank input or when parsing raises; otherwise
    returns whatever ``Chem.MolFromSmiles`` yields for the stripped string.
    """
    cleaned = smiles.strip() if smiles else ""
    parsed = None
    if cleaned:
        try:
            parsed = Chem.MolFromSmiles(cleaned, sanitize=True)
        except Exception:
            parsed = None
    return parsed
def smiles_for_calc_and_3d(smiles: str) -> Chem.Mol | None:
    """Build the hydrogen-capped molecule used for descriptors and 3D.

    Every polymer attachment point is replaced by an explicit hydrogen so
    that RDKit can sanitize and embed the fragment. This capping exists only
    to obtain a chemically valid fragment; it is not a statement about the
    real chain ends.

    Recognised attachment-point spellings:
      - bare wildcard:            ``*``
      - bracketed wildcard:       ``[*]``
      - wildcard with atom class: ``[*:1]``

    Returns None for empty/blank input or when parsing raises; otherwise
    returns whatever ``Chem.MolFromSmiles`` yields for the capped string.
    """
    s = (smiles or "").strip()
    if not s:
        return None
    # Bracketed wildcards must be swapped as a whole token: a plain
    # str.replace("*", "[H]") would turn "[*]" into the invalid "[[H]]".
    # The bracketed alternative is listed first so it wins over the bare "*".
    s_cap = re.sub(r"\[\*(?::\d+)?\]|\*", "[H]", s)
    try:
        return Chem.MolFromSmiles(s_cap, sanitize=True)
    except Exception:
        return None
def count_attachment_points(mol_poly_as_is: Chem.Mol) -> int:
    """Return the number of atoms whose symbol is '*' in the as-is polymer mol."""
    n_stars = 0
    for atom in mol_poly_as_is.GetAtoms():
        if atom.GetSymbol() == "*":
            n_stars += 1
    return n_stars
def count_atoms_excluding_attachment_points(mol_poly_as_is: Chem.Mol) -> int:
    """Return the number of atoms in the mol whose symbol is not '*'."""
    real_atoms = [
        atom for atom in mol_poly_as_is.GetAtoms() if atom.GetSymbol() != "*"
    ]
    return len(real_atoms)
def parse_formula(formula: str) -> dict[str, int]:
    """Turn a formula string into element counts.

    Example: ``'C2H4Cl'`` -> ``{'C': 2, 'H': 4, 'Cl': 1}``. An element that
    appears more than once has its counts summed; a missing number means 1.
    Characters that are not part of an element token are ignored.
    """
    tally: dict[str, int] = {}
    for match in re.finditer(r"([A-Z][a-z]?)(\d*)", formula):
        symbol, digits = match.groups()
        amount = int(digits) if digits else 1
        tally[symbol] = tally.get(symbol, 0) + amount
    return tally
def format_formula(counts: dict[str, int]) -> str:
    """Render element counts as a formula string: C, then H, then the rest A-Z.

    Elements with a count of zero or less are omitted, and a count of one is
    written without a number. An empty mapping yields an empty string.
    """
    # Carbon and hydrogen lead (when present); all other symbols follow sorted.
    leading = [symbol for symbol in ("C", "H") if symbol in counts]
    trailing = sorted(symbol for symbol in counts if symbol not in ("C", "H"))

    pieces: list[str] = []
    for symbol in leading + trailing:
        qty = counts[symbol]
        if qty <= 0:
            continue
        pieces.append(symbol if qty == 1 else f"{symbol}{qty}")
    return "".join(pieces)
def repeat_unit_info_from_polymer_smiles(poly_smiles: str) -> dict | None:
    """Derive repeat-unit formula, weight and atom count from a polymer SMILES.

    The SMILES is parsed twice: once as-is (to count '*' attachment points)
    and once hydrogen-capped (for formula and weight). The capping hydrogens
    are then subtracted again, one per attachment point.

    Returns None when either parse fails; otherwise a dict with keys
    ``repeat_formula``, ``repeat_mw``, ``atoms_repeat``, ``n_attach`` and
    ``mol_cap`` (the capped molecule).
    """
    polymer_mol = smiles_polymer_as_is(poly_smiles)
    if polymer_mol is None:
        return None
    n_attach = count_attachment_points(polymer_mol)

    capped_mol = smiles_for_calc_and_3d(poly_smiles)
    if capped_mol is None:
        return None

    # Work on a copy with explicit hydrogens so the formula and weight
    # are computed on the fully hydrogenated capped fragment.
    capped_with_h = Chem.AddHs(Chem.Mol(capped_mol))
    element_counts = parse_formula(rdMolDescriptors.CalcMolFormula(capped_with_h))
    capped_mw = Descriptors.MolWt(capped_with_h)

    # Remove the capping hydrogens; drop the H entry entirely if none remain.
    hydrogens_left = element_counts.get("H", 0) - n_attach
    if hydrogens_left > 0:
        element_counts["H"] = hydrogens_left
    else:
        element_counts.pop("H", None)

    return {
        "repeat_formula": format_formula(element_counts),
        "repeat_mw": capped_mw - (n_attach * 1.008),
        # Atom count of the repeat unit, hydrogens included.
        "atoms_repeat": sum(element_counts.values()),
        "n_attach": n_attach,
        "mol_cap": capped_mol,
    }
# -------------------------
# 3D generation + viewer
# -------------------------
def build_3d_molblock(mol: Chem.Mol) -> tuple[str | None, int]:
    """Build a 3D MOL block for *mol* (best-effort).

    A copy of the molecule gets explicit hydrogens, is embedded with ETKDGv2
    using a fixed random seed, and is then relaxed with UFF. The input
    molecule is not modified.

    Returns:
        ``(mol_block, n_atoms)`` on success, where ``n_atoms`` counts the
        atoms of the embedded molecule (explicit hydrogens included), or
        ``(None, 0)`` when embedding fails or any RDKit call raises.
    """
    try:
        # Explicit hydrogens are needed for a sensible embedded geometry and
        # so that the 3D view / .mol download match the element legend, which
        # counts hydrogens.
        m3d = Chem.AddHs(Chem.Mol(mol))
        params = AllChem.ETKDGv2()
        params.randomSeed = 7  # fixed seed -> same conformer on every rerun
        status = AllChem.EmbedMolecule(m3d, params)
        if status != 0:
            return None, 0
        try:
            AllChem.UFFOptimizeMolecule(m3d, maxIters=200)
        except Exception:
            # Deliberate best-effort: if the force-field step fails, keep the
            # unrefined embedded coordinates rather than losing the 3D view.
            pass
        return Chem.MolToMolBlock(m3d), m3d.GetNumAtoms()
    except Exception:
        return None, 0
def render_3d_viewer(mol_block: str, height: int, style: str, background: str, viewer_id: str) -> None:
    """Embed a 3Dmol.js viewer for *mol_block* with reset and PNG-download buttons.

    ``style`` and ``background`` are UI labels; unknown labels fall back to
    the "Stick" style and a white background. ``viewer_id`` is appended to
    the DOM ids of the viewer div and both buttons. The component is rendered
    56 px taller than ``height`` to leave room for the button row.
    """
    # UI label -> 3Dmol.js style spec (inserted verbatim into the script).
    style_specs = {
        "Stick": "{stick:{radius:0.18}}",
        "Sphere": "{sphere:{scale:0.30}}",
        "Line": "{line:{linewidth:2.0}}",
    }
    # UI label -> CSS colour for the viewer background.
    background_colors = {"White": "white", "Light Gray": "#f3f5f8", "Black": "black"}

    style_js = style_specs[style] if style in style_specs else style_specs["Stick"]
    bg_js = background_colors[background] if background in background_colors else "white"
    # JSON-encode the MOL block so it becomes a valid JS string literal.
    mol_block_js = json.dumps(mol_block)
    viewer_div, reset_btn, dl_btn = (
        f"{stem}_{viewer_id}" for stem in ("viewer3d", "reset3d", "download3d")
    )

    viewer_html = f"""
<div style="display:flex; gap:8px; margin:0 0 10px 0;">
<button id="{reset_btn}" style="padding:7px 12px; border:1px solid #ccd3df; border-radius:8px; background:#f5f7fb; cursor:pointer; font-weight:600;">
Reset View
</button>
<button id="{dl_btn}" style="padding:7px 12px; border:1px solid #2c7be5; border-radius:8px; background:#2c7be5; color:white; cursor:pointer; font-weight:700;">
Download 3D PNG
</button>
</div>
<div id="{viewer_div}" style="width:100%; height:{height}px; border:1px solid #d8dde6; border-radius:10px;"></div>
<script src="https://3Dmol.org/build/3Dmol-min.js"></script>
<script>
const viewer = $3Dmol.createViewer("{viewer_div}", {{backgroundColor: "{bg_js}"}});
viewer.addModel({mol_block_js}, "mol");
viewer.setStyle({{}}, {style_js});
viewer.zoomTo();
viewer.render();
document.getElementById("{reset_btn}").onclick = function() {{
viewer.zoomTo();
viewer.render();
}};
document.getElementById("{dl_btn}").onclick = function() {{
const uri = viewer.pngURI();
const a = document.createElement("a");
a.href = uri;
a.download = "molecule_3d.png";
a.click();
}};
</script>
"""
    html(viewer_html, height=height + 56)
# -------------------------
# Legend with colors + H included
# -------------------------
def render_element_legend_with_colors(mol: Chem.Mol, include_hydrogens: bool = True) -> None:
    """Draw an element legend: one row per element with a coloured dot and count.

    With ``include_hydrogens=True`` the counts are taken from a copy of the
    molecule with explicit hydrogens added; otherwise from ``mol`` as given.
    '*' atoms are never listed. Rows are sorted by element symbol; elements
    without an entry in the colour table use a neutral grey.
    """
    # Element symbol -> dot colour.
    cpk = {
        "H": "#d4dfef",
        "C": "#909090",
        "N": "#3050f8",
        "O": "#ff0d0d",
        "F": "#90e050",
        "Cl": "#1ff01f",
        "Br": "#a62929",
        "I": "#940094",
        "S": "#ffff30",
        "P": "#ff8000",
        "*": "#b0b7c3",
    }
    column_widths = [0.8, 1.2, 1.0]

    if include_hydrogens:
        counted_mol = Chem.AddHs(Chem.Mol(mol))
    else:
        counted_mol = mol
    counts = Counter(atom.GetSymbol() for atom in counted_mol.GetAtoms())
    counts.pop("*", None)

    st.markdown("**Element Legend**")
    if not counts:
        st.caption("No element legend available.")
        return

    # Header row, then a rule, then one three-column row per element.
    header_labels = ("**Color**", "**Element**", "**Count**")
    for column, label in zip(st.columns(column_widths), header_labels):
        column.markdown(label)
    st.markdown("---")

    for elem in sorted(counts):
        color = cpk.get(elem, "#b0b7c3")
        dot_col, name_col, count_col = st.columns(column_widths)
        dot_col.markdown(
            f"""
<span style="width:14px; height:14px; border-radius:50%; background:{color};
border:1px solid #9ca8ba; display:inline-block;"></span>
""",
            unsafe_allow_html=True,
        )
        name_col.write(elem)
        count_col.write(int(counts[elem]))
# -------------------------
# Session state
# -------------------------
# Session slot holding the last SMILES that was accepted for rendering.
if "mv_active_smiles" not in st.session_state:
    st.session_state["mv_active_smiles"] = ""

# -------------------------
# Inputs
# -------------------------
smiles_input = st.text_area(
    "Enter polymer SMILES",
    height=110,
    placeholder="Paste polymer SMILES here...",
)
img_size = st.selectbox("2D image size", options=[500, 1000, 2000], index=0)

if st.button("Render structure", type="primary"):
    candidate = smiles_input.strip() if smiles_input else ""
    # Only a SMILES that parses replaces the stored one; otherwise report it.
    if smiles_polymer_as_is(candidate) is not None:
        st.session_state["mv_active_smiles"] = candidate
    else:
        st.error("Invalid SMILES. Please check the string.")

active_smiles = st.session_state.get("mv_active_smiles", "").strip()
if not active_smiles:
    # Nothing accepted yet: end this run without showing any hint.
    st.stop()

mol_2d = smiles_polymer_as_is(active_smiles)
if mol_2d is None:
    st.error("Stored SMILES is invalid. Please render again.")
    st.stop()

info = repeat_unit_info_from_polymer_smiles(active_smiles)
if info is None:
    st.error("Could not compute repeat-unit info from this polymer SMILES.")
    st.stop()

# Capped molecule ('*' -> [H]); contains no '*' atoms.
mol_cap = info["mol_cap"]

# The 3D mol block is built once and shared by the viewer and the MOL download.
mol_block_3d, atoms_3d = build_3d_molblock(mol_cap)
# -------------------------
# Top: 2D + info + MOL download below table
# -------------------------
structure_col, details_col = st.columns([2.2, 1.2])

with structure_col:
    st.markdown("2D Structure ")
    structure_img = Draw.MolToImage(mol_2d, size=(img_size, img_size))
    st.image(structure_img, width="content")

    # Serialise the same image to PNG bytes for the download button.
    png_stream = io.BytesIO()
    structure_img.save(png_stream, format="PNG")
    st.download_button(
        "Download 2D PNG",
        data=png_stream.getvalue(),
        file_name="molecule_2d.png",
        mime="image/png",
    )

with details_col:
    st.markdown("Molecule Information ")
    summary_rows = [
        ("Formula", info["repeat_formula"]),
        ("Molar Weight", f"{info['repeat_mw']:.2f} g/mol"),
        ("Atoms", str(info["atoms_repeat"])),
    ]
    # Rows are numbered from 1 in the displayed table.
    summary_table = pd.DataFrame(
        summary_rows,
        columns=["Property", "Value"],
        index=range(1, len(summary_rows) + 1),
    )
    st.table(summary_table)

    # The MOL download sits directly below the table.
    if mol_block_3d is None:
        st.caption("MOL download unavailable (3D embedding failed).")
    else:
        st.download_button(
            "Download .mol File",
            data=mol_block_3d.encode("utf-8"),
            file_name="repeat_unit_3d.mol",
            mime="chemical/x-mdl-molfile",
        )

st.markdown("---")
# -------------------------
# Bottom: 3D + controls + legend (with H + colored dots)
# -------------------------
viewer_col, controls_col = st.columns([2.2, 1.2])

with viewer_col:
    st.markdown("**3D Structure**")
    if mol_block_3d is not None:
        # The control widgets are created further down, so their current
        # values are read from session state (defaults before first use).
        chosen_style = st.session_state.get("mv_style_3d", "Stick")
        chosen_background = st.session_state.get("mv_bg_3d", "White")
        # Numeric id derived from the SMILES and both display choices.
        id_seed = hash((active_smiles, chosen_style, chosen_background))
        render_3d_viewer(
            mol_block=mol_block_3d,
            height=max(360, 200 + 60),
            style=chosen_style,
            background=chosen_background,
            viewer_id=str(abs(id_seed) % 10**8),
        )
    else:
        st.info("3D structure could not be generated for this polymer SMILES.")

with controls_col:
    st.markdown("**3D Visualization Controls**")
    st.selectbox("Style", options=["Stick", "Sphere", "Line"], key="mv_style_3d")
    st.selectbox("Background", options=["White", "Light Gray", "Black"], key="mv_bg_3d")
    # Legend is built from the capped mol (no '*') with hydrogens counted.
    render_element_legend_with_colors(mol_cap, include_hydrogens=True)