""" visualize.py - 3Dmol.js 3D visualization and iRASPA CIF export for PoreGCN HF Space. Frontend contract (app.py imports): from visualize import create_3d_visualization, export_iraspa_cif create_3d_visualization(): Returns an HTML string (with embedded 3Dmol.js) ready for gr.HTML(). - Atoms colored by attribution via B-factor channel (bwr colormap): blue = negative attribution, white = neutral, red = positive attribution - Metal atoms rendered as larger spheres (radius 0.6) to visually mark SBU centers - Non-metal atoms rendered as sticks (radius 0.15) - Pores shown as a Mercury-style translucent yellow void isosurface (rolling 1.2 A probe + marching cubes; CCDC convention) - Per-cavity attribution carried by small opaque orange/blue beads at cavity centres, sized by |attribution| - Unit cell wireframe via addUnitCell() export_iraspa_cif(): Writes a CIF with _atom_site_B_iso_or_equiv overwritten by normalised attributions in [1, 99] (1=most negative, 50=neutral, 99=most positive). Compatible with iRASPA's Temperature Factor colouring. """ from __future__ import annotations import os import re from pathlib import Path from typing import Optional import numpy as np # Bundled 3Dmol.js library, loaded once at module import. # Avoids CDN dependency so the viewer works on networks that block cdnjs/jsdelivr. _3DMOL_JS_PATH = Path(__file__).parent / "3Dmol-min.js" _3DMOL_JS_INLINE: Optional[str] = None if _3DMOL_JS_PATH.exists(): _3DMOL_JS_INLINE = _3DMOL_JS_PATH.read_text(encoding="utf-8") def _inline_3dmol_library(viewer_html: str, width: int = 800, height: int = 600) -> str: """Patch py3Dmol HTML so 3Dmol.js runs in an isolated iframe. Two reasons we need the iframe: 1. Gradio's gr.HTML component sanitises ' patched_viewer = re.sub( r"loadScriptAsync\(['\"]https?://[^'\"]+['\"]\)", "Promise.resolve()", viewer_html, count=1, ) full_doc = ( "
" f"{inline_script}" f"" f"{patched_viewer}" ) # HTML-escape the doc for the srcdoc attribute (escape quotes and ampersands) srcdoc = full_doc.replace("&", "&").replace('"', """) iframe_html = ( f'' ) return iframe_html try: import py3Dmol PY3DMOL_OK = True except ImportError: PY3DMOL_OK = False try: from pymatgen.core import Structure from pymatgen.io.cif import CifWriter PYMATGEN_OK = True except ImportError: PYMATGEN_OK = False # Metal elements that get the large-sphere treatment in the 3D viewer. # Covers most MOF secondary building units (SBUs). _METAL_ELEMENTS = { 'Cu', 'Zn', 'Co', 'Zr', 'Fe', 'Mn', 'Ni', 'Mg', 'Ca', 'V', 'Al', 'Cr', 'Ti', 'Hf', 'In', 'Ga', 'Y', 'Ce', 'Nd', 'Tb', 'Eu', 'Gd', 'La', 'Mo', 'W', 'Ru', 'Rh', 'Pd', 'Cd', 'Sn', 'Ba', 'Sr', 'Li', 'Na', 'K', 'Rb', 'Cs', } def _standard_cif_lattice(abc, angles_deg): """Build the standard CIF Cartesian lattice matrix from a/b/c/alpha/beta/gamma. The standard convention (used by 3Dmol.js, Mercury, VESTA, iRASPA when parsing a CIF that specifies only cell scalars): a along +x b in the +xy plane (positive y) c chosen so c_z > 0 Pymatgen does NOT always store its `Lattice.matrix` in this orientation; when a CIF is loaded, the matrix may be rotated to a different frame. Atom positions written by pymatgen's CifWriter contain only fractional coordinates plus the cell scalars, so the rendering viewer (3Dmol) re-derives Cartesian positions in the standard convention. Any geometry we compute alongside the structure must be transformed into the same standard frame before being passed to the viewer. Returns a 3x3 ndarray with row vectors a, b, c. """ a, b, c = abc al, be, ga = [np.deg2rad(x) for x in angles_deg] ax, ay, az = a, 0.0, 0.0 bx, by, bz = b * np.cos(ga), b * np.sin(ga), 0.0 cx = c * np.cos(be) sin_ga = np.sin(ga) if abs(np.sin(ga)) > 1e-12 else 1e-12 cy = c * (np.cos(al) - np.cos(be) * np.cos(ga)) / sin_ga cz = np.sqrt(max(c * c - cx * cx - cy * cy, 0.0)) return np.array([[ax, ay, az], [bx, by, bz], [cx, cy, cz]], dtype=float) def _compute_void_mesh( structure, probe_radius: float = 1.2, target_grid_spacing: float = 0.7, n_per_axis_min: int = 16, n_per_axis_max: int = 60, max_vertices: int = 40000, vdw_eff: float = 1.5, ): """Compute a Mercury-style contact-surface void mesh for the unit cell. Rolls a `probe_radius` probe (default 1.2 A, helium-sized) through the unit cell and runs marching cubes at the void boundary. Mirrors Mercury's Display > Voids > Contact Surface (CCDC, see https://www.ccdc.cam.ac.uk/discover/blog/how-to-search-visualize-and-analyse-mof-structures). Atoms are replicated to a 3x3x3 super-image so distance queries respect periodic boundaries. A constant effective vdW radius of 1.5 A is used instead of per-element vdW; for typical MOF elements (C, N, O, H, Cu, Zn etc.) the per-element value is within 0.2 A of this mean and the visual is unchanged. Robustness behaviour. Per-axis grid resolution is clamped to [`n_per_axis_min`, `n_per_axis_max`] so very large unit cells do not blow up compute or browser payload, and very small cells still get a decent surface. Mesh vertex count is capped at `max_vertices`; if the surface would exceed that, the function returns (None, None) and the caller falls back to a no-mesh render (atom colouring plus attribution beads). All anticipated failure modes (missing scikit-image, missing scipy, degenerate lattice, all-void or all-solid grid, marching-cubes failure, oversize mesh) return (None, None). Returns ------- (vertices_cart, faces) where vertices_cart: np.ndarray [V, 3] in Cartesian Angstroms faces: np.ndarray [F, 3] of vertex indices per triangle or (None, None). """ # Optional dependencies; degrade silently if either is missing try: from skimage.measure import marching_cubes except Exception: return None, None try: from scipy.spatial import cKDTree except Exception: return None, None try: lattice = np.asarray(structure.lattice.matrix, dtype=float) a, b, c = structure.lattice.abc if min(a, b, c) <= 0.0: return None, None # Per-axis resolution clamped so any cell stays within the compute # and payload budget. Tiny cells get more samples than nominal # spacing would suggest; huge cells get coarser sampling. nx = int(np.clip(np.ceil(a / target_grid_spacing), n_per_axis_min, n_per_axis_max)) ny = int(np.clip(np.ceil(b / target_grid_spacing), n_per_axis_min, n_per_axis_max)) nz = int(np.clip(np.ceil(c / target_grid_spacing), n_per_axis_min, n_per_axis_max)) fx = np.linspace(0.0, 1.0, nx, endpoint=False) fy = np.linspace(0.0, 1.0, ny, endpoint=False) fz = np.linspace(0.0, 1.0, nz, endpoint=False) grid_frac = np.stack(np.meshgrid(fx, fy, fz, indexing='ij'), axis=-1) grid_cart = grid_frac.reshape(-1, 3) @ lattice atom_cart = np.array([site.coords for site in structure], dtype=float) if atom_cart.shape[0] == 0: return None, None shifts = [] for da in (-1, 0, 1): for db in (-1, 0, 1): for dc in (-1, 0, 1): shifts.append(da * lattice[0] + db * lattice[1] + dc * lattice[2]) images = np.vstack([atom_cart + s for s in shifts]) tree = cKDTree(images) cutoff = vdw_eff + probe_radius dist, _ = tree.query(grid_cart, k=1) void = (dist > cutoff).reshape(nx, ny, nz).astype(np.float32) if void.max() < 0.5 or void.min() > 0.5: return None, None try: verts_idx, faces, _, _ = marching_cubes( void, level=0.5, allow_degenerate=False ) except Exception: return None, None # Vertex cap so the JSON payload shipped to the iframe stays # bounded even for unusual very-high-porosity structures if verts_idx.shape[0] > max_vertices: return None, None # Convert to fractional coordinates (frame-agnostic), then to # Cartesian using the STANDARD CIF convention so the mesh aligns # with how 3Dmol.js positions the atoms it parses from the CIF. # Using `lattice` (pymatgen's matrix) here would put the mesh in a # different frame from the rendered framework when the source CIF # uses a non-canonical lattice orientation. verts_frac = verts_idx / np.array([nx, ny, nz], dtype=float) # Defensive clip: drop any face whose vertices stray outside # [0, 1) fractional. With endpoint=False linspace this should # never trigger, but keeps the mesh strictly inside the cell # wireframe even if marching_cubes produces an edge case. eps = 1e-6 inside = np.all( (verts_frac >= -eps) & (verts_frac <= 1.0 + eps), axis=1 ) if not inside.all(): face_inside = inside[faces].all(axis=1) faces = faces[face_inside] if faces.shape[0] == 0: return None, None std_lat = _standard_cif_lattice(structure.lattice.abc, structure.lattice.angles) verts_cart = verts_frac @ std_lat return verts_cart.astype(float), faces.astype(int) except Exception: # Catch-all so a malformed structure never breaks the rest of the # rendering pipeline. Caller falls back to no-mesh render. return None, None def _cluster_pore_vertices( positions: np.ndarray, radii: np.ndarray, attrs: np.ndarray, ) -> list: """Cluster Voronoi pore vertices into cavity centres. Greedy non-maximum-suppression style: walk vertices in descending order of inscribed-sphere radius, take the largest as a cavity centre, absorb all unassigned vertices within that radius. The largest inscribed sphere in a cavity *is* the cavity centre by construction (Zeo++ semantics), and nearby smaller-radius vertices are pinch points along channels feeding it. Returns a list of dicts with center, radius, and aggregated attribution statistics over cluster members. The cavity-scale sphere idiom follows Lisensky and Yaghi, J. Chem. Educ. 2022, 99, 1998-2004 (Figures 1-8), where one translucent sphere per crystallographic pore is drawn at the cavity centre, sized to the cavity diameter. """ n = len(positions) if n == 0: return [] positions = np.asarray(positions, dtype=float) radii = np.asarray(radii, dtype=float) attrs = np.asarray(attrs, dtype=float) order = np.argsort(radii)[::-1] consumed = np.zeros(n, dtype=bool) clusters: list = [] for i in order: if consumed[i]: continue centre = positions[i] cavity_radius = float(max(radii[i], 0.5)) # floor to avoid invisible spheres d = np.linalg.norm(positions - centre, axis=1) members = np.where((d <= cavity_radius) & (~consumed))[0] if len(members) == 0: members = np.array([i]) consumed[members] = True member_attrs = attrs[members] # Choose the cluster's representative attribution as the signed value # of the member with the largest magnitude. This preserves the sign of # the strongest contributor instead of cancelling positives and # negatives in a mean. peak_idx = int(np.argmax(np.abs(member_attrs))) peak_attr = float(member_attrs[peak_idx]) clusters.append({ 'center': centre, 'radius': cavity_radius, 'attr': peak_attr, 'members': members, }) return clusters # ============================================================================= # Private helper: CIF string with B-factors injected # ============================================================================= def _cif_string_with_bfactors(structure, per_atom_attrs: np.ndarray) -> str: """ Build a CIF text string from a pymatgen Structure with per-atom B-factors encoding XAI attributions on the iRASPA scale: 1 = most negative attribution 50 = neutral 99 = most positive attribution Both create_3d_visualization and export_iraspa_cif delegate here so the B-factor injection logic is defined exactly once. Atom fractional coordinates are wrapped to [0, 1) before writing the CIF so that no atoms appear outside the unit cell wireframe in the rendered viewer. Source CIFs occasionally place atoms at fractional positions slightly past 1.0 (numeric precision or symmetry-related sites), which otherwise produce visually confusing atoms drifting outside the cell. Args: structure: pymatgen Structure per_atom_attrs: np.ndarray [N_atoms], signed attributions Returns: CIF text string with _atom_site_B_iso_or_equiv column appended. """ abs_max = max(float(np.max(np.abs(per_atom_attrs))), 1e-8) b_factors = 50.0 + 49.0 * (per_atom_attrs / abs_max) # Wrap fractional coordinates to [0, 1) so 3Dmol does not render atoms # outside the unit cell wireframe. Atom order is preserved so that # per_atom_attrs indexing remains valid. try: wrapped_struct = Structure( lattice=structure.lattice, species=[site.species for site in structure], coords=[(np.asarray(site.frac_coords, dtype=float) % 1.0) for site in structure], coords_are_cartesian=False, ) except Exception: wrapped_struct = structure writer = CifWriter(wrapped_struct) cif_text = str(writer) lines = cif_text.split('\n') new_lines = [] in_atom_block = False in_atom_data = False b_col_injected = False atom_site_columns_seen: list = [] atom_counter = 0 for line in lines: stripped = line.strip() if stripped.startswith('_atom_site_'): in_atom_block = True atom_site_columns_seen.append(stripped) new_lines.append(line) continue if in_atom_block and not stripped.startswith('_atom_site_'): if '_atom_site_B_iso_or_equiv' not in '\n'.join(atom_site_columns_seen): new_lines.append(' _atom_site_B_iso_or_equiv') b_col_injected = True in_atom_block = False in_atom_data = True if ( in_atom_data and b_col_injected and stripped and not stripped.startswith('_') and not stripped.startswith('loop_') and not stripped.startswith('#') and len(stripped.split()) >= 4 and atom_counter < len(b_factors) ): bf = b_factors[atom_counter] line = f'{line} {bf:.2f}' atom_counter += 1 elif in_atom_data and not stripped: in_atom_data = False new_lines.append(line) return '\n'.join(new_lines) # ============================================================================= # create_3d_visualization # ============================================================================= def create_3d_visualization( structure, per_atom_attrs: np.ndarray, per_pore_attrs: np.ndarray, pore_positions: np.ndarray, pore_radii: np.ndarray, property_name: str, prediction_value: Optional[float] = None, scenario: Optional[str] = None, width: int = 800, height: int = 600, ) -> str: """ Render the MOF structure in 3Dmol.js with chemistry-style aesthetics. Color scheme (bwr = blue-white-red, low B-factor to high): B = 1 (most negative attribution) -> blue B = 50 (neutral) -> white B = 99 (most positive attribution) -> red Rendering: Metal atoms: large indigo spheres marking SBU centres Non-metal atoms: ball-and-stick with Jmol colours Void mesh: translucent yellow isosurface at the contact-surface boundary (probe radius 1.2 A, grid 0.7 A) Attribution: small orange/blue beads at cavity centres, opacity 0.95, radius 0.7-1.4 A scaled by |attr|, top-12 by |peak attribution| above 10% threshold Unit cell: subtle gray wireframe Args: structure: pymatgen Structure per_atom_attrs: np.ndarray [N_atoms], signed attributions per_pore_attrs: np.ndarray [N_pores], signed attributions pore_positions: np.ndarray [N_pores, 3] Cartesian Angstroms pore_radii: np.ndarray [N_pores] Angstroms property_name: Property string for the info header prediction_value: Optional float; shown in header if given scenario: Optional scenario letter ('A'/'B'/'C'/'D') width: Canvas width in pixels (default 800) height: Canvas height in pixels (default 600) Returns: HTML string with embedded 3Dmol.js, ready for gr.HTML(). """ if not PY3DMOL_OK: return ( '