rdkit_api / app.py
Nanny7's picture
Fix cube file parsing to follow standard format specification
d2f237b
raw
history blame
15.9 kB
import plotly.graph_objects as go
import numpy as np
def parse_cube_file(cube_file):
    """Parse a cube file and return axis coordinates and grid values.

    Layout read by this parser (0-based line numbers):

    * 0-1: comment lines (ignored)
    * 2: ``natoms origin_x origin_y origin_z``
    * 3: ``nx`` and the first voxel vector (only its x component is used)
    * 4: ``ny`` and the second voxel vector (only its y component is used)
    * 5: ``nz`` and the third voxel vector (only its z component is used)
    * 6 .. 6+natoms-1: one line per atom (skipped)
    * remaining lines: volumetric data, reshaped as ``(nx, ny, nz)``

    The signs of ``natoms``, ``nx``, ``ny`` and ``nz`` are discarded when
    they are used as counts.  When ``natoms`` is negative the atom section
    may be followed by a dataset-identifier record (``m id_1 ... id_m``);
    that record is skipped so it is not mistaken for grid values.  Only
    files holding a single dataset (``m == 1``) are supported.

    Args:
        cube_file: Path of the cube file to read.

    Returns:
        A tuple ``(x, y, z, values)``: ``x``, ``y`` and ``z`` are 1-D arrays
        of axis coordinates with lengths ``nx``, ``ny`` and ``nz`` (in the
        length unit used by the file), and ``values`` is an array of shape
        ``(nx, ny, nz)``.

    Raises:
        ValueError: If the file cannot be read or does not match the layout
            above.  The underlying exception is chained as ``__cause__``.
    """
    try:
        with open(cube_file, 'r') as f:
            lines = f.readlines()
        if len(lines) < 6:
            raise ValueError("Cube file too short")

        # Line 2: atom count (sign kept for later) and grid origin.
        parts = lines[2].split()
        natoms_field = int(float(parts[0]))
        natoms = abs(natoms_field)
        origin = np.array([float(parts[1]), float(parts[2]), float(parts[3])])

        # Lines 3-5: point count and spacing along each axis.
        parts = lines[3].split()
        nx = abs(int(float(parts[0])))
        dx = float(parts[1])
        parts = lines[4].split()
        ny = abs(int(float(parts[0])))
        dy = float(parts[2])
        parts = lines[5].split()
        nz = abs(int(float(parts[0])))
        dz = float(parts[3])
        expected = nx * ny * nz

        # Everything after the atom lines, as whitespace-separated tokens.
        tokens = []
        for line in lines[6 + natoms:]:
            tokens.extend(line.split())

        # A negative atom count announces a dataset-identifier record ahead
        # of the grid values.  It is only stripped when the token count does
        # not already match the grid, so files that parsed before still
        # parse to the same result.
        if natoms_field < 0 and len(tokens) != expected:
            if not tokens:
                raise ValueError("Missing dataset identifier record after atom lines")
            n_datasets = int(float(tokens[0]))
            if n_datasets != 1:
                raise ValueError(
                    f"Only single-dataset cube files are supported, found {n_datasets} datasets"
                )
            tokens = tokens[1 + n_datasets:]

        data = [float(token) for token in tokens]
        if len(data) != expected:
            raise ValueError(f"Data size mismatch: expected {nx*ny*nz}, got {len(data)}")

        values = np.array(data).reshape((nx, ny, nz))

        # Axis coordinates: origin plus index times spacing.
        x = origin[0] + np.arange(nx) * dx
        y = origin[1] + np.arange(ny) * dy
        z = origin[2] + np.arange(nz) * dz
        return x, y, z, values
    except Exception as e:
        raise ValueError(f"Error parsing cube file: {e}") from e
import gradio as gr
from rdkit import Chem
from rdkit.Chem import Descriptors, Draw, AllChem
import cirpy
import pubchempy as pcp
from urllib.error import HTTPError, URLError
import os
import tempfile
from pathlib import Path
# RDKit API with multiple endpoints
def _mol_from_smiles(smiles: str):
    """Parse ``smiles`` with RDKit, raising ``gr.Error`` when parsing yields ``None``."""
    parsed = Chem.MolFromSmiles(smiles)
    if parsed is not None:
        return parsed
    raise gr.Error("Invalid SMILES string.")
def smiles_to_canonical(smiles: str) -> str:
    """Parse ``smiles`` and return the SMILES string RDKit writes back for it.

    Raises ``gr.Error`` (via ``_mol_from_smiles``) if the input cannot be parsed.
    """
    return Chem.MolToSmiles(_mol_from_smiles(smiles))
def molecular_weight(smiles: str) -> float:
    """Return ``Descriptors.MolWt`` for the molecule described by ``smiles``.

    Raises ``gr.Error`` (via ``_mol_from_smiles``) if the input cannot be parsed.
    """
    weight = Descriptors.MolWt(_mol_from_smiles(smiles))
    return float(weight)
def logp(smiles: str) -> float:
    """Return ``Descriptors.MolLogP`` for the molecule described by ``smiles``.

    Raises ``gr.Error`` (via ``_mol_from_smiles``) if the input cannot be parsed.
    """
    partition = Descriptors.MolLogP(_mol_from_smiles(smiles))
    return float(partition)
def tpsa(smiles: str) -> float:
    """Return ``Descriptors.TPSA`` for the molecule described by ``smiles``.

    Raises ``gr.Error`` (via ``_mol_from_smiles``) if the input cannot be parsed.
    """
    surface_area = Descriptors.TPSA(_mol_from_smiles(smiles))
    return float(surface_area)
def mol_image(smiles: str):
    """Return the result of ``Draw.MolToImage`` for the molecule described by ``smiles``.

    Raises ``gr.Error`` (via ``_mol_from_smiles``) if the input cannot be parsed.
    """
    return Draw.MolToImage(_mol_from_smiles(smiles))
def name_to_smiles(name: str) -> str:
    """Convert chemical name to SMILES using Chemical Identifier Resolver (CIR).

    Args:
        name: Chemical name passed to ``cirpy.resolve``.

    Returns:
        The value ``cirpy.resolve(name, 'smiles')`` produced.

    Raises:
        gr.Error: If the lookup itself fails (message prefixed with
            "Error converting name to SMILES") or if it returns ``None``
            (message "Could not find SMILES for chemical name: ...").
    """
    # Only the lookup is guarded: keeping the "not found" error outside the
    # try block stops it from being caught and re-wrapped with a second prefix.
    try:
        smiles = cirpy.resolve(name, 'smiles')
    except Exception as e:
        raise gr.Error(f"Error converting name to SMILES: {str(e)}") from e
    if smiles is None:
        raise gr.Error(f"Could not find SMILES for chemical name: {name}")
    return smiles
def smiles_to_name(smiles: str) -> str:
    """Look up a chemical name for a SMILES string.

    Resolvers are tried in order: CIR (queried with the SMILES as given),
    then PubChem (queried with the RDKit-written SMILES).  Any exception
    from either resolver is swallowed so the next fallback can run.  If
    neither yields a name, a message containing the RDKit-written SMILES
    is returned instead.

    Raises ``gr.Error`` (via ``_mol_from_smiles``) if the input cannot be parsed.
    """
    canonical_smiles = Chem.MolToSmiles(_mol_from_smiles(smiles))

    # First choice: CIR. Failures of any kind (network or otherwise) are
    # treated the same as "no result".
    try:
        cir_name = cirpy.resolve(smiles, "name")
    except Exception:
        cir_name = None
    if cir_name:
        return cir_name

    # Second choice: PubChem, preferring the IUPAC name over the first synonym.
    try:
        for record in pcp.get_compounds(canonical_smiles, namespace="smiles"):
            if record.iupac_name:
                return record.iupac_name
            synonyms = record.synonyms
            if synonyms:
                return synonyms[0]
    except Exception:
        # PubChem problems are ignored; fall through to the SMILES message.
        pass

    return f"No name available. Canonical SMILES: {canonical_smiles}"
# Cube grids written by PySCF are expressed in Bohr, while RDKit conformer
# coordinates are in Angstrom; the grid is converted before plotting so the
# orbital and the atom markers share one coordinate system.
_BOHR_TO_ANGSTROM = 0.529177210903

# Marker colours for the atom overlay; any other element is drawn gray.
_ATOM_COLORS = {'C': 'black', 'N': 'blue', 'O': 'red', 'H': 'white'}


def _orbital_figure(x, y, z, values, mol_3d, label):
    """Build the Plotly figure for one orbital: a volume trace plus atom markers.

    ``x``, ``y`` and ``z`` are the 1-D axis arrays (in Bohr) and ``values`` the
    ``(nx, ny, nz)`` grid returned by ``parse_cube_file``.
    """
    # go.Volume expects one coordinate per grid point, so the axis vectors are
    # expanded to a full grid whose flattening order matches ``values``.
    grid_x, grid_y, grid_z = np.meshgrid(
        np.asarray(x) * _BOHR_TO_ANGSTROM,
        np.asarray(y) * _BOHR_TO_ANGSTROM,
        np.asarray(z) * _BOHR_TO_ANGSTROM,
        indexing='ij',
    )
    fig = go.Figure(data=go.Volume(
        x=grid_x.ravel(),
        y=grid_y.ravel(),
        z=grid_z.ravel(),
        value=values.ravel(),
        isomin=-0.02,
        isomax=0.02,
        opacity=0.1,
        surface_count=2,
        colorscale='RdBu',
        reversescale=True
    ))

    # Overlay the atoms at their conformer positions, coloured by element.
    conformer = mol_3d.GetConformer()
    atom_x, atom_y, atom_z, atom_colors = [], [], [], []
    for atom in mol_3d.GetAtoms():
        pos = conformer.GetAtomPosition(atom.GetIdx())
        atom_x.append(pos.x)
        atom_y.append(pos.y)
        atom_z.append(pos.z)
        atom_colors.append(_ATOM_COLORS.get(atom.GetSymbol(), 'gray'))
    fig.add_trace(go.Scatter3d(
        x=atom_x, y=atom_y, z=atom_z,
        mode='markers',
        marker=dict(size=8, color=atom_colors),
        name='Atoms'
    ))
    fig.update_layout(
        title=f'{label} Orbital',
        scene=dict(
            xaxis_title='X',
            yaxis_title='Y',
            zaxis_title='Z'
        )
    )
    return fig


def smiles_to_molecular_orbitals(smiles_input: str, name_input: str) -> str:
    """Generate HOMO/LUMO volume plots with PySCF, when available.

    Steps: resolve the molecule (a non-empty name takes precedence over the
    SMILES field and is resolved through CIR), embed and MMFF-optimise a 3D
    geometry with RDKit, run restricted Hartree-Fock in the STO-3G basis,
    write one cube file per frontier orbital into a temporary directory and
    render each cube as a Plotly volume with the atoms overlaid.

    Args:
        smiles_input: SMILES string; used when ``name_input`` is blank.
        name_input: Optional chemical name to resolve to SMILES.

    Returns:
        An HTML fragment holding the plots, or an HTML message describing
        why they could not be produced (name not resolved, PySCF missing,
        embedding or SCF failure, any other runtime error).

    Raises:
        gr.Error: If both inputs are blank, the SMILES cannot be parsed, or
            the molecule has more than 30 atoms.
    """
    from html import escape  # user-supplied text is interpolated into HTML below

    smiles = (smiles_input or "").strip()
    name = (name_input or "").strip()
    if not smiles and not name:
        raise gr.Error("Enter a SMILES string or a chemical name to compute orbitals.")

    if name:
        try:
            resolved = cirpy.resolve(name, "smiles")
        except Exception as exc:
            return f"<p>Could not resolve '{escape(name)}' to SMILES: {escape(str(exc))}</p>"
        if not resolved:
            return (
                f"<p>No SMILES found for '{escape(name)}'. "
                "Try a different name or supply a SMILES directly.</p>"
            )
        smiles = resolved

    mol = _mol_from_smiles(smiles)
    canonical_smiles = Chem.MolToSmiles(mol)
    if mol.GetNumAtoms() > 30:
        raise gr.Error("Please provide a molecule with 30 atoms or fewer for orbital visualization.")

    try:
        import pyscf  # type: ignore[import]
    except ImportError:
        return (
            "<p><strong>PySCF is not available.</strong> "
            "Install it with <code>pip install pyscf</code> "
            "for molecular orbital calculations.</p>"
            "<p><strong>Alternative online tools:</strong></p>"
            "<ul>"
            "<li><a href='https://www.webmo.net/' target='_blank'>WebMO</a> - Web-based molecular modeling</li>"
            "<li><a href='https://gaussian.com/' target='_blank'>Gaussian</a> - Quantum chemistry software</li>"
            "<li><a href='https://www.chemcraftprog.com/' target='_blank'>ChemCraft</a> - Molecular visualization</li>"
            "</ul>"
            f"<p>You can copy this SMILES to these tools: <code>{escape(canonical_smiles)}</code></p>"
        )

    # Cube files are written with explicit paths inside the temporary
    # directory; the process-wide working directory is left alone because
    # changing it would affect every other request served by this process.
    with tempfile.TemporaryDirectory() as tmpdir:
        try:
            # Generate 3D coordinates with RDKit.
            mol_3d = Chem.AddHs(mol)
            if AllChem.EmbedMolecule(mol_3d, randomSeed=42) == -1:
                return "<p>Could not generate 3D coordinates for this molecule.</p>"
            AllChem.MMFFOptimizeMolecule(mol_3d)

            # Collect (symbol, [x, y, z]) pairs and the net formal charge.
            conformer = mol_3d.GetConformer()
            geometry = []
            net_charge = 0
            for atom in mol_3d.GetAtoms():
                pos = conformer.GetAtomPosition(atom.GetIdx())
                geometry.append((atom.GetSymbol(), [pos.x, pos.y, pos.z]))
                net_charge += atom.GetFormalCharge()

            # Set up the PySCF molecule.
            mol_pyscf = pyscf.gto.Mole()
            mol_pyscf.atom = geometry
            mol_pyscf.basis = 'sto-3g'  # Small basis set for speed
            # Without the charge, ions would be computed with the wrong
            # electron count.
            mol_pyscf.charge = net_charge
            mol_pyscf.build()

            # Run the Hartree-Fock calculation.
            mf = pyscf.scf.RHF(mol_pyscf)
            mf.kernel()
            if not mf.converged:
                return "<p>Hartree-Fock calculation did not converge. Try a smaller molecule or different geometry.</p>"

            # Frontier orbital indices for a closed-shell occupation.
            nocc = mol_pyscf.nelectron // 2
            n_orbitals = mf.mo_coeff.shape[1]

            from pyscf.tools import cubegen
            import plotly.io as pio

            html_sections: list[str] = []
            if name:
                html_sections.append(
                    f"<p><strong>Resolved '{escape(name)}' to SMILES:</strong> {escape(canonical_smiles)}</p>"
                )

            # NOTE(review): Plotly's HTML relies on inline <script> tags;
            # confirm that the deployed Gradio version executes scripts
            # placed in a gr.HTML component.
            plots_added = 0
            for idx, label in ((nocc - 1, 'HOMO'), (nocc, 'LUMO')):
                # Skip an orbital that does not exist, e.g. no virtual
                # orbital when every basis function is occupied.
                if not 0 <= idx < n_orbitals:
                    continue
                cube_path = Path(tmpdir) / f'{label.lower()}.cube'
                cubegen.orbital(mol_pyscf, str(cube_path), mf.mo_coeff[:, idx])
                if not cube_path.exists():
                    continue
                x, y, z, values = parse_cube_file(cube_path)
                fig = _orbital_figure(x, y, z, values, mol_3d, label)
                html_sections.append(f"<h3>{label}</h3>" + pio.to_html(fig, full_html=False))
                plots_added += 1

            # Counted separately so the "Resolved ..." paragraph alone does
            # not pass for a successful visualization.
            if not plots_added:
                return "<p>Could not prepare HOMO/LUMO visualizations.</p>"
            return "".join(html_sections)
        except Exception as exc:  # pragma: no cover - runtime heavy
            return f"<p>Unable to compute molecular orbitals: {escape(str(exc))}</p>"
def name_to_3d_molecule(name: str) -> str:
    """Convert a chemical name to an HTML page with a 3Dmol.js viewer.

    The name is resolved to SMILES through CIR, hydrogens are added, a 3D
    geometry is embedded (ETKDG) and MMFF-optimised with RDKit, and the
    resulting SDF text is embedded in a small HTML document that renders it
    as sticks with 3Dmol.js.

    Args:
        name: Chemical name to visualise.

    Returns:
        The HTML document as a string.

    Raises:
        gr.Error: If the name is blank, cannot be resolved, cannot be parsed
            or embedded, or any other step fails (the latter prefixed with
            "Error creating 3D molecule").
    """
    import json  # used to embed the SDF text as a JavaScript string literal

    if not name or not name.strip():
        raise gr.Error("Enter a chemical name.")

    try:
        # Convert name to SMILES
        smiles = cirpy.resolve(name, 'smiles')
        if smiles is None:
            raise gr.Error(f"Could not find SMILES for chemical name: {name}")

        # Create molecule from SMILES
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            raise gr.Error(f"Could not create molecule from SMILES: {smiles}")

        # Add hydrogens for better 3D structure
        mol = Chem.AddHs(mol)

        # Generate 3D coordinates
        success = AllChem.EmbedMolecule(mol, AllChem.ETKDG())
        if success == -1:
            raise gr.Error(f"Could not generate 3D coordinates for: {name}")

        # Optimize geometry
        AllChem.MMFFOptimizeMolecule(mol)

        # Convert to SDF format (contains 3D coordinates)
        sdf_string = Chem.SDWriter.GetText(mol)

        # A JSON string is a valid JavaScript string literal, so quotes,
        # backslashes, backticks and newlines in the SDF cannot break the
        # script; "</" is escaped so the text cannot close the <script> tag.
        sdf_literal = json.dumps(sdf_string).replace("</", "<\\/")

        # Create HTML with embedded 3D viewer using 3Dmol.js.  The container
        # is looked up with the DOM API because the page does not load jQuery.
        # NOTE(review): confirm that the deployed Gradio version executes
        # <script> tags placed in a gr.HTML component.
        html_content = f"""
<!DOCTYPE html>
<html>
<head>
<script src="https://3dmol.csb.pitt.edu/build/3Dmol-min.js"></script>
</head>
<body>
<div id="container" style="width: 400px; height: 400px; position: relative;"></div>
<script>
let viewer = $3Dmol.createViewer(document.getElementById("container"));
let sdf = {sdf_literal};
viewer.addModel(sdf, "sdf");
viewer.setStyle({{'stick': {{}}}});
viewer.zoomTo();
viewer.render();
</script>
</body>
</html>
"""
        return html_content
    except gr.Error:
        # Already a user-facing message; do not wrap it a second time.
        raise
    except Exception as e:
        raise gr.Error(f"Error creating 3D molecule: {str(e)}") from e
# Interface wrapping smiles_to_canonical: one SMILES textbox in, one textbox
# out. Note that the endpoint is published as "smiles_to_mol" even though the
# function returns a SMILES string.
smiles_interface = gr.Interface(
fn=smiles_to_canonical,
inputs=gr.Textbox(label="SMILES"),
outputs=gr.Textbox(label="Canonical SMILES"),
api_name="smiles_to_mol",
description="Convert an input SMILES string to its canonical form.",
)
# Interface wrapping smiles_to_name: SMILES textbox in, name textbox out,
# published as the "smiles_to_name" endpoint.
smiles_to_name_interface = gr.Interface(
fn=smiles_to_name,
inputs=gr.Textbox(label="SMILES", placeholder="e.g., CC(=O)Oc1ccccc1C(=O)O"),
outputs=gr.Textbox(label="Chemical Name"),
api_name="smiles_to_name",
description="Convert a SMILES string to a chemical name.",
)
# Interface wrapping smiles_to_molecular_orbitals: two textboxes (SMILES and
# an optional chemical name) in, an HTML component out, published as the
# "smiles_to_mo" endpoint. The description names PySCF because that is the
# package the wrapped function imports for the calculation.
orbital_interface = gr.Interface(
    fn=smiles_to_molecular_orbitals,
    inputs=[
        gr.Textbox(label="SMILES", placeholder="e.g., CC(=O)O"),
        gr.Textbox(label="Chemical Name", placeholder="Optional, e.g., benzene"),
    ],
    outputs=gr.HTML(label="Molecular Orbitals"),
    api_name="smiles_to_mo",
    description="Generate HOMO/LUMO isosurfaces using PySCF (CPU-intensive). Provide SMILES or a name.",
)
# Interface wrapping name_to_smiles: chemical-name textbox in, SMILES textbox
# out, published as the "name_to_smiles" endpoint with four example names.
name_interface = gr.Interface(
fn=name_to_smiles,
inputs=gr.Textbox(label="Chemical Name", placeholder="e.g., aspirin, caffeine, benzene"),
outputs=gr.Textbox(label="SMILES"),
api_name="name_to_smiles",
description="Convert a chemical name to SMILES notation.",
examples=[["aspirin"], ["caffeine"], ["benzene"], ["ethanol"]],
)
# Interface wrapping molecular_weight: SMILES textbox in, numeric output,
# published as the "molecular_weight" endpoint.
mw_interface = gr.Interface(
fn=molecular_weight,
inputs=gr.Textbox(label="SMILES"),
outputs=gr.Number(label="Molecular Weight (g/mol)"),
api_name="molecular_weight",
description="Compute the molecular weight from a SMILES string.",
)
# Interface wrapping logp: SMILES textbox in, numeric output, published as
# the "logp" endpoint.
logp_interface = gr.Interface(
fn=logp,
inputs=gr.Textbox(label="SMILES"),
outputs=gr.Number(label="logP"),
api_name="logp",
description="Calculate the octanol/water partition coefficient (logP).",
)
# Interface wrapping tpsa: SMILES textbox in, numeric output, published as
# the "tpsa" endpoint.
tpsa_interface = gr.Interface(
fn=tpsa,
inputs=gr.Textbox(label="SMILES"),
outputs=gr.Number(label="TPSA"),
api_name="tpsa",
description="Calculate the topological polar surface area (TPSA).",
)
# Interface wrapping name_to_3d_molecule: chemical-name textbox in, HTML
# component out, published as the "name_to_3d_molecule" endpoint with four
# example names.
molecule_3d_interface = gr.Interface(
fn=name_to_3d_molecule,
inputs=gr.Textbox(label="Chemical Name", placeholder="e.g., benzene, aspirin, caffeine"),
outputs=gr.HTML(label="3D Molecule Viewer"),
api_name="name_to_3d_molecule",
description="Convert a chemical name to an interactive 3D molecule visualization.",
examples=[["benzene"], ["aspirin"], ["caffeine"], ["ethanol"]],
)
# Top-level app: the eight interfaces above combined into one tabbed UI.
# The two lists are parallel - the n-th title labels the n-th interface - so
# they must be kept in the same order and at the same length.
demo = gr.TabbedInterface(
[
name_interface,
molecule_3d_interface,
orbital_interface,
smiles_interface,
smiles_to_name_interface,
mw_interface,
logp_interface,
tpsa_interface,
],
[
"Name to SMILES",
"3D Molecule Viewer",
"Molecular Orbitals",
"SMILES to Canonical",
"SMILES to Name",
"Molecular Weight",
"LogP",
"TPSA",
],
title="RDKit API",
# Caps the page width at 800px and centres the container.
css=".gradio-container {max-width: 800px; margin: auto;}",
)
if __name__ == "__main__":
    # Script entry point: enable the request queue, then serve on every
    # network interface at port 7860.
    queued_demo = demo.queue()
    queued_demo.launch(server_name="0.0.0.0", server_port=7860)