Nanny7 committed on
Commit
b18cfa0
·
1 Parent(s): 71f8c18

Add Chemiscope explorer

Browse files
Files changed (3) hide show
  1. README.md +12 -2
  2. app.py +241 -1
  3. requirements.txt +3 -1
README.md CHANGED
@@ -20,8 +20,8 @@ This is a Hugging Face Space providing API access to various RDKit cheminformati
20
  - `POST /api/smiles_to_mol` : Convert SMILES to canonical SMILES
21
  - `POST /api/molecular_weight` : Calculate molecular weight
22
  - `POST /api/logp` : Calculate logP (partition coefficient)
23
- - `POST /api/tpsa` : Calculate topological polar surface area
24
  - `POST /api/tpsa` : Calculate TPSA (topological polar surface area)
 
25
  - `POST /api/mol_image` : Generate 2D molecule image (returns PIL Image)
26
 
27
  ## Usage
@@ -35,4 +35,14 @@ pip install -r requirements.txt
35
  python app.py
36
  ```
37
 
38
- The API endpoints can be accessed programmatically using the Gradio client or HTTP requests.
 
 
 
 
 
 
 
 
 
 
 
20
  - `POST /api/smiles_to_mol` : Convert SMILES to canonical SMILES
21
  - `POST /api/molecular_weight` : Calculate molecular weight
22
  - `POST /api/logp` : Calculate logP (partition coefficient)
 
23
  - `POST /api/tpsa` : Calculate TPSA (topological polar surface area)
24
+ - `POST /api/chemiscope_explorer` : Build a Chemiscope dataset + visualization from a batch of SMILES
25
  - `POST /api/mol_image` : Generate 2D molecule image (returns PIL Image)
26
 
27
  ## Usage
 
35
  python app.py
36
  ```
37
 
38
+ The API endpoints can be accessed programmatically using the Gradio client or HTTP requests.
39
+
40
+ ## Chemiscope-powered exploration
41
+
42
+ The new **Chemiscope Explorer** tab leverages the official [`chemiscope` Python package](https://chemiscope.org/docs/python/index.html) plus the standalone Chemiscope web viewer to create rich, shareable datasets directly inside the Space:
43
+
44
+ 1. Paste up to 12 unique SMILES (one per line, or separated by commas or semicolons). RDKit generates 3D ETKDG conformers, relaxes them with UFF, computes descriptors (MolWt, logP, TPSA, H-bond counts, rotatable bonds) and per-atom Gasteiger charges.
45
+ 2. The resulting ASE frames and descriptors are converted into the Chemiscope JSON schema and embedded with the [`chemiscope_standalone.html`](https://chemiscope.org/docs/index.html) viewer so you can explore structures + map plots without leaving the Space.
46
+ 3. Download the `.json.gz` artifact and reload it later on [chemiscope.org](https://chemiscope.org/) via the *Load/Save* menu or by hosting it and passing `?load=<url>` to share interactive analyses with collaborators.
47
+
48
+ This flow keeps the Hugging Face interface lightweight while exposing the full Chemiscope feature set for interactive dataset mining.
app.py CHANGED
@@ -1,5 +1,11 @@
1
  import plotly.graph_objects as go
2
  import numpy as np
 
 
 
 
 
 
3
 
4
 
5
  def parse_cube_file(cube_file):
@@ -157,6 +163,219 @@ def smiles_to_name(smiles: str) -> str:
157
  return f"No name available. Canonical SMILES: {canonical_smiles}"
158
 
159
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
  def smiles_to_molecular_orbitals(smiles_input: str, name_input: str) -> str:
161
  """Generate HOMO/LUMO isosurfaces using Psikit, when available."""
162
  smiles = smiles_input.strip()
@@ -529,11 +748,31 @@ molecule_3d_interface = gr.Interface(
529
  examples=[["benzene"], ["aspirin"], ["caffeine"], ["ethanol"]],
530
  )
531
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
532
 
533
  demo = gr.TabbedInterface(
534
  [
535
  name_interface,
536
  molecule_3d_interface,
 
537
  orbital_interface,
538
  smiles_interface,
539
  smiles_to_name_interface,
@@ -544,6 +783,7 @@ demo = gr.TabbedInterface(
544
  [
545
  "Name to SMILES",
546
  "3D Molecule Viewer",
 
547
  "Molecular Orbitals",
548
  "SMILES to Canonical",
549
  "SMILES to Name",
@@ -557,4 +797,4 @@ demo = gr.TabbedInterface(
557
 
558
 
559
  if __name__ == "__main__":
560
- demo.queue().launch(server_name="0.0.0.0", server_port=7860)
 
1
  import plotly.graph_objects as go
2
  import numpy as np
3
+ import json
4
+ import re
5
+ import urllib.request
6
+
7
+ import chemiscope
8
+ from ase import Atoms
9
 
10
 
11
  def parse_cube_file(cube_file):
 
163
  return f"No name available. Canonical SMILES: {canonical_smiles}"
164
 
165
 
166
# Remote location of the standalone Chemiscope viewer shell.
CHEMISCOPE_TEMPLATE_URL = "https://chemiscope.org/chemiscope_standalone.html"
# On-disk copy of the viewer shell, stored in the system temp directory.
CHEMISCOPE_TEMPLATE_CACHE = Path(tempfile.gettempdir()) / "chemiscope_standalone.html"
# In-process copy of the viewer shell; set after the first successful load.
_CHEMISCOPE_TEMPLATE = None
# Maximum number of unique molecules accepted in one batch.
_MAX_CHEMISCOPE_MOLECULES = 12


def _load_chemiscope_template():
    """Load (and cache) the standalone Chemiscope HTML shell.

    Lookup order: the in-process cache, then the on-disk cache at
    ``CHEMISCOPE_TEMPLATE_CACHE``, then a download from
    ``CHEMISCOPE_TEMPLATE_URL``.

    Returns:
        The viewer HTML as a non-empty string.

    Raises:
        gr.Error: If no usable cached copy exists and the download fails or
            yields an empty document.
    """
    global _CHEMISCOPE_TEMPLATE
    if _CHEMISCOPE_TEMPLATE:
        return _CHEMISCOPE_TEMPLATE

    # The disk cache is best-effort: a missing, unreadable, undecodable or
    # empty file is treated as a cache miss and a fresh copy is downloaded.
    try:
        cached = CHEMISCOPE_TEMPLATE_CACHE.read_text(encoding="utf-8")
    except (OSError, UnicodeError):
        cached = ""
    if cached.strip():
        _CHEMISCOPE_TEMPLATE = cached
        return cached

    try:
        with urllib.request.urlopen(CHEMISCOPE_TEMPLATE_URL, timeout=10) as response:
            template = response.read().decode("utf-8")
    except Exception as exc:
        raise gr.Error(
            "Unable to download the Chemiscope viewer. Please try again in a moment."
        ) from exc

    # Never cache an empty response; it would make every later embed blank.
    if not template.strip():
        raise gr.Error(
            "Unable to download the Chemiscope viewer. Please try again in a moment."
        )

    try:
        CHEMISCOPE_TEMPLATE_CACHE.write_text(template, encoding="utf-8")
    except OSError:
        # The temp directory might be read-only; ignore caching failures.
        pass

    _CHEMISCOPE_TEMPLATE = template
    return template
202
+
203
+
204
def _smiles_list_from_block(smiles_block: str):
    """Split a block of SMILES text into a unique list of canonical SMILES.

    Tokens are separated by commas, newlines or semicolons; surrounding
    whitespace is stripped and empty tokens are dropped. Each token is
    canonicalised with RDKit and duplicates (by canonical form) are removed
    while keeping first-seen order.

    Args:
        smiles_block: Raw user text; ``None`` or an empty string is treated as
            no input.

    Returns:
        A list of at most ``_MAX_CHEMISCOPE_MOLECULES`` canonical SMILES.

    Raises:
        gr.Error: If no SMILES are provided or the number of unique molecules
            exceeds ``_MAX_CHEMISCOPE_MOLECULES``.
    """
    # NOTE(review): _mol_from_smiles is defined elsewhere in this file and
    # presumably raises on unparsable SMILES; confirm against its definition.
    tokens = [token.strip() for token in re.split(r"[,\n;]+", smiles_block or "")]
    tokens = [token for token in tokens if token]
    if not tokens:
        raise gr.Error("Provide at least one SMILES string (one per line or comma separated).")

    # A dict keeps insertion order and gives O(1) duplicate detection.
    unique_smiles = {}
    for token in tokens:
        canonical = Chem.MolToSmiles(_mol_from_smiles(token))
        unique_smiles.setdefault(canonical, None)
        # Stop as soon as the limit is exceeded, so an oversized paste is
        # rejected without canonicalising every remaining token.
        if len(unique_smiles) > _MAX_CHEMISCOPE_MOLECULES:
            raise gr.Error(
                f"Please limit Chemiscope batches to {_MAX_CHEMISCOPE_MOLECULES} molecules to keep the viewer responsive."
            )

    return list(unique_smiles)
223
+
224
+
225
def _embed_smiles_in_3d(smiles: str, seed: int):
    """Return an RDKit molecule with explicit hydrogens and one 3D conformer.

    The conformer is embedded with ETKDGv3 (random seed ``seed + 1``, up to 20
    attempts). If that fails, embedding is retried once with random starting
    coordinates. The result is then UFF-optimised (200 iterations max) and
    annotated with Gasteiger partial charges.

    Raises:
        gr.Error: If both embedding attempts fail.
    """
    molecule = Chem.AddHs(_mol_from_smiles(smiles))

    embed_params = AllChem.ETKDGv3()
    embed_params.randomSeed = seed + 1
    embed_params.maxAttempts = 20

    if AllChem.EmbedMolecule(molecule, embed_params) == -1:
        # Second chance: start from random coordinates instead.
        embed_params.useRandomCoords = True
        if AllChem.EmbedMolecule(molecule, embed_params) == -1:
            raise gr.Error(f"Unable to generate a 3D conformer for {smiles}. Try a smaller molecule.")

    AllChem.UFFOptimizeMolecule(molecule, maxIters=200)
    Chem.rdPartialCharges.ComputeGasteigerCharges(molecule)
    return molecule
240
+
241
+
242
def _rdkit_to_ase_atoms(mol: Chem.Mol, label: str) -> Atoms:
    """Build an ASE ``Atoms`` object from an RDKit molecule's conformer.

    Atom symbols and Cartesian positions are copied from ``mol`` in atom-index
    order, and ``label`` is stored under ``info["name"]``.
    """
    conformer = mol.GetConformer()
    points = (conformer.GetAtomPosition(index) for index in range(mol.GetNumAtoms()))
    positions = [(float(point.x), float(point.y), float(point.z)) for point in points]
    elements = [atom.GetSymbol() for atom in mol.GetAtoms()]

    structure = Atoms(symbols=elements, positions=positions)
    structure.info["name"] = label
    return structure
253
+
254
+
255
def _extract_gasteiger_charges(mol: Chem.Mol):
    """Return one Gasteiger partial charge per atom of ``mol``, in atom order.

    Reads the ``_GasteigerCharge`` atom property. Atoms whose charge is
    missing, unparsable, or non-finite (NaN / infinity) get ``0.0``.

    Returns:
        A list of finite floats with one entry per atom.
    """
    import math  # local import: this block cannot see the file's import section

    charges = []
    for atom in mol.GetAtoms():
        charge = 0.0
        if atom.HasProp("_GasteigerCharge"):
            try:
                parsed = float(atom.GetProp("_GasteigerCharge"))
            except ValueError:
                parsed = 0.0
            # float() accepts "nan"/"inf" without raising. Such values would be
            # serialised as NaN/Infinity, which is not valid JSON for the viewer.
            if math.isfinite(parsed):
                charge = parsed
        charges.append(charge)
    return charges
266
+
267
+
268
def _build_chemiscope_embed(dataset_payload: dict) -> str:
    """Wrap the Chemiscope viewer and a dataset into an embeddable HTML snippet.

    The dataset is serialised as compact JSON and appended directly after the
    standalone viewer HTML. The combined document is base64-encoded into a
    ``data:`` URI used as the ``src`` of an iframe, followed by a short usage
    hint.
    """
    shell_html = _load_chemiscope_template()
    payload_text = json.dumps(dataset_payload, ensure_ascii=False, separators=(",", ":"))
    document_bytes = (shell_html + payload_text).encode("utf-8")
    encoded = base64.b64encode(document_bytes).decode("ascii")

    iframe = (
        "<iframe title='Chemiscope explorer' "
        "style='width:100%;height:620px;border:none;border-radius:8px;' "
        f"src='data:text/html;base64,{encoded}'></iframe>"
    )
    hint = (
        "<p style='font-size:0.9em;margin-top:0.5rem;'>"
        "Use the map view to explore descriptors interactively or download the dataset for "
        "https://chemiscope.org/. </p>"
    )
    return f"<div style='width:100%;'>{iframe}{hint}</div>"
286
+
287
+
288
def smiles_to_chemiscope_dataset(smiles_block: str):
    """Generate a Chemiscope dataset and embed it alongside a downloadable artifact.

    For every unique SMILES in ``smiles_block`` a 3D structure is embedded,
    converted to an ASE frame, and described by per-structure descriptors
    (atom count, molecular weight, logP, TPSA, H-bond donors/acceptors,
    rotatable bonds) plus per-atom Gasteiger charges.

    Args:
        smiles_block: SMILES strings separated by newlines, commas or
            semicolons.

    Returns:
        A ``(viewer_html, dataset_path)`` tuple: the HTML snippet embedding
        the viewer, and the path of a ``.json.gz`` temp file holding the
        dataset. The file is intentionally left on disk for the caller.

    Raises:
        gr.Error: Propagated from the parsing, embedding and template-loading
            helpers.
    """
    smiles_list = _smiles_list_from_block(smiles_block)
    frames = []
    smiles_labels = []
    atom_counts = []
    mw_values = []
    logp_values = []
    tpsa_values = []
    hbd_values = []
    hba_values = []
    rotatable_values = []
    atomic_charges = []

    for idx, smiles in enumerate(smiles_list):
        mol3d = _embed_smiles_in_3d(smiles, idx)
        # Hydrogen-stripped copy, computed once and reused for the label and
        # the descriptors below.
        descriptor_mol = Chem.RemoveHs(mol3d)
        canonical = Chem.MolToSmiles(descriptor_mol)

        frames.append(_rdkit_to_ase_atoms(mol3d, canonical))
        smiles_labels.append(canonical)
        atom_counts.append(int(mol3d.GetNumAtoms()))
        # Atom-targeted values are concatenated across structures in frame order.
        atomic_charges.extend(_extract_gasteiger_charges(mol3d))

        mw_values.append(float(Descriptors.MolWt(mol3d)))
        logp_values.append(float(Descriptors.MolLogP(descriptor_mol)))
        tpsa_values.append(float(Descriptors.TPSA(descriptor_mol)))
        hbd_values.append(float(Descriptors.NumHDonors(descriptor_mol)))
        hba_values.append(float(Descriptors.NumHAcceptors(descriptor_mol)))
        rotatable_values.append(float(Descriptors.NumRotatableBonds(descriptor_mol)))

    properties = {
        "SMILES": {"target": "structure", "values": smiles_labels},
        "Atom count": {"target": "structure", "values": atom_counts, "units": "atoms"},
        "Molecular weight (g/mol)": {
            "target": "structure",
            "values": mw_values,
            "units": "g/mol",
        },
        "logP": {"target": "structure", "values": logp_values},
        "TPSA (Ų)": {"target": "structure", "values": tpsa_values, "units": "Ų"},
        "H-bond donors": {"target": "structure", "values": hbd_values},
        "H-bond acceptors": {"target": "structure", "values": hba_values},
        "Rotatable bonds": {"target": "structure", "values": rotatable_values},
        "Gasteiger charge (e)": {
            "target": "atom",
            "values": atomic_charges,
            "units": "e",
        },
    }

    settings = {
        "map": {
            "x": {"property": "Molecular weight (g/mol)"},
            "y": {"property": "logP"},
            "color": {"property": "TPSA (Ų)"},
            "size": {"property": "Atom count"},
        }
    }

    meta = {
        "name": "RDKit Chemiscope Explorer",
        "description": (
            "Interactive Chemiscope session generated directly inside the RDKit Hugging Face Space."
        ),
        "references": [
            "https://chemiscope.org/docs/python/index.html",
            "https://chemiscope.org/docs/index.html",
        ],
    }

    # The same arguments feed both the in-memory dataset and the file on disk.
    dataset_kwargs = {
        "frames": frames,
        "properties": properties,
        "meta": meta,
        "settings": settings,
    }

    dataset = chemiscope.create_input(**dataset_kwargs)

    # Build the viewer before creating the temp file: it may need a network
    # download, and failing here should leave nothing behind on disk.
    viewer_html = _build_chemiscope_embed(dataset)

    tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".json.gz")
    tmp_file.close()
    try:
        chemiscope.write_input(tmp_file.name, **dataset_kwargs)
    except Exception:
        # Do not leak the placeholder file when the export fails.
        Path(tmp_file.name).unlink(missing_ok=True)
        raise

    return viewer_html, tmp_file.name
377
+
378
+
379
  def smiles_to_molecular_orbitals(smiles_input: str, name_input: str) -> str:
380
  """Generate HOMO/LUMO isosurfaces using Psikit, when available."""
381
  smiles = smiles_input.strip()
 
748
  examples=[["benzene"], ["aspirin"], ["caffeine"], ["ethanol"]],
749
  )
750
 
751
# Gradio tab for the Chemiscope explorer: one SMILES textbox in, the embedded
# viewer and the downloadable dataset file out.
chemiscope_interface = gr.Interface(
    fn=smiles_to_chemiscope_dataset,
    inputs=gr.Textbox(
        label="SMILES batch",
        lines=6,
        # Derive the advertised limit from the constant the parser enforces,
        # so the two cannot drift apart.
        placeholder=(
            "One SMILES per line or comma separated "
            f"(max {_MAX_CHEMISCOPE_MOLECULES} molecules)."
        ),
    ),
    outputs=[
        gr.HTML(label="Chemiscope Viewer"),
        gr.File(label="Chemiscope Dataset (.json.gz)"),
    ],
    api_name="chemiscope_explorer",
    description=(
        "Generate a Chemiscope dataset using RDKit + ASE + Chemiscope tooling, then explore it "
        "directly inside the Space or download the JSON for chemiscope.org."
    ),
    examples=[["CCO\nc1ccccc1"]],
)
769
+
770
 
771
  demo = gr.TabbedInterface(
772
  [
773
  name_interface,
774
  molecule_3d_interface,
775
+ chemiscope_interface,
776
  orbital_interface,
777
  smiles_interface,
778
  smiles_to_name_interface,
 
783
  [
784
  "Name to SMILES",
785
  "3D Molecule Viewer",
786
+ "Chemiscope Explorer",
787
  "Molecular Orbitals",
788
  "SMILES to Canonical",
789
  "SMILES to Name",
 
797
 
798
 
799
if __name__ == "__main__":
    # Enable request queuing, then serve on all interfaces at port 7860.
    queued_demo = demo.queue()
    queued_demo.launch(server_name="0.0.0.0", server_port=7860)
requirements.txt CHANGED
@@ -7,6 +7,8 @@ pubchempy
7
  psikit
8
  py3Dmol
9
  pyscf
 
 
10
  plotly
11
  matplotlib
12
- pillow
 
7
  psikit
8
  py3Dmol
9
  pyscf
10
+ chemiscope
11
+ ase
12
  plotly
13
  matplotlib
14
+ pillow