File size: 3,461 Bytes
a3863ea 5a820b2 a3863ea 5a820b2 a3863ea 8c26fbc a3863ea 5a820b2 8c26fbc 5a820b2 8c26fbc 5a820b2 a3863ea 5a820b2 a3863ea 5a820b2 a3863ea 5a820b2 a3863ea 5a820b2 a3863ea 5a820b2 a3863ea 5a820b2 a3863ea 5a820b2 a3863ea 5a820b2 a3863ea 5a820b2 a3863ea 5a820b2 a3863ea 8c26fbc a3863ea 5a820b2 a3863ea 8c26fbc 5a820b2 a3863ea 5a820b2 a3863ea 5a820b2 8c26fbc a3863ea 5a820b2 a3863ea 5a820b2 a3863ea 5a820b2 a3863ea 5a820b2 a3863ea |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 |
# app/src/molecules/generated_variations.py
import sys
import subprocess
from pathlib import Path
import os
import pandas as pd
from rdkit import Chem
from rdkit.Chem import Draw
def _ensure_attachment_point(smiles: str) -> str:
    """
    Normalise a scaffold SMILES so that it carries a ``[*]`` attachment point.

    Surrounding whitespace is stripped first. A falsy or whitespace-only
    input yields an empty string. A scaffold that already contains the
    literal ``[*]`` token is returned (stripped) unchanged; otherwise
    ``[*]`` is prepended to it.
    """
    cleaned = smiles.strip() if smiles else ""
    # Only a non-empty scaffold lacking the wildcard needs the prefix.
    if cleaned and "[*]" not in cleaned:
        return f"[*]{cleaned}"
    return cleaned
def _load_generated_smiles(csv_path):
    """
    Read the generator's CSV output and return its usable SMILES strings.

    Returns an empty list (after printing a diagnostic) when the file cannot
    be read or parsed, or when it has no ``smiles`` column. Blank cells and
    whitespace-only entries are dropped; remaining values are stripped.
    """
    try:
        df = pd.read_csv(csv_path)
    except (OSError, ValueError) as e:
        # pandas' EmptyDataError and ParserError both derive from ValueError.
        print(f"[generate_variations_from_partial_smiles] Could not read {csv_path.name}: {e}")
        return []

    if "smiles" not in df.columns:
        print(f"[generate_variations_from_partial_smiles] {csv_path.name} has no 'smiles' column.")
        return []

    # Blank cells are parsed as NaN (a float, not a string), so drop them
    # before anything is handed to the SMILES parser.
    candidates = (str(value).strip() for value in df["smiles"].dropna())
    return [smi for smi in candidates if smi]


def generate_variations_from_partial_smiles(user_smiles: str, n_to_gen: int = 12):
    """
    Use Gen_PartialSMILES2.py to generate molecules from a scaffold SMILES,
    then turn them into RDKit images for the UI grid.

    The generator script is run as a subprocess from the project root (two
    directories above this file's directory) and is expected to write
    ``generated_molecules.csv`` there with a ``smiles`` column.

    Args:
        user_smiles: Scaffold SMILES entered by the user. A ``[*]``
            attachment point is added via ``_ensure_attachment_point`` if
            it is missing.
        n_to_gen: Number of molecules requested from the generator; also the
            maximum number of entries returned.

    Returns:
        A list of dicts:
        [
            {"smiles": str, "image": PIL.Image, "style": "partial_smiles_gen"},
            ...
        ]
        The list is empty when the input is blank, ``n_to_gen`` is not
        positive, the generator fails, or its CSV output is missing or
        unreadable.
    """
    tag = "[generate_variations_from_partial_smiles]"

    if not user_smiles or not user_smiles.strip():
        return []
    # Nothing to generate: skip the (expensive) subprocess entirely. This
    # also keeps a negative value from slicing the results from the wrong end.
    if n_to_gen <= 0:
        return []

    scaffold = _ensure_attachment_point(user_smiles)
    print(f"{tag} Using scaffold: {scaffold}")

    # Resolve /app as project root
    # This file is app/src/molecules/generated_variations.py
    # parents[0] = .../molecules, [1] = .../src, [2] = .../app
    project_root = Path(__file__).resolve().parents[2]
    script_path = project_root / "Gen_PartialSMILES2.py"
    csv_path = project_root / "generated_molecules.csv"
    icons_dir = project_root / "icons"

    # Clean up old CSV (optional)
    if csv_path.exists():
        try:
            csv_path.unlink()
        except OSError as e:
            print(f"Warning: could not delete old CSV: {e}")

    # Icons are only a debugging aid, so failing to create the directory
    # must not prevent the variations from being returned.
    save_icons = True
    try:
        icons_dir.mkdir(exist_ok=True)
    except OSError as e:
        print(f"{tag} Could not create icons directory {icons_dir}: {e}")
        save_icons = False

    # Build the command: python Gen_PartialSMILES2.py --scaffold ... --n_to_gen ...
    cmd = [
        sys.executable,
        str(script_path),
        "--scaffold", scaffold,
        "--n_to_gen", str(n_to_gen),
    ]
    print(f"{tag} Running command:")
    print(" ", " ".join(cmd))

    try:
        # Run from /app so relative paths in Gen_PartialSMILES2.py work
        subprocess.run(cmd, cwd=project_root, check=True)
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers the interpreter itself failing to launch.
        print(f"{tag} Error running generator: {e}")
        return []

    if not csv_path.exists():
        print(f"{tag} generated_molecules.csv not found.")
        return []

    # Load generated SMILES
    smiles_list = _load_generated_smiles(csv_path)

    variations = []
    for icon_no, smi in enumerate(smiles_list[:n_to_gen], start=1):
        mol = Chem.MolFromSmiles(smi)
        if mol is None:
            print(f"{tag} Skipping invalid SMILES: {smi}")
            continue

        # Make a 2D icon image for the grid
        img = Draw.MolToImage(mol, size=(200, 200), kekulize=True)

        # Optionally save icon to /app/icons/icon_N.png (nice for debugging)
        if save_icons:
            icon_path = icons_dir / f"icon_{icon_no}.png"
            try:
                img.save(icon_path)
            except Exception as e:
                # Deliberately broad: saving is best-effort and must never
                # cost the caller an otherwise valid variation.
                print(f"{tag} Could not save icon {icon_path}: {e}")

        variations.append({
            "smiles": smi,
            "image": img,
            "style": "partial_smiles_gen",
        })

    print(f"{tag} Created {len(variations)} variations.")
    return variations
|