# Joey / src / molecules / generated_variations.py
# Joey Callanan
# minor changes
# 5a820b2
# app/src/molecules/generated_variations.py
import sys
import subprocess
from pathlib import Path
import os
import pandas as pd
from rdkit import Chem
from rdkit.Chem import Draw
def _ensure_attachment_point(smiles: str) -> str:
"""
Ensure the scaffold contains the required [*] wildcard.
If missing, add [*] to the front.
"""
smiles = (smiles or "").strip()
if not smiles:
return ""
if "[*]" in smiles:
return smiles
return "[*]" + smiles
def _load_generated_smiles(csv_path, limit):
    """Read at most ``limit`` non-blank SMILES strings from the generator CSV.

    Returns an empty list (after logging) when the file cannot be read or
    parsed, or when it has no ``smiles`` column.
    """
    log_tag = "[generate_variations_from_partial_smiles]"
    try:
        df = pd.read_csv(csv_path)
    except (OSError, ValueError) as e:
        # pandas' EmptyDataError and ParserError are ValueError subclasses,
        # so an empty or malformed file lands here instead of crashing.
        print(f"{log_tag} Could not read {csv_path}: {e}")
        return []

    if "smiles" not in df.columns:
        print(f"{log_tag} No 'smiles' column in {csv_path}.")
        return []

    # Blank cells are read as float NaN; drop them so only real strings are
    # handed to the SMILES parser, and ignore whitespace-only entries.
    cleaned = (str(value).strip() for value in df["smiles"].dropna())
    return [smi for smi in cleaned if smi][:limit]


def generate_variations_from_partial_smiles(user_smiles: str, n_to_gen: int = 12):
    """
    Use Gen_PartialSMILES2.py to generate molecules from a scaffold SMILES,
    then turn them into RDKit images for the UI grid.

    Args:
        user_smiles: Scaffold SMILES typed by the user. A ``[*]`` attachment
            point is added via ``_ensure_attachment_point`` if it is missing.
        n_to_gen: Number of molecules requested from the generator, and the
            maximum number of entries returned. Values <= 0 yield ``[]``.

    Returns:
        A list of dicts, one per generated SMILES that RDKit could parse:
        [
            {"smiles": str, "image": PIL.Image, "style": "partial_smiles_gen"},
            ...
        ]
        An empty list is returned for blank input, a non-positive
        ``n_to_gen``, a generator failure, or a missing/unreadable CSV.

    Side effects:
        Runs the generator script as a subprocess from the project root,
        deletes and re-reads ``generated_molecules.csv`` there, and writes
        ``icons/icon_N.png`` files (best effort).
    """
    log_tag = "[generate_variations_from_partial_smiles]"

    if not user_smiles or not user_smiles.strip():
        return []
    # A negative count would make the later slice drop rows from the end
    # instead of limiting the result, and zero means there is nothing to do.
    if n_to_gen <= 0:
        return []

    scaffold = _ensure_attachment_point(user_smiles)
    print(f"{log_tag} Using scaffold: {scaffold}")

    # Resolve /app as project root
    # This file is app/src/molecules/generated_variations.py
    # parents[0] = .../molecules, [1] = .../src, [2] = .../app
    project_root = Path(__file__).resolve().parents[2]
    script_path = project_root / "Gen_PartialSMILES2.py"
    csv_path = project_root / "generated_molecules.csv"
    icons_dir = project_root / "icons"

    # Clean up old CSV (optional). If this fails and the generator does not
    # overwrite the file, the stale contents are what gets read below.
    if csv_path.exists():
        try:
            csv_path.unlink()
        except OSError as e:
            print(f"Warning: could not delete old CSV: {e}")

    # Icons are only a debugging aid, so failing to create the directory
    # must not abort generation; the per-icon save below reports failures.
    try:
        icons_dir.mkdir(exist_ok=True)
    except OSError as e:
        print(f"{log_tag} Could not create icons directory {icons_dir}: {e}")

    # Build the command: python Gen_PartialSMILES2.py --scaffold ... --n_to_gen ...
    # Passed as a list (no shell), so the scaffold text is never shell-parsed.
    cmd = [
        sys.executable,
        str(script_path),
        "--scaffold", scaffold,
        "--n_to_gen", str(n_to_gen),
    ]
    print(f"{log_tag} Running command:")
    print(" ", " ".join(cmd))

    try:
        # Run from /app so relative paths in Gen_PartialSMILES2.py work
        subprocess.run(cmd, cwd=project_root, check=True)
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers the case where the interpreter cannot be launched.
        print(f"{log_tag} Error running generator: {e}")
        return []

    if not csv_path.exists():
        print(f"{log_tag} generated_molecules.csv not found.")
        return []

    # Load generated SMILES
    smiles_list = _load_generated_smiles(csv_path, n_to_gen)

    variations = []
    for idx, smi in enumerate(smiles_list):
        mol = Chem.MolFromSmiles(smi)
        if mol is None:
            print(f"{log_tag} Skipping invalid SMILES: {smi}")
            continue

        # Make a 2D icon image for the grid
        img = Draw.MolToImage(mol, size=(200, 200), kekulize=True)

        # Optionally save icon to /app/icons/icon_N.png (nice for debugging)
        icon_path = icons_dir / f"icon_{idx+1}.png"
        try:
            img.save(icon_path)
        except Exception as e:
            # Deliberately broad: saving is best effort and must never cost
            # the caller an otherwise valid variation.
            print(f"{log_tag} Could not save icon {icon_path}: {e}")

        variations.append({
            "smiles": smi,
            "image": img,
            "style": "partial_smiles_gen",
        })

    print(f"{log_tag} Created {len(variations)} variations.")
    return variations