# app/src/molecules/generated_variations.py
import sys
import subprocess
from pathlib import Path
import os

import pandas as pd
from rdkit import Chem
from rdkit.Chem import Draw

# Prefix shared by every diagnostic message printed by the public entry point.
_LOG_PREFIX = "[generate_variations_from_partial_smiles]"


def _ensure_attachment_point(smiles: str) -> str:
    """
    Ensure the scaffold contains the required [*] wildcard.

    Leading/trailing whitespace is stripped first. An empty or None input
    yields "". If the literal token "[*]" is missing, it is prepended.
    """
    smiles = (smiles or "").strip()
    if not smiles:
        return ""
    if "[*]" in smiles:
        return smiles
    return "[*]" + smiles


def _load_generated_smiles(csv_path: Path) -> list:
    """
    Read the "smiles" column of the generator's CSV output.

    Returns the column values as a list (entries may be non-strings such as
    NaN for blank cells; the caller filters those). Returns an empty list,
    after printing a diagnostic, if the file cannot be read/parsed or has no
    "smiles" column.
    """
    try:
        df = pd.read_csv(csv_path)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, OSError) as e:
        print(f"{_LOG_PREFIX} Could not read {csv_path.name}: {e}")
        return []

    if "smiles" not in df.columns:
        print(f"{_LOG_PREFIX} {csv_path.name} has no 'smiles' column.")
        return []

    return df["smiles"].tolist()


def generate_variations_from_partial_smiles(user_smiles: str, n_to_gen: int = 12):
    """
    Use Gen_PartialSMILES2.py to generate molecules from a scaffold SMILES,
    then turn them into RDKit images for the UI grid.

    Args:
        user_smiles: Scaffold SMILES typed by the user. A "[*]" attachment
            point is prepended if the string does not already contain one.
        n_to_gen: Number of molecules requested from the generator; also the
            maximum number of CSV rows that are rendered. Non-positive values
            return an empty list without running the generator.

    Returns:
        A list of dicts:
        [
            {"smiles": str, "image": PIL.Image, "style": "partial_smiles_gen"},
            ...
        ]
        The list is empty when the input is blank, the generator fails, or
        its CSV output is missing/unreadable. Invalid SMILES rows are skipped.

    Side effects:
        Deletes any previous generated_molecules.csv, runs the generator
        script as a subprocess from the project root, and writes
        icons/icon_N.png files (best effort) under the project root.
    """
    if not user_smiles or not user_smiles.strip():
        return []

    # A negative count would otherwise make the slice below drop items from
    # the END of the list, and there is nothing to render for zero.
    if n_to_gen <= 0:
        return []

    scaffold = _ensure_attachment_point(user_smiles)
    print(f"{_LOG_PREFIX} Using scaffold: {scaffold}")

    # Resolve /app as project root
    # This file is app/src/molecules/generated_variations.py
    # parents[0] = .../molecules, [1] = .../src, [2] = .../app
    project_root = Path(__file__).resolve().parents[2]
    script_path = project_root / "Gen_PartialSMILES2.py"
    csv_path = project_root / "generated_molecules.csv"
    icons_dir = project_root / "icons"

    # Clean up old CSV (optional) so stale results are not mistaken for the
    # output of this run.
    if csv_path.exists():
        try:
            csv_path.unlink()
        except OSError as e:
            print(f"Warning: could not delete old CSV: {e}")

    # Icons are a debugging aid only; failing to create the directory must not
    # abort generation (each save below is best-effort as well).
    try:
        icons_dir.mkdir(exist_ok=True)
    except OSError as e:
        print(f"{_LOG_PREFIX} Could not create icons directory {icons_dir}: {e}")

    # Build the command: python Gen_PartialSMILES2.py --scaffold ... --n_to_gen ...
    cmd = [
        sys.executable,
        str(script_path),
        "--scaffold",
        scaffold,
        "--n_to_gen",
        str(n_to_gen),
    ]

    print(f"{_LOG_PREFIX} Running command:")
    print(" ", " ".join(cmd))

    try:
        # Run from /app so relative paths in Gen_PartialSMILES2.py work
        subprocess.run(cmd, cwd=project_root, check=True)
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers failure to launch the interpreter at all.
        print(f"{_LOG_PREFIX} Error running generator: {e}")
        return []

    if not csv_path.exists():
        print(f"{_LOG_PREFIX} generated_molecules.csv not found.")
        return []

    # Load generated SMILES
    smiles_list = _load_generated_smiles(csv_path)

    variations = []
    for idx, smi in enumerate(smiles_list[:n_to_gen]):
        # Blank CSV cells come back from pandas as float NaN; passing a
        # non-string to RDKit raises, so treat them like any invalid SMILES.
        if not isinstance(smi, str) or not smi.strip():
            print(f"{_LOG_PREFIX} Skipping invalid SMILES: {smi}")
            continue

        mol = Chem.MolFromSmiles(smi)
        if mol is None:
            print(f"{_LOG_PREFIX} Skipping invalid SMILES: {smi}")
            continue

        # Make a 2D icon image for the grid
        img = Draw.MolToImage(mol, size=(200, 200), kekulize=True)

        # Optionally save icon to /app/icons/icon_N.png (nice for debugging).
        # N is the 1-based row position in the CSV, so skipped rows leave gaps.
        icon_path = icons_dir / f"icon_{idx+1}.png"
        try:
            img.save(icon_path)
        except Exception as e:  # deliberate best-effort: never fail on icons
            print(f"{_LOG_PREFIX} Could not save icon {icon_path}: {e}")

        variations.append({
            "smiles": smi,
            "image": img,
            "style": "partial_smiles_gen",
        })

    print(f"{_LOG_PREFIX} Created {len(variations)} variations.")
    return variations