|
|
|
|
|
|
|
|
import sys |
|
|
import subprocess |
|
|
from pathlib import Path |
|
|
import os |
|
|
|
|
|
import pandas as pd |
|
|
from rdkit import Chem |
|
|
from rdkit.Chem import Draw |
|
|
|
|
|
|
|
|
def _ensure_attachment_point(smiles: str) -> str: |
|
|
""" |
|
|
Ensure the scaffold contains the required [*] wildcard. |
|
|
If missing, add [*] to the front. |
|
|
""" |
|
|
smiles = (smiles or "").strip() |
|
|
if not smiles: |
|
|
return "" |
|
|
if "[*]" in smiles: |
|
|
return smiles |
|
|
return "[*]" + smiles |
|
|
|
|
|
|
|
|
def generate_variations_from_partial_smiles(user_smiles: str, n_to_gen: int = 12):
    """
    Generate molecule variations from a scaffold SMILES for the UI grid.

    Runs ``Gen_PartialSMILES2.py`` (looked up in the project root, i.e. two
    levels above this file's directory) as a subprocess, then reads the
    ``generated_molecules.csv`` the script is expected to leave in the
    project root and renders each parseable SMILES as a 200x200 RDKit image.
    Every image is additionally saved, on a best-effort basis, to
    ``<project_root>/icons/icon_<row>.png``.

    Args:
        user_smiles: Scaffold SMILES. ``[*]`` is prepended when it is absent.
        n_to_gen: Number of molecules requested from the generator; at most
            this many CSV rows are considered.

    Returns:
        A list of dicts:
        [
            {"smiles": str, "image": PIL.Image, "style": "partial_smiles_gen"},
            ...
        ]
        The list is empty when the scaffold is blank, ``n_to_gen`` is not
        positive, the generator cannot be run or fails, or the CSV is
        missing, unreadable, or has no ``smiles`` column. Rows whose SMILES
        is missing or cannot be parsed are skipped, so fewer than
        ``n_to_gen`` entries may be returned.
    """
    tag = "[generate_variations_from_partial_smiles]"

    # Bail out before touching the filesystem or spawning the generator.
    # A negative n_to_gen would otherwise slice from the END of the results.
    if not user_smiles or not user_smiles.strip():
        return []
    if n_to_gen <= 0:
        return []

    scaffold = _ensure_attachment_point(user_smiles)
    print(f"{tag} Using scaffold: {scaffold}")

    project_root = Path(__file__).resolve().parents[2]
    script_path = project_root / "Gen_PartialSMILES2.py"
    csv_path = project_root / "generated_molecules.csv"
    icons_dir = project_root / "icons"

    if not script_path.is_file():
        print(f"{tag} Generator script not found: {script_path}")
        return []

    # Remove any CSV left over from a previous run so stale results are not
    # mistaken for fresh output. Failure to delete is only a warning.
    if csv_path.exists():
        try:
            csv_path.unlink()
        except OSError as e:
            print(f"Warning: could not delete old CSV: {e}")

    # Icons are a best-effort side product; a missing directory must not
    # prevent the variations from being returned.
    try:
        icons_dir.mkdir(exist_ok=True)
    except OSError as e:
        print(f"{tag} Could not create icons directory {icons_dir}: {e}")

    # Argument list (no shell) so the scaffold is passed through verbatim.
    cmd = [
        sys.executable,
        str(script_path),
        "--scaffold", scaffold,
        "--n_to_gen", str(n_to_gen),
    ]

    print(f"{tag} Running command:")
    print("  ", " ".join(cmd))

    try:
        subprocess.run(cmd, cwd=project_root, check=True)
    except (subprocess.CalledProcessError, OSError) as e:
        # CalledProcessError: non-zero exit; OSError: interpreter not launchable.
        print(f"{tag} Error running generator: {e}")
        return []

    if not csv_path.exists():
        print(f"{tag} generated_molecules.csv not found.")
        return []

    try:
        df = pd.read_csv(csv_path)
    except (OSError, ValueError) as e:
        # pandas' EmptyDataError and ParserError are ValueError subclasses.
        print(f"{tag} Could not read {csv_path}: {e}")
        return []

    if "smiles" not in df.columns:
        print(f"{tag} No 'smiles' column in {csv_path}.")
        return []

    # Only the first n_to_gen rows are considered; icon numbers follow the
    # row position, so skipped rows leave gaps in the numbering.
    rows = df["smiles"].tolist()[:n_to_gen]

    variations = []
    for idx, smi in enumerate(rows):
        # Empty cells come back from pandas as NaN (a float), which RDKit
        # cannot parse; treat them like any other invalid entry.
        if not isinstance(smi, str):
            print(f"{tag} Skipping invalid SMILES: {smi}")
            continue

        mol = Chem.MolFromSmiles(smi)
        if mol is None:
            print(f"{tag} Skipping invalid SMILES: {smi}")
            continue

        img = Draw.MolToImage(mol, size=(200, 200), kekulize=True)

        icon_path = icons_dir / f"icon_{idx+1}.png"
        try:
            img.save(icon_path)
        except Exception as e:
            # Deliberately broad: saving the icon is best-effort and must
            # never cost the caller the in-memory image.
            print(f"{tag} Could not save icon {icon_path}: {e}")

        variations.append({
            "smiles": smi,
            "image": img,
            "style": "partial_smiles_gen",
        })

    print(f"{tag} Created {len(variations)} variations.")
    return variations
|
|
|