# Joey / Join.py
# Joey Callanan
# minor changes
# a3863ea
from rdkit import Chem
import re
import random
# Suppress Python-level warnings process-wide (RDKit's own log messages are controlled separately via rdkit.RDLogger).
import warnings
warnings.filterwarnings("ignore")
# SMILES symbol used throughout this module to mark an attachment point.
ATTACHMENT_POINT_TOKEN = "*"
# Matches a numbered attachment point such as "[*:3]"; group 1 captures the number.
ATTACHMENT_POINT_NUM_REGEXP = r"\[{}:(\d+)\]".format(re.escape(ATTACHMENT_POINT_TOKEN))
# Matches any attachment point: a bare "*" or a bracketed atom starting with "[*" (e.g. "[*]" or "[*:3]").
ATTACHMENT_POINT_REGEXP = r"(?:{0}|\[{0}[^\]]*\])".format(re.escape(ATTACHMENT_POINT_TOKEN))
# Matches a "*" that is not immediately preceded by "[", i.e. an attachment point written without brackets.
ATTACHMENT_POINT_NO_BRACKETS_REGEXP = r"(?<!\[){}".format(re.escape(ATTACHMENT_POINT_TOKEN))
# Example SMILES with one unnumbered attachment point: [*][C@H]1C[C@@H](N)C1
def add_attachment_point_numbers(mol_or_smi, canonicalize=True):
    """Number every attachment point in a molecule, in SMILES order.

    Each match of ``ATTACHMENT_POINT_REGEXP`` (a bare ``*`` or a bracketed
    atom starting with ``[*``) is replaced by ``[*:N]`` where ``N`` counts up
    from 0 in the order the attachment points appear in the SMILES string.
    Labels that are already present are therefore overwritten.

    :param mol_or_smi: A SMILES string or an RDKit molecule.
    :param canonicalize: When true, the SMILES is canonicalized first so that
        the numbering does not depend on how the input was written.
    :return: The SMILES string with numbered attachment points.

    If a SMILES string cannot be parsed while canonicalizing,
    ``Chem.MolFromSmiles`` yields ``None`` and the following
    ``Chem.MolToSmiles`` call raises; that exception is propagated.
    """
    if isinstance(mol_or_smi, str):
        smi = mol_or_smi
        if canonicalize:
            smi = Chem.MolToSmiles(Chem.MolFromSmiles(smi), isomericSmiles=True, canonical=True)
    else:
        # Anything that is not a string is treated as an RDKit molecule and
        # serialised here, so the regex substitution below always sees text.
        smi = Chem.MolToSmiles(mol_or_smi, isomericSmiles=True, canonical=canonicalize)

    # Numbers are handed out strictly in the order of the SMILES string.
    next_num = 0

    def _number_attachment_point(_match):
        nonlocal next_num
        label = "[{}:{}]".format(ATTACHMENT_POINT_TOKEN, next_num)
        next_num += 1
        return label

    return re.sub(ATTACHMENT_POINT_REGEXP, _number_attachment_point, smi)
def remove_attachment_point_numbers(smi):
    """Return ``smi`` with every numbered attachment point reduced to its unnumbered form.

    :param smi: SMILES string that may contain numbered attachment points.
    :return: The SMILES string with each match of ``ATTACHMENT_POINT_NUM_REGEXP``
        replaced by the bracketed attachment point token without a number.
    """
    unnumbered_point = "[{}]".format(ATTACHMENT_POINT_TOKEN)
    numbered_point = re.compile(ATTACHMENT_POINT_NUM_REGEXP)
    return numbered_point.sub(unnumbered_point, smi)
def join(scaffold_smi, decoration_smi, keep_label_on_atoms=False, invert_chiralty=False):
    """Join a scaffold and a decoration at their matching attachment point.

    The decoration must contain exactly one attachment point carrying an atom
    map number, and the scaffold must contain exactly one attachment point
    with the same number. Both attachment atoms are removed and their
    neighbours are bonded directly; the new bond is double if either original
    attachment bond was double, otherwise single.

    :param scaffold_smi: SMILES of the scaffold with numbered attachment points.
    :param decoration_smi: SMILES of the decoration with one numbered attachment point.
    :param keep_label_on_atoms: When true, the attachment point number is
        recorded on the two atoms that end up bonded.
    :param invert_chiralty: When true, ``InvertChirality()`` is called on the
        scaffold-side atom of the new bond.
    :return: The sanitized joined RDKit molecule, or ``None`` when either
        SMILES cannot be parsed, the attachment points do not match up as
        described above, or sanitization fails.
    """
    scaffold = Chem.MolFromSmiles(scaffold_smi)
    decoration = Chem.MolFromSmiles(decoration_smi)
    if not scaffold or not decoration:
        return None

    # Map numbers of the decoration's attachment points; GetProp raises
    # KeyError for an attachment point that has no number.
    try:
        attachment_points = [atom.GetProp("molAtomMapNumber") for atom in decoration.GetAtoms()
                             if atom.GetSymbol() == ATTACHMENT_POINT_TOKEN]
    except KeyError:
        return None
    if len(attachment_points) != 1:
        return None  # zero or several attachment points on the decoration
    attachment_point = attachment_points[0]

    # Decoration atoms come first in the combined molecule, scaffold atoms second.
    combined_scaffold = Chem.RWMol(Chem.CombineMols(decoration, scaffold))
    attachments = [atom for atom in combined_scaffold.GetAtoms()
                   if atom.GetSymbol() == ATTACHMENT_POINT_TOKEN
                   and atom.HasProp("molAtomMapNumber")
                   and atom.GetProp("molAtomMapNumber") == attachment_point]
    if len(attachments) != 2:
        return None  # need exactly one matching point on each side

    neighbors = []
    for atom in attachments:
        if atom.GetDegree() != 1:
            return None  # an attachment point must hang off exactly one atom
        neighbors.append(atom.GetNeighbors()[0])

    bonds = [atom.GetBonds()[0] for atom in attachments]
    has_double = any(bond.GetBondType() == Chem.BondType.DOUBLE for bond in bonds)
    bond_type = Chem.BondType.DOUBLE if has_double else Chem.BondType.SINGLE

    combined_scaffold.AddBond(neighbors[0].GetIdx(), neighbors[1].GetIdx(), bond_type)
    combined_scaffold.RemoveAtom(attachments[0].GetIdx())
    combined_scaffold.RemoveAtom(attachments[1].GetIdx())

    if invert_chiralty:
        neighbors[1].InvertChirality()
    if keep_label_on_atoms:
        for neigh in neighbors:
            _add_attachment_point_num(neigh, attachment_point)

    joined = combined_scaffold.GetMol()
    try:
        Chem.SanitizeMol(joined)
    except ValueError:  # sanitization error
        return None
    return joined


def _add_attachment_point_num(atom, idx):
    """Record attachment point number ``idx`` in the atom's ``molAtomMapNumber`` property.

    Numbers already stored on the atom are kept: the property is treated as a
    comma-separated list, ``idx`` is merged into it, and the sorted result is
    written back.
    """
    labels = set()
    if atom.HasProp("molAtomMapNumber"):
        labels.update(atom.GetProp("molAtomMapNumber").split(","))
    labels.add(str(idx))
    atom.SetProp("molAtomMapNumber", ",".join(sorted(labels)))
def join_scaf_deco(scaffold='O=C1NN=C([*])c2c1cccc2', decorator='[*]N1CCN(C)CC1', Parameter_InvertChiralty=False):
    """Join a scaffold SMILES and a decorator SMILES and return the product SMILES.

    Attachment points on both inputs are numbered in canonical SMILES order,
    the two molecules are joined with ``join``, and any attachment point
    numbers left in the product are stripped again.

    :param scaffold: SMILES of the scaffold, with attachment points.
    :param decorator: SMILES of the decorator, with attachment points.
    :param Parameter_InvertChiralty: Forwarded to ``join`` as ``invert_chiralty``.
    :return: Canonical isomeric SMILES of the joined molecule, or ``''`` when
        the inputs cannot be parsed or joined.
    """
    try:
        smiles_scaffold = add_attachment_point_numbers(scaffold)
        smiles_decorator = add_attachment_point_numbers(decorator)
        joined_mol = join(smiles_scaffold, smiles_decorator, invert_chiralty=Parameter_InvertChiralty)
        if joined_mol is None:
            # join() signals every failure with None; report it explicitly
            # rather than relying on MolToSmiles(None) raising.
            return ''
        smiles_joined = Chem.MolToSmiles(joined_mol, isomericSmiles=True, canonical=True)
        return remove_attachment_point_numbers(smiles_joined)
    except Exception:
        # Best-effort API: any processing error yields an empty result, while
        # KeyboardInterrupt and SystemExit are allowed to propagate.
        return ''
# print results to the terminal for testing
if __name__ == "__main__":
    # Manual smoke test: show both inputs, then the product of joining them
    # with chirality inversion requested.
    scaffold = 'O=C1NN=C([*])c2c1cccc2'
    decorator = '[*]N1CCN(C)CC1'
    for label, smiles in (("Scaffold: ", scaffold), ("Decorator:", decorator)):
        print(label, smiles)
    joined = join_scaf_deco(scaffold, decorator, Parameter_InvertChiralty=True)
    print("Joined: ", joined)