File size: 4,228 Bytes
a3863ea
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
from rdkit import Chem
import re
import random
# Suppress warnings (intended for RDKit noise; note this silences every Python warning)
import warnings
warnings.filterwarnings("ignore")

# Token that marks an attachment point inside a SMILES string.
ATTACHMENT_POINT_TOKEN = "*"
# Matches a numbered attachment point such as "[*:3]" and captures the number.
ATTACHMENT_POINT_NUM_REGEXP = r"\[" + re.escape(ATTACHMENT_POINT_TOKEN) + r":(\d+)\]"
# Matches any attachment point: a bare "*" or a bracketed form ("[*]", "[*:3]", ...).
ATTACHMENT_POINT_REGEXP = (
    r"(?:" + re.escape(ATTACHMENT_POINT_TOKEN)
    + r"|\[" + re.escape(ATTACHMENT_POINT_TOKEN) + r"[^\]]*\])"
)
# Matches a "*" that is not immediately preceded by an opening bracket.
ATTACHMENT_POINT_NO_BRACKETS_REGEXP = r"(?<!\[)" + re.escape(ATTACHMENT_POINT_TOKEN)
# Example SMILES containing an attachment point: "[*][C@H]1C[C@@H](N)C1"

def add_attachment_point_numbers(mol_or_smi, canonicalize=True):
    """Number every attachment point of a SMILES string in order of appearance.

    :param mol_or_smi: SMILES string to process.
    :param canonicalize: When true, the SMILES is first round-tripped through
        RDKit (isomeric, canonical) so the numbering follows the canonical
        atom ordering.
    :return: The SMILES with each attachment point replaced by ``[*:N]``,
        where N counts up from 0 in left-to-right order.
    """
    smiles = mol_or_smi
    if canonicalize:
        parsed = Chem.MolFromSmiles(mol_or_smi)
        smiles = Chem.MolToSmiles(parsed, isomericSmiles=True, canonical=True)

    # Rebuild the string piece by piece, swapping each attachment point for
    # its numbered form; the numbering simply follows the SMILES ordering.
    pieces = []
    cursor = 0
    for index, match in enumerate(re.finditer(ATTACHMENT_POINT_REGEXP, smiles)):
        pieces.append(smiles[cursor:match.start()])
        pieces.append("[{}:{}]".format(ATTACHMENT_POINT_TOKEN, index))
        cursor = match.end()
    pieces.append(smiles[cursor:])
    return "".join(pieces)



def remove_attachment_point_numbers(smi):
    """Strip the numbers from numbered attachment points in a SMILES string.

    :param smi: SMILES string, possibly containing ``[*:N]`` tokens.
    :return: The SMILES with every ``[*:N]`` replaced by ``[*]``.
    """
    unnumbered_token = "[{}]".format(ATTACHMENT_POINT_TOKEN)
    pattern = re.compile(ATTACHMENT_POINT_NUM_REGEXP)
    return pattern.sub(unnumbered_token, smi)




def _add_attachment_point_num(atom, idx):
    """Record attachment-point number ``idx`` on ``atom``.

    The number is merged with any value already stored in the atom's
    ``molAtomMapNumber`` property; the result is written back as a sorted,
    comma-separated string.
    """
    labels = set()
    if atom.HasProp("molAtomMapNumber"):
        labels.update(atom.GetProp("molAtomMapNumber").split(","))
    labels.add(str(idx))
    # NOTE(review): an atom carrying more than one label ends up with a
    # non-integer property value; confirm how RDKit writes that back to SMILES.
    atom.SetProp("molAtomMapNumber", ",".join(sorted(labels)))


def join(scaffold_smi, decoration_smi, keep_label_on_atoms=False,invert_chiralty=False):
    """Join a decoration onto a scaffold at their shared attachment point.

    The decoration must contain exactly one attachment point carrying an atom
    map number; the attachment point with the same map number must occur
    exactly once more in the combined molecule. Both attachment points are
    removed and their neighbouring atoms are bonded directly.

    :param scaffold_smi: SMILES of the scaffold, with numbered attachment points.
    :param decoration_smi: SMILES of the decoration, with one numbered
        attachment point.
    :param keep_label_on_atoms: When true, the attachment-point number is
        stored on both atoms of the newly created bond.
    :param invert_chiralty: When true, the chirality of the second joined atom
        is inverted (presumably the scaffold-side atom, since the decoration is
        passed first to ``CombineMols`` -- TODO confirm atom ordering).
    :return: The joined, sanitized RDKit molecule, or ``None`` when either
        SMILES cannot be parsed, the attachment points are inconsistent, or
        sanitization fails.
    """
    scaffold = Chem.MolFromSmiles(scaffold_smi)
    decoration = Chem.MolFromSmiles(decoration_smi)
    if not (scaffold and decoration):
        return None

    # Obtain the attachment-point id used by the decoration.
    try:
        attachment_points = [atom.GetProp("molAtomMapNumber") for atom in decoration.GetAtoms()
                             if atom.GetSymbol() == ATTACHMENT_POINT_TOKEN]
    except KeyError:
        # An attachment point without a map number cannot be matched.
        return None
    if len(attachment_points) != 1:
        return None  # the decoration must have exactly one attachment point
    attachment_point = attachment_points[0]

    combined_scaffold = Chem.RWMol(Chem.CombineMols(decoration, scaffold))
    attachments = [atom for atom in combined_scaffold.GetAtoms()
                   if atom.GetSymbol() == ATTACHMENT_POINT_TOKEN and
                   atom.HasProp("molAtomMapNumber") and atom.GetProp("molAtomMapNumber") == attachment_point]
    if len(attachments) != 2:
        return None  # expected exactly one matching point on each side

    neighbors = []
    for atom in attachments:
        if atom.GetDegree() != 1:
            return None  # the attachment is wrongly generated
        neighbors.append(atom.GetNeighbors()[0])

    # The new bond is double if either attachment point was double-bonded.
    bonds = [atom.GetBonds()[0] for atom in attachments]
    bond_type = Chem.BondType.SINGLE
    if any(bond.GetBondType() == Chem.BondType.DOUBLE for bond in bonds):
        bond_type = Chem.BondType.DOUBLE

    # Bond the two neighbours first, then drop the attachment points. The
    # index is read again before each removal because removing an atom
    # changes the indices of the remaining atoms.
    combined_scaffold.AddBond(neighbors[0].GetIdx(), neighbors[1].GetIdx(), bond_type)
    combined_scaffold.RemoveAtom(attachments[0].GetIdx())
    combined_scaffold.RemoveAtom(attachments[1].GetIdx())
    if invert_chiralty:
        neighbors[1].InvertChirality()
    if keep_label_on_atoms:
        for neigh in neighbors:
            _add_attachment_point_num(neigh, attachment_point)

    joined = combined_scaffold.GetMol()
    try:
        Chem.SanitizeMol(joined)
    except ValueError:  # sanitization error
        return None
    return joined

def join_scaf_deco(scaffold='O=C1NN=C([*])c2c1cccc2',decorator='[*]N1CCN(C)CC1',Parameter_InvertChiralty=False):
    """Join a scaffold SMILES and a decorator SMILES into one molecule SMILES.

    Attachment points in both inputs are numbered, the two fragments are
    joined, and any remaining attachment-point numbers are stripped from the
    canonical isomeric SMILES of the result.

    :param scaffold: Scaffold SMILES containing an attachment point.
    :param decorator: Decorator SMILES containing an attachment point.
    :param Parameter_InvertChiralty: Forwarded to ``join`` as ``invert_chiralty``.
    :return: SMILES of the joined molecule, or ``''`` if joining fails for
        any reason.
    """
    try:
        smiles_scaffold = add_attachment_point_numbers(scaffold)
        smiles_decorator = add_attachment_point_numbers(decorator)
        joined_mol = join(smiles_scaffold, smiles_decorator, invert_chiralty=Parameter_InvertChiralty)
        if joined_mol is None:
            # join() signals an unjoinable pair with None.
            return ''
        smiles_joined = Chem.MolToSmiles(joined_mol, isomericSmiles=True, canonical=True)
        return remove_attachment_point_numbers(smiles_joined)
    except Exception:
        # Best-effort API: any ordinary failure yields an empty string, while
        # KeyboardInterrupt / SystemExit are allowed to propagate.
        return ''

# print results to the terminal for testing
# Small manual check: print the inputs and the joined result to the terminal.
if __name__ == "__main__":
    demo_scaffold, demo_decorator = 'O=C1NN=C([*])c2c1cccc2', '[*]N1CCN(C)CC1'
    for label, smiles in (("Scaffold: ", demo_scaffold), ("Decorator:", demo_decorator)):
        print(label, smiles)
    print("Joined:   ", join_scaf_deco(demo_scaffold, demo_decorator, Parameter_InvertChiralty=True))