File size: 2,591 Bytes
f0a96d3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
from typing import Optional
import numpy as np
import py3Dmol
from rdkit import Chem, DataStructs
from rdkit.Chem import AllChem
import torch


class MorganFingerprint:
    """Compute fixed-length Morgan fingerprints from SMILES strings.

    Args:
        shape: Number of bits in the fingerprint. It is passed to RDKit as
            ``nBits`` and is also the length of the all-zero fallback vector.
        radius: Morgan radius passed to RDKit.
    """

    def __init__(self, shape: int = 2048, radius: int = 2):
        self.shape = shape
        self.radius = radius

    @staticmethod
    def canonicalize(smiles):
        """Return the canonical isomeric SMILES for *smiles*.

        If RDKit cannot parse the input (``MolFromSmiles`` returns ``None``),
        the input is returned unchanged.
        """
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            return smiles
        return Chem.MolToSmiles(mol, isomericSmiles=True)

    def smiles_to_morgan(self, smile: str) -> torch.Tensor:
        """Return the Morgan fingerprint of *smile* as a float32 tensor.

        This is a best-effort conversion: when the SMILES cannot be parsed,
        or any step of the fingerprint computation raises, an all-zero tensor
        of length ``self.shape`` is returned instead of propagating the error.

        Args:
            smile: SMILES string of the molecule.

        Returns:
            A 1-D ``torch.float32`` tensor holding the fingerprint bits, or
            zeros of length ``self.shape`` on failure.
        """
        import logging

        features = None
        try:
            # Parse the canonical form, exactly as before, so fingerprints
            # stay identical to those produced by earlier versions.
            mol = Chem.MolFromSmiles(self.canonicalize(smile))
            if mol is not None:
                bit_vect = AllChem.GetMorganFingerprintAsBitVect(
                    mol, self.radius, nBits=self.shape
                )
                # ConvertToNumpyArray resizes the destination array to the
                # fingerprint length, so the initial size is irrelevant.
                features = np.zeros((1,))
                DataStructs.ConvertToNumpyArray(bit_vect, features)
        except Exception:
            # Deliberate best-effort: fall back to zeros, but leave a trace
            # so failures can be diagnosed when debug logging is enabled.
            logging.getLogger(__name__).debug(
                "Morgan fingerprint failed for %r; returning zeros",
                smile,
                exc_info=True,
            )
            features = None

        if features is None:
            features = np.zeros((self.shape,))
        return torch.tensor(features, dtype=torch.float32)


def get_morgan(input_sequences):
    """Convert SMILES strings to textual Morgan-fingerprint encodings.

    Each SMILES is fingerprinted with a default ``MorganFingerprint()`` and
    the indices of the set bits are rendered as ``"[i][j]..."`` by
    ``morgan_fingerprint_to_text``. Because ``smiles_to_morgan`` returns an
    all-zero vector on failure, a SMILES that cannot be processed yields an
    empty string.

    Args:
        input_sequences: Iterable of SMILES strings.

    Returns:
        A list with one encoded string per input, in input order.
    """
    fingerprinter = MorganFingerprint()
    return [
        morgan_fingerprint_to_text(fingerprinter.smiles_to_morgan(smiles))
        for smiles in input_sequences
    ]


def morgan_fingerprint_to_text(morgan_fn):
    """Render the set bits of a fingerprint as bracketed indices.

    Positions whose value equals ``1.0`` are listed in ascending order, each
    wrapped in square brackets, e.g. ``[0., 1., 0., 1.]`` -> ``"[1][3]"``.
    A fingerprint with no set bits yields ``""``.

    Args:
        morgan_fn: 1-D fingerprint as a ``torch.Tensor``, or anything
            ``torch.as_tensor`` accepts (NumPy array, list of numbers).

    Returns:
        The concatenated ``"[index]"`` tokens as a single string.
    """
    # as_tensor returns an existing tensor as-is and converts array-likes.
    bits = torch.as_tensor(morgan_fn)
    on_indices = torch.nonzero(bits == 1.0, as_tuple=False).squeeze(-1).tolist()
    return "".join(f"[{index}]" for index in on_indices)


def smiles_to_3d(smiles_list, width=400, height=300):
    """Build a py3Dmol view showing 3D structures for the given SMILES.

    For each SMILES the molecule is parsed with RDKit, hydrogens are added,
    3D coordinates are embedded (fixed ``randomSeed=42``) and UFF-optimized,
    and the result is added to the view as a PDB model. All models are
    drawn as sticks.

    Args:
        smiles_list: Iterable of SMILES strings. A single string is treated
            as a one-element list.
        width: Width passed to ``py3Dmol.view``.
        height: Height passed to ``py3Dmol.view``.

    Returns:
        The ``py3Dmol`` view containing one model per input SMILES.

    Raises:
        ValueError: If a SMILES cannot be parsed, or no 3D conformer could
            be generated for it.
    """
    # A bare string would otherwise be iterated character by character,
    # with every character parsed as its own SMILES.
    if isinstance(smiles_list, str):
        smiles_list = [smiles_list]

    view = py3Dmol.view(width=width, height=height)
    added_any = False
    for smiles in smiles_list:
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            raise ValueError(f"Invalid SMILES string: {smiles!r}")

        # Explicit hydrogens are needed for a sensible 3D geometry.
        mol = Chem.AddHs(mol)

        # EmbedMolecule reports failure through a negative conformer id;
        # without this check the failure only surfaces as an opaque error
        # from the optimizer.
        if AllChem.EmbedMolecule(mol, randomSeed=42) < 0:
            raise ValueError(
                f"Could not generate 3D coordinates for SMILES: {smiles!r}"
            )
        AllChem.UFFOptimizeMolecule(mol)

        view.addModel(Chem.MolToPDBBlock(mol), 'pdb')
        added_any = True

    # Styling and zoom apply to the whole view, so issue them once after all
    # models are in place instead of once per molecule.
    if added_any:
        view.setStyle({'stick': {}})
        view.zoomTo()
    return view