File size: 2,591 Bytes
f0a96d3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
from typing import Optional
import numpy as np
import py3Dmol
from rdkit import Chem, DataStructs
from rdkit.Chem import AllChem
import torch
class MorganFingerprint:
    """Turn SMILES strings into fixed-length Morgan fingerprint tensors."""

    def __init__(self, shape: Optional[int] = 2048, radius: Optional[int] = 2):
        """Store the fingerprint configuration.

        Args:
            shape: Number of bits in the fingerprint vector. ``None`` selects
                the default of 2048.
            radius: Morgan radius handed to RDKit. ``None`` selects the
                default of 2.
        """
        # The signature advertises Optional[int]; map None onto the defaults
        # so that neither RDKit nor the zero-vector fallback ever sees None.
        self.shape = 2048 if shape is None else shape
        self.radius = 2 if radius is None else radius

    @staticmethod
    def canonicalize(smiles):
        """Return the canonical isomeric SMILES for *smiles*.

        If RDKit cannot parse the input, the input is returned unchanged.
        """
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            return smiles
        return Chem.MolToSmiles(mol, isomericSmiles=True)

    def smiles_to_morgan(self, smile: str) -> torch.Tensor:
        """Compute the Morgan bit fingerprint of *smile*.

        Args:
            smile: SMILES string describing the molecule.

        Returns:
            A 1-D ``float32`` tensor. On success it holds the fingerprint
            bits; on any failure (unparsable SMILES or an error while
            fingerprinting) it is an all-zero vector of length ``self.shape``.
        """
        features = None
        try:
            mol = Chem.MolFromSmiles(self.canonicalize(smile))
            # An unparsable SMILES yields None; skip fingerprinting and let
            # the zero-vector fallback below handle it explicitly.
            if mol is not None:
                bit_vect = AllChem.GetMorganFingerprintAsBitVect(
                    mol, self.radius, nBits=self.shape
                )
                # Destination array filled in place by ConvertToNumpyArray
                # (RDKit resizes it to the fingerprint length).
                features = np.zeros((1,))
                DataStructs.ConvertToNumpyArray(bit_vect, features)
        except Exception:
            # Deliberate best-effort: any failure degrades to the
            # all-zero fingerprint instead of propagating.
            features = None
        if features is None:
            features = np.zeros((self.shape,))
        return torch.tensor(features, dtype=torch.float32)
def get_morgan(input_sequences):
    """Encode each SMILES string as a bracketed list of set-bit indices.

    A default ``MorganFingerprint`` is used to fingerprint every entry of
    *input_sequences*. Each fingerprint is rendered as the concatenation of
    ``"[i]"`` for every position ``i`` whose value equals 1.0, e.g.
    ``"[3][17][2040]"``.

    Args:
        input_sequences: Iterable of SMILES strings.

    Returns:
        A list of strings, one per input, in input order.
    """
    fingerprinter = MorganFingerprint()
    encoded = []
    for smiles in input_sequences:
        bits = fingerprinter.smiles_to_morgan(smiles)
        on_positions = (
            torch.nonzero(bits == 1.0, as_tuple=False).squeeze(-1).tolist()
        )
        encoded.append("".join(f"[{pos}]" for pos in on_positions))
    return encoded
def morgan_fingerprint_to_text(morgan_fn):
    """Render a fingerprint tensor as a string of its set-bit indices.

    Args:
        morgan_fn: Tensor holding the fingerprint values.

    Returns:
        The concatenation of ``"[i]"`` for every position ``i`` whose value
        equals 1.0, in ascending order; an empty string if there is none.
    """
    on_positions = (
        torch.nonzero(morgan_fn == 1.0, as_tuple=False).squeeze(-1).tolist()
    )
    return "".join(f"[{pos}]" for pos in on_positions)
def smiles_to_3d(smiles_list, width=400, height=300):
    """Show the molecules in *smiles_list* as stick models in one py3Dmol view.

    Each SMILES string is parsed with RDKit, given explicit hydrogens,
    embedded in 3D with a fixed random seed (42) and UFF-optimized. The
    resulting PDB block is added to the viewer as its own model.

    Args:
        smiles_list: Iterable of SMILES strings.
        width: Viewer width passed to ``py3Dmol.view``.
        height: Viewer height passed to ``py3Dmol.view``.

    Returns:
        The ``py3Dmol`` view containing one model per input molecule.

    Raises:
        ValueError: If a SMILES string cannot be parsed, or if RDKit cannot
            generate 3D coordinates for it.
    """
    view = py3Dmol.view(width=width, height=height)
    for smiles in smiles_list:
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            raise ValueError(f"Invalid SMILES string: {smiles!r}")
        # Explicit hydrogens are added before generating 3D coordinates.
        mol = Chem.AddHs(mol)
        # EmbedMolecule reports failure through a negative conformer id
        # rather than an exception; without this check the optimizer below
        # would fail with an opaque conformer error.
        if AllChem.EmbedMolecule(mol, randomSeed=42) < 0:
            raise ValueError(
                f"Could not generate 3D coordinates for SMILES: {smiles!r}"
            )
        AllChem.UFFOptimizeMolecule(mol)
        view.addModel(Chem.MolToPDBBlock(mol), 'pdb')
    # Styling and zoom are applied once, after all models have been added.
    view.setStyle({'stick': {}})
    view.zoomTo()
    return view