Spaces:
Running
Running
| import logging | |
| import random | |
| from typing import List, Tuple, Callable, Any, Union, Dict, Optional, Literal | |
| from functools import lru_cache | |
| from rdkit import Chem | |
| from rdkit.Chem import AllChem | |
| from rdkit.Chem import rdchem | |
| from rdkit import RDLogger | |
| from rdkit.Chem import CanonSmiles | |
| from .chemoinformatics import ( | |
| canonize, | |
| smiles2mol, | |
| ) | |
| RDLogger.DisableLog("rdApp.*") | |
def get_mol(smiles: str) -> rdchem.Mol:
    """ Parse a SMILES string into an RDKit molecule.

    Args:
        smiles (str): The SMILES string to parse.

    Returns:
        rdkit.Chem.rdchem.Mol: The result of ``Chem.MolFromSmiles`` for the given string, returned unchanged (RDKit yields None when the string cannot be parsed).
    """
    parsed_mol = Chem.MolFromSmiles(smiles)
    return parsed_mol
def _first_atom_idx_with_map_num(mol: "rdchem.Mol", map_num: int) -> Optional[int]:
    """ Return the index of the first atom whose atom-map number equals `map_num`, or None if no atom matches. """
    return next(
        (atom.GetIdx() for atom in mol.GetAtoms() if atom.GetAtomMapNum() == map_num),
        None,
    )


def find_atom_idx_of_map_atoms(
    mol: "rdchem.Mol",
    find_poi: bool = True,
    find_e3: bool = True,
    poi_attachment_id: int = 1,
    e3_attachment_id: int = 2,
) -> Union[Optional[int], Tuple[Optional[int], Optional[int]]]:
    """ Find the indices of the attachment points in the given molecule.

    An attachment point is identified by its atom-map number, i.e., the atom
    written as "[*:{id}]" in SMILES. When several atoms carry the same map
    number, the first one in atom order is reported.

    Args:
        mol (rdkit.Chem.rdchem.Mol): The molecule.
        find_poi (bool): Whether to find the POI attachment point. Defaults to True.
        find_e3 (bool): Whether to find the E3 attachment point. Defaults to True.
        poi_attachment_id (int): The label of the attachment point for the POI ligand, i.e., "[*:{poi_attachment_id}]".
        e3_attachment_id (int): The label of the attachment point for the E3 binder, i.e., "[*:{e3_attachment_id}]".

    Returns:
        int | None | Tuple[int | None, int | None]: A tuple containing POI and E3 indices (in this order) if both find_poi and find_e3 are True; the index of the POI attachment point if only find_poi is True; the index of the E3 attachment point if only find_e3 is True. An index is None when no atom carries the requested label. None is returned when both find_poi and find_e3 are False.
    """
    # Each label is looked up independently so that a hit for one label can
    # never mask the other (e.g., when both ids happen to be equal).
    poi_idx = _first_atom_idx_with_map_num(mol, poi_attachment_id) if find_poi else None
    e3_idx = _first_atom_idx_with_map_num(mol, e3_attachment_id) if find_e3 else None

    if find_poi and find_e3:
        return poi_idx, e3_idx
    if find_poi:
        return poi_idx
    if find_e3:
        return e3_idx
    # Nothing was requested.
    return None
| def reassemble_protac( | |
| ligands_smiles: Optional[str] = None, | |
| poi_smiles: Optional[str] = None, | |
| linker_smiles: Optional[str] = None, | |
| e3_smiles: Optional[str] = None, | |
| e3_bond_type: Literal['single', 'double', 'triple', 'rand_uniform'] = 'single', | |
| poi_bond_type: Literal['single', 'double', 'triple', 'rand_uniform'] = 'single', | |
| poi_attachment_id: int = 1, | |
| e3_attachment_id: int = 2, | |
| rand_generator = None, | |
| ) -> Tuple[str, Chem.rdchem.Mol]: | |
| """ Reassemble a PROTAC molecule from its substructures. The SMILES must contain attachment points. | |
| In case the bond type cannot be formed an error will be raised. | |
| Example of usage: | |
| ```python | |
| e3_smiles = '[*:2]NC(C(=O)N1CC(O)CC1C(=O)NCc1ccc(-c2scnc2C)cc1)C(C)(C)C' | |
| linker_smiles = '[*:2]C(=O)CCCCCCCCCC[*:1]' | |
| poi_smiles = '[*:1]CN1CCN(c2ccc(Nc3ncc4c(C)cc(=O)n(-c5cccc(NC(=O)C=C)c5)c4n3)c(OC)c2)CC1' | |
| merged_smiles, _ = reassemble_protac(poi_smiles=poi_smiles, linker_smiles=linker_smiles, e3_smiles=e3_smiles, e3_bond_type='single', poi_bond_type='single') | |
| print(merged_smiles) | |
| ``` | |
| Args: | |
| poi_smiles (str): The SMILES notation for the POI ligand. | |
| linker_smiles (str): The SMILES notation for the linker. | |
| e3_smiles (str): The SMILES notation for the E3 binder. | |
| e3_bond_type (str): The type of bond to be added between the E3 binder and the linker. Can be 'single', 'double', 'triple', or 'rand_uniform'. | |
| poi_bond_type (str): The type of bond to be added between the POI ligand and the linker. Can be 'single', 'double', 'triple', or 'rand_uniform'. | |
| poi_attachment_id (int): The label of the attachment point for the POI ligand, i.e., "[*:{poi_attachment_id}]". | |
| e3_attachment_id (int): The label of the attachment point for the E3 binder, i.e., "[*:{e3_attachment_id}]". | |
| rand_generator: A random number generator for 'rand_uniform' bond types. Defaults to None, i.e., standard library random. | |
| Returns: | |
| Tuple[str, Chem.rdchem.Mol]: The SMILES notation and RDKit molecule object for the reassembled PROTAC molecule. | |
| """ | |
| if ligands_smiles is None: | |
| if None in [poi_smiles, linker_smiles, e3_smiles]: | |
| raise ValueError("Missing substructures SMILES: either provide ligands_smiles or all of poi_smiles, linker_smiles, and e3_smiles") | |
| ligands_smiles = f'{e3_smiles}.{linker_smiles}.{poi_smiles}' | |
| if None in [poi_smiles, linker_smiles, e3_smiles]: | |
| if ligands_smiles is None: | |
| raise ValueError("Missing substructures SMILES: either provide ligands_smiles or all of poi_smiles, linker_smiles, and e3_smiles") | |
| ligands_mol = canonize(smiles2mol(ligands_smiles)) | |
| if ligands_mol is None: | |
| return None, None | |
| try: | |
| protac_mol = Chem.molzip(ligands_mol) | |
| except ValueError as e: | |
| logging.error(f"Failed to reassemble PROTAC: {e}") | |
| return None, None |