Spaces:
Running
Running
| """ Adjusts amide and ester bonds in PROTAC substructures. """ | |
| from typing import Tuple, Dict | |
| from rdkit import Chem | |
| from protac_splitter.chemoinformatics import ( | |
| dummy2query, | |
| canonize, | |
| ) | |
| from protac_splitter.display_utils import display_mol | |
| from protac_splitter.evaluation import check_reassembly | |
def adjust_amide_bond(
    substruct: Chem.Mol,
    linker: Chem.Mol,
    substruct_attachment_id: int,
    verbose: int = 0,
) -> Tuple[Chem.Mol, Chem.Mol]:
    """
    Adjust the amide bond between a substructure and the linker.

    Handles the case where an attachment point (dummy atom) sits next to an
    amide group (N-C=O) of ``substruct``. The N-C bond of that amide is cut,
    the small piece carrying the attachment point is zipped onto ``linker``,
    and the remaining piece becomes the new substructure. Both results expose
    the same attachment point ID as before.

    Algorithm:
        1. Search ``substruct`` for ``[NX3][CX3](=[OX1])``.
        2. For each match, look for a dummy atom among the first-order
           neighbors of the N or the carbonyl C; if none has been found so
           far, look among the second-order neighbors.
        3. Fragment the molecule on the N-C bond, labelling both new dummy
           atoms with the temporary isotope label 3.
        4. The fragment matching ``[*:id][N or C][3*]`` is joined to the
           linker with ``Chem.molzip``; the other fragment is the truncated
           substructure.
        5. The temporary label is renamed back to ``substruct_attachment_id``
           on both results.

    Whenever any step cannot be carried out, the inputs are returned
    unchanged.

    Args:
        substruct: The molecule containing the amide bond and the attachment
            point ``[*:substruct_attachment_id]``.
        linker: The molecule that receives the split-off amide fragment.
        substruct_attachment_id: The attachment point ID in ``substruct``.
            E.g., 1 for the POI, as in "[*:1]".
        verbose: If truthy, print/display intermediate molecules and any
            exception raised while extracting the fragments.

    Returns:
        Tuple[Chem.Mol, Chem.Mol]: The adjusted substruct and linker
        molecules, in that order, or the original ``(substruct, linker)``
        when no adjustment was made.
    """
    # Convert dummy atoms in substruct to query atoms for substructure search
    query_substruct = dummy2query(substruct)

    # Identify amide bond (N-C=O) in substruct substructure
    amide_pattern = Chem.MolFromSmarts("[NX3][CX3](=[OX1])")
    amide_matches = query_substruct.GetSubstructMatches(amide_pattern, useChirality=True)
    if not amide_matches:
        return substruct, linker  # No amide bond found, return the inputs

    side_atom = None
    nitrogen_idx_found, carbonyl_idx_found = None, None
    for match in amide_matches:
        nitrogen_idx, carbonyl_idx = match[0], match[1]
        nitrogen_atom = query_substruct.GetAtomWithIdx(nitrogen_idx)
        carbonyl_atom = query_substruct.GetAtomWithIdx(carbonyl_idx)

        for amide_atom in [nitrogen_atom, carbonyl_atom]:
            # Check neighboring atoms for attachment points
            # NOTE: Dummy atoms representing attachment points have atomic number 0
            for neighbor in amide_atom.GetNeighbors():
                if neighbor.GetAtomicNum() == 0:
                    nitrogen_idx_found = nitrogen_idx
                    carbonyl_idx_found = carbonyl_idx
                    side_atom = "N" if amide_atom == nitrogen_atom else "C"
                    break

            # If nothing has been found so far, check the neighbors of the
            # neighboring atoms (second-order neighbors)
            if nitrogen_idx_found is None or carbonyl_idx_found is None:
                for neighbor in amide_atom.GetNeighbors():
                    for second_neighbor in neighbor.GetNeighbors():
                        if second_neighbor.GetIdx() == carbonyl_idx or second_neighbor.GetIdx() == nitrogen_idx:
                            continue  # Skip the opposite atom from the amide bond
                        if second_neighbor.GetAtomicNum() == 0:
                            nitrogen_idx_found = nitrogen_idx
                            carbonyl_idx_found = carbonyl_idx
                            side_atom = "N" if amide_atom == nitrogen_atom else "C"
                            break
            else:
                # NOTE(review): this `else` is taken to pair with the `if`
                # above (stop scanning this match's atoms once an attachment
                # point has been located) - confirm against upstream.
                break

    if nitrogen_idx_found is None or carbonyl_idx_found is None or side_atom is None:
        return substruct, linker

    # Split the amide bond and adjust
    dummy_label = 3
    dummy_labels = [(dummy_label, dummy_label)]  # The E3 and substruct will have 1 and 2, so we need a third one
    amide_bond_idx = query_substruct.GetBondBetweenAtoms(nitrogen_idx_found, carbonyl_idx_found).GetIdx()
    fragments = Chem.FragmentOnBonds(query_substruct, [amide_bond_idx], addDummies=True, dummyLabels=dummy_labels)

    # Get the fragments resulting from bond breaking
    try:
        mol_frags = Chem.GetMolFrags(fragments, asMols=True, sanitizeFrags=True)
    except Exception as e:
        # Best-effort: if the fragments cannot be extracted, leave the inputs
        # untouched. Only report the error when asked to.
        if verbose:
            print(e)
        return substruct, linker

    # Identify the "[*:substruct][N or C][3*]" fragment, the other one will be the "truncated" substruct
    amide_fragment_pattern = Chem.MolFromSmarts(f"[*:{substruct_attachment_id}][{side_atom}][{dummy_label}*]")
    amide_fragment = None
    substruct_fixed = None

    if verbose:
        print(f'Attachment point: *:{substruct_attachment_id}')
        print('Substruct:')
        display_mol(substruct)
        print('Linker:')
        display_mol(linker)

    for frag in mol_frags:
        if frag.HasSubstructMatch(dummy2query(amide_fragment_pattern)):
            amide_fragment = frag
            if verbose:
                print('Amide fragment:')
                display_mol(frag)
        else:
            if verbose:
                print('Substruct fragment:')
                display_mol(frag)
            substruct_fixed = frag

    if amide_fragment is None or substruct_fixed is None:
        return substruct, linker

    # In order for the function to be used "on linkers", we need to make sure
    # that the amide fragment contains the attachment point of the substruct.
    # If not, there's nothing to do.
    amide_fragment_smiles = Chem.MolToSmiles(amide_fragment, canonical=True)
    if f'[*:{substruct_attachment_id}]' not in amide_fragment_smiles:
        return substruct, linker

    # Rename the "[3*]" attachment point on the amide fragment to "[*:3]"
    amide_fragment_smiles = amide_fragment_smiles.replace(f'[{dummy_label}*]', f'[*:{dummy_label}]')
    amide_fragment_smiles = canonize(amide_fragment_smiles)
    amide_fragment = Chem.MolFromSmiles(amide_fragment_smiles)
    if amide_fragment is None:
        # The edited SMILES could not be parsed back: give up on the adjustment
        return substruct, linker

    # Use molzip to join the linker and the fragment at the original attachment point
    linker_fixed = Chem.molzip(linker, amide_fragment)

    # Rename the "[*:3]" attachment point back to the original attachment point on the linker
    linker_fixed_smiles = Chem.MolToSmiles(linker_fixed, canonical=True)
    linker_fixed_smiles = linker_fixed_smiles.replace(f'[*:{dummy_label}]', f'[*:{substruct_attachment_id}]')
    linker_fixed_smiles = canonize(linker_fixed_smiles)
    linker_fixed = Chem.MolFromSmiles(linker_fixed_smiles)

    # Rename the "[3*]" attachment point back to the original attachment point on the substruct
    substruct_fixed_smiles = Chem.MolToSmiles(substruct_fixed, canonical=True)
    substruct_fixed_smiles = substruct_fixed_smiles.replace(f'[{dummy_label}*]', f'[*:{substruct_attachment_id}]')
    substruct_fixed_smiles = canonize(substruct_fixed_smiles)
    substruct_fixed = Chem.MolFromSmiles(substruct_fixed_smiles)

    # MolFromSmiles returns None on failure; never hand a None molecule back
    # to the caller, fall back to the untouched inputs instead.
    if substruct_fixed is None or linker_fixed is None:
        return substruct, linker

    return substruct_fixed, linker_fixed
def adjust_amide_bonds_in_substructs(
    substructs: Dict[str, str],
    protac_smiles: str,
    poi_attachment_id: int = 1,
    e3_attachment_id: int = 2,
) -> Dict[str, str]:
    """ Apply `adjust_amide_bond` to every substructure pair of a PROTAC.

    Four passes are run in order: POI ligand, E3 binder, linker (E3 side) and
    linker (POI side). After each pass the three current SMILES are handed to
    `check_reassembly` together with `protac_smiles`; as soon as one check
    fails, the input dictionary is returned untouched. Only when all passes
    succeed is the dictionary updated (in place) and returned.

    Args:
        substructs: The substructures of the PROTAC. A dictionary of SMILES with keys 'poi', 'linker', and 'e3'.
        protac_smiles: The SMILES of the PROTAC for checking reassembly.
        poi_attachment_id: Attachment point ID shared by the POI ligand and the linker.
        e3_attachment_id: Attachment point ID shared by the E3 binder and the linker.

    Returns:
        The updated substructures dictionary.
    """
    mols = {part: Chem.MolFromSmiles(substructs[part]) for part in ('poi', 'e3', 'linker')}
    # Until the E3 binder is processed, its SMILES is taken verbatim from the input
    smiles = {'e3': substructs['e3']}

    # (molecule scanned for amides, molecule receiving the fragment, attachment ID)
    passes = (
        ('poi', 'linker', poi_attachment_id),     # amide group on the POI ligand
        ('e3', 'linker', e3_attachment_id),       # amide group on the E3 binder
        ('linker', 'e3', e3_attachment_id),       # amide group on the linker, E3 side
        ('linker', 'poi', poi_attachment_id),     # amide group on the linker, POI side
    )
    for scanned, receiver, attachment_id in passes:
        mols[scanned], mols[receiver] = adjust_amide_bond(mols[scanned], mols[receiver], attachment_id)

        # Refresh the SMILES of the ligand involved in this pass, then the linker's
        ligand = receiver if scanned == 'linker' else scanned
        smiles[ligand] = Chem.MolToSmiles(mols[ligand], canonical=True)
        smiles['linker'] = Chem.MolToSmiles(mols['linker'], canonical=True)

        candidate = '.'.join([smiles['poi'], smiles['linker'], smiles['e3']])
        if not check_reassembly(protac_smiles, candidate):
            return substructs

    for part in ('poi', 'e3', 'linker'):
        substructs[part] = smiles[part]
    return substructs
def adjust_ester_bond(
    substruct: Chem.Mol,
    linker: Chem.Mol,
    substruct_attachment_id: int,
    verbose: int = 0,
) -> Tuple[Chem.Mol, Chem.Mol]:
    """
    Adjust the ester bond between a substructure and the linker.

    Handles the case where an attachment point (dummy atom) sits next to an
    ester-like group (O-C=O) of ``substruct``. The O-C bond of that group is
    cut, the small piece carrying the attachment point is zipped onto
    ``linker``, and the remaining piece becomes the new substructure. Both
    results expose the same attachment point ID as before.

    Whenever any step cannot be carried out, the inputs are returned
    unchanged.

    Args:
        substruct: The molecule containing the ester bond and the attachment
            point ``[*:substruct_attachment_id]``.
        linker: The molecule that receives the split-off ester fragment.
        substruct_attachment_id: The attachment point ID in ``substruct``.
            E.g., 1 for the POI, as in "[*:1]".
        verbose: If truthy, print any exception raised while extracting the
            fragments.

    Returns:
        Tuple[Chem.Mol, Chem.Mol]: The adjusted substruct and linker
        molecules, in that order, or the original ``(substruct, linker)``
        when no adjustment was made.
    """
    # Convert dummy atoms in substruct to query atoms for substructure search
    query_substruct = dummy2query(substruct)

    # Identify ester group (COOR) in substruct substructure
    ester_pattern = Chem.MolFromSmarts("[OX2][CX3](=[OX1])")
    ester_matches = query_substruct.GetSubstructMatches(ester_pattern)
    if not ester_matches:
        return substruct, linker  # No ester bond found, return the inputs

    side_atom = None
    oxygen_idx_found, carbonyl_idx_found = None, None
    for match in ester_matches:
        oxygen_idx, carbonyl_idx = match[0], match[1]
        oxygen_atom = query_substruct.GetAtomWithIdx(oxygen_idx)
        carbonyl_atom = query_substruct.GetAtomWithIdx(carbonyl_idx)

        for ester_atom in [oxygen_atom, carbonyl_atom]:
            # Check neighboring atoms for attachment points
            # NOTE: Dummy atoms representing attachment points have atomic number 0
            for neighbor in ester_atom.GetNeighbors():
                if neighbor.GetAtomicNum() == 0:
                    oxygen_idx_found = oxygen_idx
                    carbonyl_idx_found = carbonyl_idx
                    side_atom = "O" if ester_atom == oxygen_atom else "C"
                    break

            # If nothing has been found so far, check the neighbors of the
            # neighboring atoms (second-order neighbors)
            if oxygen_idx_found is None or carbonyl_idx_found is None:
                for neighbor in ester_atom.GetNeighbors():
                    for second_neighbor in neighbor.GetNeighbors():
                        if second_neighbor.GetIdx() == carbonyl_idx or second_neighbor.GetIdx() == oxygen_idx:
                            continue  # Skip the opposite atom from the ester bond
                        if second_neighbor.GetAtomicNum() == 0:
                            oxygen_idx_found = oxygen_idx
                            carbonyl_idx_found = carbonyl_idx
                            side_atom = "O" if ester_atom == oxygen_atom else "C"
                            break
            else:
                # NOTE(review): this `else` is taken to pair with the `if`
                # above (stop scanning this match's atoms once an attachment
                # point has been located) - confirm against upstream.
                break

    if oxygen_idx_found is None or carbonyl_idx_found is None or side_atom is None:
        return substruct, linker

    # Split the ester bond and adjust
    dummy_label = 3
    dummy_labels = [(dummy_label, dummy_label)]  # The E3 and substruct will have 1 and 2, so we need a third one
    ester_bond_idx = query_substruct.GetBondBetweenAtoms(oxygen_idx_found, carbonyl_idx_found).GetIdx()
    fragments = Chem.FragmentOnBonds(query_substruct, [ester_bond_idx], addDummies=True, dummyLabels=dummy_labels)

    # Get the fragments resulting from bond breaking
    try:
        mol_frags = Chem.GetMolFrags(fragments, asMols=True, sanitizeFrags=True)
    except Exception as e:
        # Best-effort: if the fragments cannot be extracted, leave the inputs
        # untouched. Only report the error when asked to.
        if verbose:
            print(e)
        return substruct, linker

    # Identify the "[*:substruct][O or C][3*]" fragment, the other one will be the "truncated" substruct
    ester_fragment_pattern = Chem.MolFromSmarts(f"[*:{substruct_attachment_id}][{side_atom}][{dummy_label}*]")
    ester_fragment = None
    substruct_fixed = None
    for frag in mol_frags:
        if frag.HasSubstructMatch(dummy2query(ester_fragment_pattern)):
            ester_fragment = frag
        else:
            substruct_fixed = frag

    if ester_fragment is None or substruct_fixed is None:
        return substruct, linker

    # In order for the function to be used "on linkers", we need to make sure
    # that the ester fragment contains the attachment point of the substruct.
    # If not, there's nothing to do.
    ester_fragment_smiles = Chem.MolToSmiles(ester_fragment, canonical=True)
    if f'[*:{substruct_attachment_id}]' not in ester_fragment_smiles:
        return substruct, linker

    # Rename the "[3*]" attachment point on the ester fragment to "[*:3]"
    ester_fragment_smiles = ester_fragment_smiles.replace(f'[{dummy_label}*]', f'[*:{dummy_label}]')
    ester_fragment = Chem.MolFromSmiles(ester_fragment_smiles)
    if ester_fragment is None:
        # The edited SMILES could not be parsed back: give up on the adjustment
        return substruct, linker

    # Use molzip to join the linker and the fragment at the original attachment point
    linker_fixed = Chem.molzip(linker, ester_fragment)

    # Rename the "[*:3]" attachment point back to the original attachment point on the linker
    linker_fixed_smiles = Chem.MolToSmiles(linker_fixed, canonical=True)
    linker_fixed_smiles = linker_fixed_smiles.replace(f'[*:{dummy_label}]', f'[*:{substruct_attachment_id}]')
    linker_fixed = Chem.MolFromSmiles(linker_fixed_smiles)

    # Rename the "[3*]" attachment point back to the original attachment point on the substruct
    substruct_fixed_smiles = Chem.MolToSmiles(substruct_fixed, canonical=True)
    substruct_fixed_smiles = substruct_fixed_smiles.replace(f'[{dummy_label}*]', f'[*:{substruct_attachment_id}]')
    substruct_fixed = Chem.MolFromSmiles(substruct_fixed_smiles)

    # MolFromSmiles returns None on failure; never hand a None molecule back
    # to the caller, fall back to the untouched inputs instead.
    if substruct_fixed is None or linker_fixed is None:
        return substruct, linker

    return substruct_fixed, linker_fixed
def adjust_ester_bonds_in_substructs(
    substructs: Dict[str, str],
    protac_smiles: str,
    poi_attachment_id: int = 1,
    e3_attachment_id: int = 2,
) -> Dict[str, str]:
    """ Apply `adjust_ester_bond` to every substructure pair of a PROTAC.

    Four passes are run in order: POI ligand, E3 binder, linker (E3 side) and
    linker (POI side). After each pass the three current SMILES are handed to
    `check_reassembly` together with `protac_smiles`; as soon as one check
    fails, the input dictionary is returned untouched. Only when all passes
    succeed is the dictionary updated (in place) and returned.

    Args:
        substructs: The substructures of the PROTAC. A dictionary of SMILES with keys 'poi', 'linker', and 'e3'.
        protac_smiles: The SMILES of the PROTAC for checking reassembly.
        poi_attachment_id: Attachment point ID shared by the POI ligand and the linker.
        e3_attachment_id: Attachment point ID shared by the E3 binder and the linker.

    Returns:
        The updated substructures dictionary.
    """
    mols = {part: Chem.MolFromSmiles(substructs[part]) for part in ('poi', 'e3', 'linker')}
    # Until the E3 binder is processed, its SMILES is taken verbatim from the input
    smiles = {'e3': substructs['e3']}

    # (molecule scanned for esters, molecule receiving the fragment, attachment ID)
    passes = (
        ('poi', 'linker', poi_attachment_id),     # ester group on the POI ligand
        ('e3', 'linker', e3_attachment_id),       # ester group on the E3 binder
        ('linker', 'e3', e3_attachment_id),       # ester group on the linker, E3 side
        ('linker', 'poi', poi_attachment_id),     # ester group on the linker, POI side
    )
    for scanned, receiver, attachment_id in passes:
        mols[scanned], mols[receiver] = adjust_ester_bond(mols[scanned], mols[receiver], attachment_id)

        # Refresh the SMILES of the ligand involved in this pass, then the linker's
        ligand = receiver if scanned == 'linker' else scanned
        smiles[ligand] = Chem.MolToSmiles(mols[ligand], canonical=True)
        smiles['linker'] = Chem.MolToSmiles(mols['linker'], canonical=True)

        candidate = '.'.join([smiles['poi'], smiles['linker'], smiles['e3']])
        if not check_reassembly(protac_smiles, candidate):
            return substructs

    for part in ('poi', 'e3', 'linker'):
        substructs[part] = smiles[part]
    return substructs