Spaces:
Sleeping
Sleeping
| from typing import Any, Optional, List | |
| import numpy as np | |
| from rdkit import Chem, DataStructs | |
| from rdkit.Chem import rdFingerprintGenerator | |
| def get_fp( | |
| smiles: str, | |
| fp_generator: Optional[Any] = None, | |
| return_np: bool = True, | |
| ) -> Optional[np.ndarray]: | |
| """ | |
| Get the Morgan fingerprint of a molecule from its SMILES representation. | |
| Parameters: | |
| smiles (str): The SMILES string of the molecule. | |
| fp_generator (Any, optional): The fingerprint generator to use. If None, a default generator is used. | |
| return_np (bool): Whether to return the fingerprint as a NumPy array. Defaults to True. | |
| Returns: | |
| Optional[np.ndarray]: The Morgan fingerprint of the molecule as a NumPy array, or None if the SMILES is invalid. | |
| """ | |
| mol = Chem.MolFromSmiles(smiles) | |
| if mol is None: | |
| return None | |
| if fp_generator is None: | |
| fp_generator = rdFingerprintGenerator.GetMorganGenerator( | |
| radius=16, | |
| fpSize=1024, | |
| useBondTypes=True, | |
| includeChirality=True, | |
| ) | |
| if return_np: | |
| return fp_generator.GetFingerprintAsNumPy(mol) | |
| else: | |
| return fp_generator.GetFingerprint(mol) | |
| def average_tanimoto_distance( | |
| smiles: str, | |
| fingerprints: List[DataStructs.ExplicitBitVect], | |
| morgan_fp_generator: Optional[Any] = None, | |
| ) -> float: | |
| """ | |
| Compute the average Tanimoto distance between a query SMILES and a list of RDKit fingerprints. | |
| Parameters: | |
| smiles (str): SMILES string of the query molecule. | |
| fingerprints (list): List of RDKit fingerprint objects (e.g., ExplicitBitVect). | |
| morgan_fp_generator: RDKit Morgan fingerprint generator. | |
| Returns: | |
| float: Average Tanimoto distance (1 - similarity) between the query and the fingerprints. | |
| """ | |
| query_fp = get_fp(smiles, morgan_fp_generator, return_np=False) | |
| if query_fp is None: | |
| raise ValueError(f"Invalid SMILES string: {smiles}") | |
| distances = DataStructs.BulkTanimotoSimilarity(query_fp, fingerprints, returnDistance=True) | |
| return np.array(distances).mean() | |
| def numpy_to_rdkit_fp(arr: np.ndarray) -> DataStructs.ExplicitBitVect: | |
| """ | |
| Convert a NumPy array to an RDKit ExplicitBitVect. | |
| """ | |
| return DataStructs.CreateFromBitString(''.join(arr.astype(str))) |