Spaces:
Running
Running
Gilmullin Almaz
Refactor code structure and remove redundant sections for improved readability and maintainability
914ea41
| """Module containing additional functions needed in different reaction data processing | |
| protocols.""" | |
| import logging | |
| from typing import Iterable | |
| from CGRtools.containers import ( | |
| CGRContainer, | |
| MoleculeContainer, | |
| QueryContainer, | |
| ReactionContainer, | |
| ) | |
| from CGRtools.exceptions import InvalidAromaticRing | |
| from tqdm import tqdm | |
| from synplan.chem import smiles_parser | |
| from synplan.utils.files import MoleculeReader, MoleculeWriter | |
| from chython import MoleculeContainer as MoleculeContainerChython | |
| def mol_from_smiles( | |
| smiles: str, | |
| standardize: bool = True, | |
| clean_stereo: bool = True, | |
| clean2d: bool = True, | |
| ) -> MoleculeContainer: | |
| """Converts a SMILES string to a `MoleculeContainer` object and optionally | |
| standardizes, cleans stereochemistry, and cleans 2D coordinates. | |
| :param smiles: The SMILES string representing the molecule. | |
| :param standardize: Whether to standardize the molecule (default is True). | |
| :param clean_stereo: Whether to remove the stereo marks on atoms of the molecule (default is True). | |
| :param clean2d: Whether to clean the 2D coordinates of the molecule (default is True). | |
| :return: The processed molecule object. | |
| :raises ValueError: If the SMILES string could not be processed by CGRtools. | |
| """ | |
| molecule = smiles_parser(smiles) | |
| if not isinstance(molecule, MoleculeContainer): | |
| raise ValueError("SMILES string was not processed by CGRtools") | |
| tmp = molecule.copy() | |
| try: | |
| if standardize: | |
| tmp.canonicalize() | |
| if clean_stereo: | |
| tmp.clean_stereo() | |
| if clean2d: | |
| tmp.clean2d() | |
| molecule = tmp | |
| except InvalidAromaticRing: | |
| logging.warning( | |
| "CGRtools was not able to standardize molecule due to invalid aromatic ring" | |
| ) | |
| return molecule | |
| def query_to_mol(query: QueryContainer) -> MoleculeContainer: | |
| """Converts a QueryContainer object into a MoleculeContainer object. | |
| :param query: A QueryContainer object representing the query structure. | |
| :return: A MoleculeContainer object that replicates the structure of the query. | |
| """ | |
| new_mol = MoleculeContainer() | |
| for n, atom in query.atoms(): | |
| new_mol.add_atom( | |
| atom.atomic_symbol, n, charge=atom.charge, is_radical=atom.is_radical | |
| ) | |
| for i, j, bond in query.bonds(): | |
| new_mol.add_bond(i, j, int(bond)) | |
| return new_mol | |
| def reaction_query_to_reaction(reaction_rule: ReactionContainer) -> ReactionContainer: | |
| """Converts a ReactionContainer object with query structures into a | |
| ReactionContainer with molecular structures. | |
| :param reaction_rule: A ReactionContainer object where reactants and products are | |
| QueryContainer objects. | |
| :return: A new ReactionContainer object where reactants and products are | |
| MoleculeContainer objects. | |
| """ | |
| reactants = [query_to_mol(q) for q in reaction_rule.reactants] | |
| products = [query_to_mol(q) for q in reaction_rule.products] | |
| reagents = [ | |
| query_to_mol(q) for q in reaction_rule.reagents | |
| ] # Assuming reagents are also part of the rule | |
| reaction = ReactionContainer(reactants, products, reagents, reaction_rule.meta) | |
| reaction.name = reaction_rule.name | |
| return reaction | |
| def unite_molecules(molecules: Iterable[MoleculeContainer]) -> MoleculeContainer: | |
| """Unites a list of MoleculeContainer objects into a single MoleculeContainer. This | |
| function takes multiple molecules and combines them into one larger molecule. The | |
| first molecule in the list is taken as the base, and subsequent molecules are united | |
| with it sequentially. | |
| :param molecules: A list of MoleculeContainer objects to be united. | |
| :return: A single MoleculeContainer object representing the union of all input | |
| molecules. | |
| """ | |
| new_mol = MoleculeContainer() | |
| for mol in molecules: | |
| new_mol = new_mol.union(mol) | |
| return new_mol | |
| def safe_canonicalization(molecule: MoleculeContainer) -> MoleculeContainer: | |
| """Attempts to canonicalize a molecule, handling any exceptions. If the | |
| canonicalization process fails due to an InvalidAromaticRing exception, it safely | |
| returns the original molecule. | |
| :param molecule: The given molecule to be canonicalized. | |
| :return: The canonicalized molecule if successful, otherwise the original molecule. | |
| """ | |
| molecule._atoms = dict(sorted(molecule._atoms.items())) | |
| molecule_copy = molecule.copy() | |
| try: | |
| molecule_copy.canonicalize() | |
| molecule_copy.clean_stereo() | |
| return molecule_copy | |
| except InvalidAromaticRing: | |
| return molecule | |
| def standardize_building_blocks(input_file: str, output_file: str) -> str: | |
| """Standardizes custom building blocks. | |
| :param input_file: The path to the file that stores the original building blocks. | |
| :param output_file: The path to the file that will store the standardized building | |
| blocks. | |
| :return: The path to the file with standardized building blocks. | |
| """ | |
| if input_file == output_file: | |
| raise ValueError("input_file name and output_file name cannot be the same.") | |
| with MoleculeReader(input_file) as inp_file, MoleculeWriter( | |
| output_file | |
| ) as out_file: | |
| for mol in tqdm( | |
| inp_file, | |
| desc="Number of building blocks processed: ", | |
| bar_format="{desc}{n} [{elapsed}]", | |
| ): | |
| try: | |
| mol = safe_canonicalization(mol) | |
| except Exception as e: | |
| logging.debug(e) | |
| continue | |
| out_file.write(mol) | |
| return output_file | |
| def cgr_from_reaction_rule(reaction_rule: ReactionContainer) -> CGRContainer: | |
| """Creates a CGR from the given reaction rule. | |
| :param reaction_rule: The reaction rule to be converted. | |
| :return: The resulting CGR. | |
| """ | |
| reaction_rule = reaction_query_to_reaction(reaction_rule) | |
| cgr_rule = ~reaction_rule | |
| return cgr_rule | |
| def hash_from_reaction_rule(reaction_rule: ReactionContainer) -> hash: | |
| """Generates hash for the given reaction rule. | |
| :param reaction_rule: The reaction rule to be converted. | |
| :return: The resulting hash. | |
| """ | |
| reactants_hash = tuple(sorted(hash(r) for r in reaction_rule.reactants)) | |
| reagents_hash = tuple(sorted(hash(r) for r in reaction_rule.reagents)) | |
| products_hash = tuple(sorted(hash(r) for r in reaction_rule.products)) | |
| return hash((reactants_hash, reagents_hash, products_hash)) | |
| def reverse_reaction( | |
| reaction: ReactionContainer, | |
| ) -> ReactionContainer: | |
| """Reverses the given reaction. | |
| :param reaction: The reaction to be reversed. | |
| :return: The reversed reaction. | |
| """ | |
| reversed_reaction = ReactionContainer( | |
| reaction.products, reaction.reactants, reaction.reagents, reaction.meta | |
| ) | |
| reversed_reaction.name = reaction.name | |
| return reversed_reaction | |
| def cgrtools_to_chython_molecule(molecule): | |
| molecule_chython = MoleculeContainerChython() | |
| for n, atom in molecule.atoms(): | |
| molecule_chython.add_atom(atom.atomic_symbol, n) | |
| for n, m, bond in molecule.bonds(): | |
| molecule_chython.add_bond(n, m, int(bond)) | |
| return molecule_chython | |
| def chython_query_to_cgrtools(query): | |
| cgrtools_query = QueryContainer() | |
| for n, atom in query.atoms(): | |
| cgrtools_query.add_atom( | |
| atom=atom.atomic_symbol, | |
| charge=atom.charge, | |
| neighbors=atom.neighbors, | |
| hybridization=atom.hybridization, | |
| _map=n, | |
| ) | |
| for n, m, bond in query.bonds(): | |
| cgrtools_query.add_bond(n, m, int(bond)) | |
| return cgrtools_query | |