Gilmullin Almaz
Refactor code structure for improved readability and maintainability
72a3513
"""Module containing additional functions needed in different reaction data processing
protocols."""
import logging
from typing import Iterable
from CGRtools.containers import (
CGRContainer,
MoleculeContainer,
QueryContainer,
ReactionContainer,
)
from CGRtools.exceptions import InvalidAromaticRing
from tqdm import tqdm
from synplan.chem import smiles_parser
from synplan.utils.files import MoleculeReader, MoleculeWriter
from chython import MoleculeContainer as MoleculeContainerChython
def mol_from_smiles(
smiles: str,
standardize: bool = True,
clean_stereo: bool = True,
clean2d: bool = True,
) -> MoleculeContainer:
"""Converts a SMILES string to a `MoleculeContainer` object and optionally
standardizes, cleans stereochemistry, and cleans 2D coordinates.
:param smiles: The SMILES string representing the molecule.
:param standardize: Whether to standardize the molecule (default is True).
:param clean_stereo: Whether to remove the stereo marks on atoms of the molecule (default is True).
:param clean2d: Whether to clean the 2D coordinates of the molecule (default is True).
:return: The processed molecule object.
:raises ValueError: If the SMILES string could not be processed by CGRtools.
"""
molecule = smiles_parser(smiles)
if not isinstance(molecule, MoleculeContainer):
raise ValueError("SMILES string was not processed by CGRtools")
tmp = molecule.copy()
try:
if standardize:
tmp.canonicalize()
if clean_stereo:
tmp.clean_stereo()
if clean2d:
tmp.clean2d()
molecule = tmp
except InvalidAromaticRing:
logging.warning(
"CGRtools was not able to standardize molecule due to invalid aromatic ring"
)
return molecule
def query_to_mol(query: QueryContainer) -> MoleculeContainer:
"""Converts a QueryContainer object into a MoleculeContainer object.
:param query: A QueryContainer object representing the query structure.
:return: A MoleculeContainer object that replicates the structure of the query.
"""
new_mol = MoleculeContainer()
for n, atom in query.atoms():
new_mol.add_atom(
atom.atomic_symbol, n, charge=atom.charge, is_radical=atom.is_radical
)
for i, j, bond in query.bonds():
new_mol.add_bond(i, j, int(bond))
return new_mol
def reaction_query_to_reaction(reaction_rule: ReactionContainer) -> ReactionContainer:
"""Converts a ReactionContainer object with query structures into a
ReactionContainer with molecular structures.
:param reaction_rule: A ReactionContainer object where reactants and products are
QueryContainer objects.
:return: A new ReactionContainer object where reactants and products are
MoleculeContainer objects.
"""
reactants = [query_to_mol(q) for q in reaction_rule.reactants]
products = [query_to_mol(q) for q in reaction_rule.products]
reagents = [
query_to_mol(q) for q in reaction_rule.reagents
] # Assuming reagents are also part of the rule
reaction = ReactionContainer(reactants, products, reagents, reaction_rule.meta)
reaction.name = reaction_rule.name
return reaction
def unite_molecules(molecules: Iterable[MoleculeContainer]) -> MoleculeContainer:
"""Unites a list of MoleculeContainer objects into a single MoleculeContainer. This
function takes multiple molecules and combines them into one larger molecule. The
first molecule in the list is taken as the base, and subsequent molecules are united
with it sequentially.
:param molecules: A list of MoleculeContainer objects to be united.
:return: A single MoleculeContainer object representing the union of all input
molecules.
"""
new_mol = MoleculeContainer()
for mol in molecules:
new_mol = new_mol.union(mol)
return new_mol
def safe_canonicalization(molecule: MoleculeContainer) -> MoleculeContainer:
"""Attempts to canonicalize a molecule, handling any exceptions. If the
canonicalization process fails due to an InvalidAromaticRing exception, it safely
returns the original molecule.
:param molecule: The given molecule to be canonicalized.
:return: The canonicalized molecule if successful, otherwise the original molecule.
"""
molecule._atoms = dict(sorted(molecule._atoms.items()))
molecule_copy = molecule.copy()
try:
molecule_copy.canonicalize()
molecule_copy.clean_stereo()
return molecule_copy
except InvalidAromaticRing:
return molecule
def standardize_building_blocks(input_file: str, output_file: str) -> str:
"""Standardizes custom building blocks.
:param input_file: The path to the file that stores the original building blocks.
:param output_file: The path to the file that will store the standardized building
blocks.
:return: The path to the file with standardized building blocks.
"""
if input_file == output_file:
raise ValueError("input_file name and output_file name cannot be the same.")
with MoleculeReader(input_file) as inp_file, MoleculeWriter(
output_file
) as out_file:
for mol in tqdm(
inp_file,
desc="Number of building blocks processed: ",
bar_format="{desc}{n} [{elapsed}]",
):
try:
mol = safe_canonicalization(mol)
except Exception as e:
logging.debug(e)
continue
out_file.write(mol)
return output_file
def cgr_from_reaction_rule(reaction_rule: ReactionContainer) -> CGRContainer:
"""Creates a CGR from the given reaction rule.
:param reaction_rule: The reaction rule to be converted.
:return: The resulting CGR.
"""
reaction_rule = reaction_query_to_reaction(reaction_rule)
cgr_rule = ~reaction_rule
return cgr_rule
def hash_from_reaction_rule(reaction_rule: ReactionContainer) -> hash:
"""Generates hash for the given reaction rule.
:param reaction_rule: The reaction rule to be converted.
:return: The resulting hash.
"""
reactants_hash = tuple(sorted(hash(r) for r in reaction_rule.reactants))
reagents_hash = tuple(sorted(hash(r) for r in reaction_rule.reagents))
products_hash = tuple(sorted(hash(r) for r in reaction_rule.products))
return hash((reactants_hash, reagents_hash, products_hash))
def reverse_reaction(
reaction: ReactionContainer,
) -> ReactionContainer:
"""Reverses the given reaction.
:param reaction: The reaction to be reversed.
:return: The reversed reaction.
"""
reversed_reaction = ReactionContainer(
reaction.products, reaction.reactants, reaction.reagents, reaction.meta
)
reversed_reaction.name = reaction.name
return reversed_reaction
def cgrtools_to_chython_molecule(molecule):
molecule_chython = MoleculeContainerChython()
for n, atom in molecule.atoms():
molecule_chython.add_atom(atom.atomic_symbol, n)
for n, m, bond in molecule.bonds():
molecule_chython.add_bond(n, m, int(bond))
return molecule_chython
def chython_query_to_cgrtools(query):
cgrtools_query = QueryContainer()
for n, atom in query.atoms():
cgrtools_query.add_atom(
atom=atom.atomic_symbol,
charge=atom.charge,
neighbors=atom.neighbors,
hybridization=atom.hybridization,
_map=n,
)
for n, m, bond in query.bonds():
cgrtools_query.add_bond(n, m, int(bond))
return cgrtools_query