File size: 7,670 Bytes
72a3513
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
"""Module containing additional functions needed in different reaction data processing
protocols."""

import logging
import os
from typing import Iterable

from CGRtools.containers import (
    CGRContainer,
    MoleculeContainer,
    QueryContainer,
    ReactionContainer,
)
from CGRtools.exceptions import InvalidAromaticRing
from chython import MoleculeContainer as MoleculeContainerChython
from tqdm import tqdm

from synplan.chem import smiles_parser
from synplan.utils.files import MoleculeReader, MoleculeWriter


def mol_from_smiles(
    smiles: str,
    standardize: bool = True,
    clean_stereo: bool = True,
    clean2d: bool = True,
) -> MoleculeContainer:
    """Parses a SMILES string into a `MoleculeContainer` and applies the requested
    clean-up steps (canonicalization, stereo removal, 2D clean-up) to a copy of it.

    If any clean-up step raises `InvalidAromaticRing`, a warning is logged and the
    molecule is returned exactly as parsed, with none of the clean-up steps applied.

    :param smiles: The SMILES string representing the molecule.
    :param standardize: Whether to canonicalize the molecule (default is True).
    :param clean_stereo: Whether to remove the stereo marks on atoms of the molecule
        (default is True).
    :param clean2d: Whether to clean the 2D coordinates of the molecule (default is
        True).
    :return: The processed molecule, or the molecule as parsed if processing failed.
    :raises ValueError: If the parser did not return a `MoleculeContainer`.
    """
    parsed = smiles_parser(smiles)
    if not isinstance(parsed, MoleculeContainer):
        raise ValueError("SMILES string was not processed by CGRtools")

    # Work on a copy so that a failure half-way through leaves `parsed` untouched.
    processed = parsed.copy()
    try:
        if standardize:
            processed.canonicalize()
        if clean_stereo:
            processed.clean_stereo()
        if clean2d:
            processed.clean2d()
    except InvalidAromaticRing:
        logging.warning(
            "CGRtools was not able to standardize molecule due to invalid aromatic ring"
        )
        return parsed
    return processed


def query_to_mol(query: QueryContainer) -> MoleculeContainer:
    """Builds a MoleculeContainer that mirrors the atoms and bonds of a query.

    Every atom is re-created under the same atom number with its element symbol,
    charge and radical flag; every bond is re-created with its order cast to ``int``.

    :param query: A QueryContainer object representing the query structure.
    :return: A MoleculeContainer object that replicates the structure of the query.
    """
    molecule = MoleculeContainer()

    for number, query_atom in query.atoms():
        molecule.add_atom(
            query_atom.atomic_symbol,
            number,
            charge=query_atom.charge,
            is_radical=query_atom.is_radical,
        )

    for first, second, query_bond in query.bonds():
        molecule.add_bond(first, second, int(query_bond))

    return molecule


def reaction_query_to_reaction(reaction_rule: ReactionContainer) -> ReactionContainer:
    """Turns a reaction rule made of query structures into a reaction made of
    molecular structures.

    Reactants, products and reagents are each converted with `query_to_mol`; the
    meta data and the name of the rule are carried over to the new reaction.

    :param reaction_rule: A ReactionContainer object where reactants and products are
        QueryContainer objects.
    :return: A new ReactionContainer object where reactants and products are
        MoleculeContainer objects.
    """
    molecular_reaction = ReactionContainer(
        list(map(query_to_mol, reaction_rule.reactants)),
        list(map(query_to_mol, reaction_rule.products)),
        # Reagents are assumed to be part of the rule as well.
        list(map(query_to_mol, reaction_rule.reagents)),
        reaction_rule.meta,
    )
    molecular_reaction.name = reaction_rule.name
    return molecular_reaction


def unite_molecules(molecules: Iterable[MoleculeContainer]) -> MoleculeContainer:
    """Unites several MoleculeContainer objects into a single MoleculeContainer.

    The result starts out as an empty MoleculeContainer and every input molecule is
    merged into it with ``union``, one after another in iteration order. An empty
    input therefore yields an empty MoleculeContainer.

    :param molecules: The MoleculeContainer objects to be united.
    :return: A single MoleculeContainer object representing the union of all input
        molecules.
    """
    united = MoleculeContainer()
    for fragment in molecules:
        # `union` returns the merged container, so the accumulator is rebound here.
        united = united.union(fragment)
    return united


def safe_canonicalization(molecule: MoleculeContainer) -> MoleculeContainer:
    """Canonicalizes a copy of the molecule and cleans its stereo marks, falling back
    to the given molecule if an InvalidAromaticRing exception is raised.

    Note that the atom mapping of the given molecule (``molecule._atoms``) is
    re-ordered by atom number in place before the copy is made, so the input object
    is modified even when the fallback is returned.

    :param molecule: The given molecule to be canonicalized.
    :return: The canonicalized molecule if successful, otherwise the original molecule.
    """
    # Rebuild the private atom mapping in ascending atom-number order.
    molecule._atoms = {
        number: atom for number, atom in sorted(molecule._atoms.items())
    }

    candidate = molecule.copy()
    try:
        candidate.canonicalize()
        candidate.clean_stereo()
    except InvalidAromaticRing:
        return molecule
    else:
        return candidate


def standardize_building_blocks(input_file: str, output_file: str) -> str:
    """Standardizes custom building blocks.

    Every molecule read from ``input_file`` is passed through
    `safe_canonicalization` and written to ``output_file``. A molecule whose
    standardization raises an exception is logged at DEBUG level and skipped, so one
    bad record does not abort the whole run.

    :param input_file: The path to the file that stores the original building blocks.
    :param output_file: The path to the file that will store the standardized building
        blocks.
    :return: The path to the file with standardized building blocks.
    :raises ValueError: If ``input_file`` and ``output_file`` refer to the same file.
    """
    # Compare resolved paths rather than raw strings: spellings such as "a.smi" and
    # "./a.smi" (or a symlink) name the same file, and the file would then be read
    # and written at the same time.
    if os.path.realpath(input_file) == os.path.realpath(output_file):
        raise ValueError("input_file name and output_file name cannot be the same.")

    with MoleculeReader(input_file) as inp_file, MoleculeWriter(
        output_file
    ) as out_file:
        for mol in tqdm(
            inp_file,
            desc="Number of building blocks processed: ",
            bar_format="{desc}{n} [{elapsed}]",
        ):
            try:
                mol = safe_canonicalization(mol)
            except Exception as e:
                # Deliberate best effort: skip molecules that cannot be standardized.
                logging.debug(e)
                continue
            out_file.write(mol)

    return output_file


def cgr_from_reaction_rule(reaction_rule: ReactionContainer) -> CGRContainer:
    """Builds the CGR that corresponds to a reaction rule.

    The rule is first converted to a reaction of molecular structures with
    `reaction_query_to_reaction`, and the ``~`` operator is then applied to that
    reaction to obtain the CGR.

    :param reaction_rule: The reaction rule to be converted.
    :return: The resulting CGR.
    """
    return ~reaction_query_to_reaction(reaction_rule)


def hash_from_reaction_rule(reaction_rule: ReactionContainer) -> int:
    """Generates hash for the given reaction rule.

    The hashes of the reactants, reagents and products are each sorted before being
    combined, so the result does not depend on the order of the structures within a
    group. It does depend on which group a structure belongs to.

    :param reaction_rule: The reaction rule to be converted.
    :return: The resulting hash as an integer.
    """
    reactants_hash = tuple(sorted(hash(r) for r in reaction_rule.reactants))
    reagents_hash = tuple(sorted(hash(r) for r in reaction_rule.reagents))
    products_hash = tuple(sorted(hash(r) for r in reaction_rule.products))

    return hash((reactants_hash, reagents_hash, products_hash))


def reverse_reaction(
    reaction: ReactionContainer,
) -> ReactionContainer:
    """Builds a new reaction with reactants and products swapped.

    The reagents, the meta data and the name are taken over from the given reaction
    unchanged.

    :param reaction: The reaction to be reversed.
    :return: The reversed reaction.
    """
    # The products of the forward reaction become the reactants and vice versa.
    new_reactants, new_products = reaction.products, reaction.reactants
    flipped = ReactionContainer(
        new_reactants, new_products, reaction.reagents, reaction.meta
    )
    flipped.name = reaction.name
    return flipped


def cgrtools_to_chython_molecule(molecule):
    """Re-creates a molecule as a chython MoleculeContainer.

    Each atom is added under the same atom number using only its element symbol, and
    each bond is added with its order cast to ``int``.

    NOTE(review): atom charge and radical state are not forwarded, so the chython
    atoms get whatever defaults chython assigns - confirm this is intended.

    :param molecule: The molecule to convert; presumably a CGRtools
        MoleculeContainer, it must provide ``atoms()`` and ``bonds()``.
    :return: The newly built chython MoleculeContainer.
    """
    converted = MoleculeContainerChython()

    for atom_number, atom in molecule.atoms():
        converted.add_atom(atom.atomic_symbol, atom_number)

    for first, second, bond in molecule.bonds():
        converted.add_bond(first, second, int(bond))

    return converted


def chython_query_to_cgrtools(query):
    """Re-creates a query structure as a CGRtools QueryContainer.

    Each atom is added under the same atom number with its element symbol, charge,
    neighbors and hybridization; each bond is added with its order cast to ``int``.

    :param query: The query to convert; presumably a chython QueryContainer, it must
        provide ``atoms()`` and ``bonds()``.
    :return: The newly built CGRtools QueryContainer.
    """
    converted = QueryContainer()

    for atom_number, query_atom in query.atoms():
        converted.add_atom(
            atom=query_atom.atomic_symbol,
            charge=query_atom.charge,
            neighbors=query_atom.neighbors,
            hybridization=query_atom.hybridization,
            _map=atom_number,
        )

    for first, second, query_bond in query.bonds():
        converted.add_bond(first, second, int(query_bond))

    return converted