Spaces:
Running
Running
Gilmullin Almaz
committed on
Commit
·
dfa290e
1
Parent(s):
81a56f7
rm extra files
Browse files- __init__.py +0 -0
- clustering.py +0 -171
- rs_cgr.py +0 -35
- super_cgr.py +0 -204
__init__.py
DELETED
|
File without changes
|
clustering.py
DELETED
|
@@ -1,171 +0,0 @@
|
|
| 1 |
-
import numpy as np
|
| 2 |
-
import pandas as pd
|
| 3 |
-
from scipy.spatial.distance import squareform
|
| 4 |
-
from scipy.cluster.hierarchy import fcluster
|
| 5 |
-
from sklearn.metrics import silhouette_score, calinski_harabasz_score
|
| 6 |
-
import fastcluster
|
| 7 |
-
|
| 8 |
-
def tanimoto_similarity_continuous(matrix_1, matrix_2):
    """Pairwise Tanimoto (Jaccard) similarity between rows of two feature matrices.

    "The Tanimoto coefficient is a measure of the similarity between two sets.
    It is defined as the size of the intersection divided by the size of the
    union of the sample sets." The Tanimoto coefficient is also known as the
    Jaccard index.

    Adopted from https://github.com/cimm-kzn/CIMtools/blob/master/CIMtools/metrics/pairwise.py

    :param matrix_1: 2D array of features, shape (n, d).
    :param matrix_2: 2D array of features, shape (m, d).
    :return: (n, m) array of Tanimoto coefficients between the rows.
    """
    x_dot = np.dot(matrix_1, matrix_2.T)

    x2 = (matrix_1**2).sum(axis=1)
    y2 = (matrix_2**2).sum(axis=1)

    # Broadcasting builds the same denominator as the original's Python-level
    # tiling (np.array([x2] * len_y2).T + np.array([y2] * len_x2)) without
    # materializing the intermediate lists.
    denominator = x2[:, None] + y2[None, :] - x_dot

    # Two all-zero rows give 0/0 -> NaN; define that similarity as 0
    # (original behaviour). errstate suppresses the harmless warning.
    with np.errstate(divide='ignore', invalid='ignore'):
        result = x_dot / denominator
    result[np.isnan(result)] = 0

    return result
-
def calculate_fingerprints(cgrs, fingerprint_method):
    """Compute a fingerprint for every CGR in *cgrs*.

    Args:
        cgrs (dict): Mapping of labels to CGR objects.
        fingerprint_method: Initialized fingerprint calculator exposing a
            ``transform`` method (e.g., a MorganFingerprint instance).

    Returns:
        np.ndarray: One fingerprint row per CGR, in dict iteration order.
    """
    return np.array([fingerprint_method.transform([cgr])[0] for cgr in cgrs.values()])
|
| 50 |
-
def create_similarity_matrix(fingerprints, labels):
    """Build a labelled all-vs-all Tanimoto similarity matrix.

    Args:
        fingerprints (np.ndarray): Array of fingerprints, one row per item.
        labels (list): Row/column labels, aligned with *fingerprints*.

    Returns:
        pd.DataFrame: Square similarity matrix indexed by *labels*.
    """
    return pd.DataFrame(
        tanimoto_similarity_continuous(fingerprints, fingerprints),
        index=labels,
        columns=labels,
    )
-
|
| 63 |
-
def calculate_linkage(similarity_df, method='average'):
    """Turn a similarity matrix into a hierarchical-clustering linkage matrix.

    Args:
        similarity_df (pd.DataFrame): Square similarity matrix (values in [0, 1]).
        method (str): Linkage method understood by fastcluster.

    Returns:
        np.ndarray: Linkage matrix in SciPy format.
    """
    # Distance is 1 - similarity; squareform collapses the square matrix
    # into the condensed vector form that fastcluster.linkage expects.
    return fastcluster.linkage(squareform(1 - similarity_df), method=method)
-
|
| 77 |
-
def optimal_cluster_num(Z, distance_matrix, max_clusters=10):
    """Pick the cluster count in [2, max_clusters] with the best silhouette score.

    Args:
        Z (np.ndarray): Linkage matrix.
        distance_matrix (np.ndarray): Precomputed pairwise distance matrix.
        max_clusters (int): Maximum number of clusters to consider (inclusive).

    Returns:
        int: Number of clusters maximizing the silhouette score.
    """
    # BUG FIX: the original used range(2, max_clusters), which silently
    # excluded max_clusters itself despite the documented intent.
    cluster_range = range(2, max_clusters + 1)
    silhouette_scores = []

    for n_clusters in cluster_range:
        cluster_labels = fcluster(Z, n_clusters, criterion='maxclust')
        # Silhouette on the precomputed distance matrix, not raw features.
        score = silhouette_score(distance_matrix, cluster_labels, metric='precomputed')
        silhouette_scores.append(score)

    return cluster_range[np.argmax(silhouette_scores)]
-
|
| 98 |
-
def perform_clustering(Z, threshold=0.0, max_clusters=10, distance_matrix=None):
    """Hierarchical clustering with a fallback to an optimized cluster count.

    First cuts the dendrogram at *threshold*; if that produces more than
    *max_clusters* clusters, re-cuts at the silhouette-optimal count.

    Args:
        Z (np.ndarray): Linkage matrix.
        threshold (float): Distance threshold for the initial cut.
        max_clusters (int): Maximum acceptable number of clusters.
        distance_matrix (np.ndarray, optional): Precomputed pairwise distance
            matrix; required only when the fallback optimization is triggered.

    Returns:
        np.ndarray: Cluster label per item.

    Raises:
        ValueError: If the fallback is needed but *distance_matrix* is None.
    """
    cluster_labels = fcluster(Z, t=threshold, criterion='distance')
    unique_clusters = np.unique(cluster_labels)

    # fcluster labels are 1..k, so the max label equals the cluster count.
    if max(unique_clusters) > max_clusters:
        # BUG FIX: the original referenced an undefined name `similarity_df`
        # here, raising NameError whenever this branch was taken.
        if distance_matrix is None:
            raise ValueError(
                'distance_matrix is required when the initial cut yields '
                'more than max_clusters clusters'
            )
        optimal_n_clusters = optimal_cluster_num(Z, distance_matrix, max_clusters)
        cluster_labels = fcluster(Z, optimal_n_clusters, criterion='maxclust')

    return cluster_labels
-
|
| 118 |
-
def create_clusters_dict(cluster_labels, labels):
    """Group item labels by their assigned cluster.

    Args:
        cluster_labels (np.ndarray): Cluster assignment per item.
        labels: Array of item labels (must support fancy indexing).

    Returns:
        dict: Cluster number -> list of member labels.
    """
    return {
        cluster: list(labels[np.where(cluster_labels == cluster)[0]])
        for cluster in np.unique(cluster_labels)
    }
-
|
| 137 |
-
def cluster_molecules(cgrs, fingerprint_method, threshold=0.0, max_clusters=10, linkage_method='average'):
    """End-to-end molecular clustering pipeline.

    Args:
        cgrs (dict): Dictionary of CGRs keyed by label.
        fingerprint_method: Initialized fingerprint calculator.
        threshold (float): Distance threshold for clustering.
        max_clusters (int): Maximum number of clusters.
        linkage_method (str): Method for hierarchical clustering.

    Returns:
        dict: Clustering results with 'clusters_dict', 'cluster_labels',
        'similarity_matrix' and 'linkage_matrix'.
    """
    labels = list(cgrs.keys())

    # Pipeline: fingerprints -> labelled similarity matrix -> linkage
    # -> flat clusters -> per-cluster membership dict.
    fingerprints = calculate_fingerprints(cgrs, fingerprint_method)
    similarity_df = create_similarity_matrix(fingerprints, labels)
    Z = calculate_linkage(similarity_df, method=linkage_method)
    cluster_labels = perform_clustering(Z, threshold, max_clusters)
    clusters_dict = create_clusters_dict(cluster_labels, np.array(labels))

    return {
        'clusters_dict': clusters_dict,
        'cluster_labels': cluster_labels,
        'similarity_matrix': similarity_df,
        'linkage_matrix': Z,
    }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
rs_cgr.py
DELETED
|
@@ -1,35 +0,0 @@
|
|
| 1 |
-
def cleaning_cgr(cgr):
    """Clean a CGR: keep its main component, strip leaving groups, and
    neutralize charges.

    Args:
        cgr: CGR container (CGRtools-style) exposing ``substructure``,
            ``connected_components``, ``_bonds``, ``_charges``, etc.

    Returns:
        The cleaned CGR (first connected component after bond edits).
    """
    # Work on the first connected component only.
    cgr_prods = [cgr.substructure(c) for c in cgr.connected_components]
    target_cgr = cgr_prods[0]

    # BUG FIX: removed the original's unused
    # `decomposed = ReactionContainer.from_cgr(cgr)` — its result was never
    # read, and `ReactionContainer` is not imported in this module, so the
    # line raised NameError on every call.

    # Snapshot bonds before mutating the structure.
    bond_items = list(target_cgr._bonds.items())
    for atom1, bond_set in bond_items:
        bond_set_items = list(bond_set.items())
        for atom2, bond in bond_set_items:

            # Leaving-group removal: bond exists in reactant but not product.
            if bond.p_order is None and bond.order is not None:
                target_cgr.delete_bond(atom1, atom2)

            # Bond order decreased (but still present): flatten it to the
            # product order on both sides.
            elif type(bond.p_order) is int and type(bond.order) is int and bond.p_order < bond.order:
                p_order = int(bond.p_order)
                target_cgr.delete_bond(atom1, atom2)
                # NOTE(review): DynamicBond comes from CGRtools; its import
                # was missing from this module as well — TODO confirm/import.
                target_cgr.add_bond(atom1, atom2, DynamicBond(p_order, p_order))

    # Keep the first component left after the bond edits.
    clean_cgr = [target_cgr.substructure(c) for c in target_cgr.connected_components][0]

    # Charge neutralizer: zero out non-zero reactant charges when the
    # product charges differ.
    if clean_cgr._p_charges != clean_cgr._charges:
        for num, charge in clean_cgr._charges.items():
            if charge != 0:
                clean_cgr._atoms[num].charge = 0

    return clean_cgr
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
super_cgr.py
DELETED
|
@@ -1,204 +0,0 @@
|
|
| 1 |
-
def find_next_atom_num(accum_cgr, reactions):
    """Return the smallest atom number not used by any reaction's CGR.

    Args:
        accum_cgr: Accumulated CGR (unused here; kept for caller compatibility).
        reactions: Iterable of reactions; each is composed into a CGR whose
            atom numbers are scanned.

    Returns:
        int: One more than the highest atom number seen (1 if no reactions).
    """
    highest = max(
        (max(reaction.compose()._atoms.keys()) for reaction in reactions),
        default=0,
    )
    return highest + 1
-
|
| 9 |
-
def get_clean_mapping(curr_prod, prod, reverse=False):
    """Get a conflict-free atom-renumbering map between two molecules.

    Takes the first substructure mapping between *curr_prod* and *prod* and
    drops entries that would collide with atom numbers already present in the
    destination molecule, or that form a 2-cycle inside the mapping itself.

    Args:
        curr_prod: Source molecule exposing ``get_mapping`` and ``_atoms``.
        prod: Molecule being mapped onto.
        reverse (bool): If True, map prod-numbers back onto curr_prod-numbers.

    Returns:
        dict: Old atom number -> new atom number (possibly empty).
    """
    mappings = list(curr_prod.get_mapping(prod))
    if not mappings:
        return {}

    rr = mappings[0]
    # Atom numbers already taken in the destination molecule.
    occupied = set(curr_prod._atoms.keys()) if reverse else set(prod._atoms.keys())

    dict_map = {}
    for key, value in rr.items():
        if key == value:
            continue  # identity entries need no remapping
        if value in rr and rr[value] != key:
            continue  # skip cyclic mappings that could cause conflicts
        source, target = (value, key) if reverse else (key, value)
        if target in occupied:
            continue  # target number already exists in the molecule
        dict_map[source] = target

    return dict_map
-
|
| 43 |
-
def validate_molecule_components(curr_mol, node_id):
    """Warn (via print) if *curr_mol* splits into more than one component.

    Args:
        curr_mol: Molecule exposing ``connected_components`` and ``substructure``.
        node_id: Tree node id, used only in the warning message.
    """
    components = [curr_mol.substructure(c) for c in curr_mol.connected_components]
    if len(components) > 1:
        print(f'Error tree {node_id}: We have more than one molecule in one node')
-
|
| 49 |
-
def get_leaving_groups(products):
    """Collect atom numbers of every product except the first (main) one.

    Args:
        products: Sequence of molecules exposing ``_atoms``.

    Returns:
        list: Atom numbers belonging to leaving-group products.
    """
    lg_atom_nums = []
    # products[0] is the main product; everything after it is a leaving group.
    for prod in products[1:]:
        lg_atom_nums.extend(prod._atoms.keys())
    return lg_atom_nums
-
|
| 57 |
-
def process_first_reaction(first_react, tree, node_id, min_mol_size):
    """Process first reaction in the route and initialize building block set."""
    bb_set = set()

    for curr_mol in first_react.reactants:
        # Atom numbers of this reactant, as both an ordered key and a set.
        react_key = tuple(curr_mol._atoms)
        react_key_set = set(react_key)

        # A reactant counts as a building block when it is small enough or is
        # registered in the tree's building-block store.
        # NOTE(review): this *assigns* bb_set rather than union-ing, so only
        # the last qualifying reactant's atoms survive — confirm this is
        # intended (update_reaction_dict does a union for the same check).
        if len(curr_mol) <= min_mol_size or str(curr_mol) in tree.building_blocks:
            bb_set = react_key_set

        # Emits a warning if the reactant is not a single connected molecule.
        validate_molecule_components(curr_mol, node_id)

    return bb_set
-
|
| 72 |
-
def update_reaction_dict(reaction, node_id, mapping, react_dict, tree, min_mol_size, bb_set, prev_remap=None):
    """Update reaction dictionary with new mappings."""
    for curr_mol in reaction.reactants:
        # Atom numbers of this reactant, as both an ordered key and a set.
        react_key = tuple(curr_mol._atoms)
        react_key_set = set(react_key)

        # Emits a warning if the reactant is not a single connected molecule.
        validate_molecule_components(curr_mol, node_id)

        # Grow the building-block atom set with small / registered reactants.
        if len(curr_mol) <= min_mol_size or str(curr_mol) in tree.building_blocks:
            bb_set = bb_set.union(react_key_set)

        # Filter the mapping to include only keys present in the current react_key
        filtered_mapping = {k: v for k, v in mapping.items() if k in react_key_set}
        if prev_remap:
            # Entries carried over from the previous remapping take precedence.
            prev_remappping = {k: v for k, v in prev_remap.items() if k in react_key_set}
            filtered_mapping.update(prev_remappping)
        react_dict[react_key] = filtered_mapping

    return react_dict, bb_set
-
|
| 92 |
-
def process_target_blocks(curr_products, curr_prod, lg_atom_nums, curr_lg_atom_nums, bb_set):
    """Process and collect target atom numbers for remapping.

    Args:
        curr_products: Products of the current reaction step.
        curr_prod: Main product of the current step.
        lg_atom_nums: Atom numbers of accumulated leaving groups.
        curr_lg_atom_nums: Atom numbers of this step's leaving groups.
        bb_set: Atom numbers belonging to building blocks.

    Returns:
        list: Atom numbers that must be renumbered. May contain duplicates
        (as in the original: an atom that is both a leaving-group atom and
        in *bb_set* is appended twice).
    """
    target_block = []
    if len(curr_products) > 1:
        for prod in curr_products:
            # BUG FIX: the original also computed
            # `dict_map = get_clean_mapping(curr_prod, prod)` here but never
            # used the result; the dead call was removed.
            if prod._atoms.keys() != curr_prod._atoms.keys():
                for key in list(prod._atoms.keys()):
                    if key in lg_atom_nums or key in curr_lg_atom_nums:
                        target_block.append(key)
                    if key in bb_set:
                        target_block.append(key)
    return target_block
-
|
| 106 |
-
def process_single_route(tree, node_id, min_mol_size=6):
    """Process a single synthesis route maintaining consistent state."""
    try:
        reactions = tree.synthesis_route(node_id)

        # The route is stored target-first: the last entry is the first reaction.
        first_react = reactions[-1]

        accum_cgr = first_react.compose()
        bb_set = process_first_reaction(first_react, tree, node_id, min_mol_size)

        # Per-product remappings recorded so far, keyed by the product's atom tuple.
        react_dict = {}

        # First atom number that is free across all reactions in the route.
        max_num = find_next_atom_num(accum_cgr, reactions)

        # Walk the remaining reactions from second-to-last down to the first.
        for step in range(len(reactions) - 2, -1, -1):
            # print("\nProcessing step:", step + 1)
            reaction = reactions[step]
            curr_cgr = reaction.compose()

            curr_prod = reaction.products[0]
            accum_products = accum_cgr.decompose()[1].split()
            lg_atom_nums = get_leaving_groups(accum_products)

            curr_products = curr_cgr.decompose()[1].split()

            tuple_atoms = tuple(curr_prod._atoms)
            prev_remap = {}

            # Re-apply any non-empty remapping recorded for this product earlier.
            if tuple_atoms in react_dict.keys() and len(react_dict[tuple_atoms]) != 0:
                prev_remap = react_dict[tuple_atoms]
                curr_cgr = curr_cgr.remap(prev_remap, copy=True)

            # Atom numbers of this step's leaving groups (all products but the first).
            curr_lg_atom_nums = []
            for i in range(1, len(curr_products)):
                prod = curr_products[i]
                curr_lg_atom_nums += list(prod._atoms.keys())

            target_block = process_target_blocks(curr_products, curr_prod, lg_atom_nums, curr_lg_atom_nums, bb_set)

            # Give every conflicting atom a fresh number from the free range.
            mapping = {}
            for atom_num in sorted(target_block):
                if atom_num in accum_cgr._atoms and atom_num not in mapping:
                    mapping[atom_num] = max_num
                    max_num += 1

            # Align the current CGR's numbering with each accumulated product.
            for i in range(len(accum_products)):
                accum_prod = accum_products[i]
                dict_map = get_clean_mapping(curr_prod, accum_prod, reverse=True)

                if dict_map:
                    curr_cgr.remap(dict_map)

            #maybe remap, then decompose and to BB
            react_dict, bb_set = update_reaction_dict(reaction, node_id, mapping, react_dict, tree, min_mol_size, bb_set, prev_remap)

            if mapping:
                curr_cgr.remap(mapping)

            # Merge this step into the accumulated "super" CGR.
            accum_cgr = curr_cgr.compose(accum_cgr)

        return {
            'cgr': accum_cgr,
        }

    except Exception as e:
        # NOTE(review): broad catch converts any failure into None; callers
        # (reassign_nums) rely on None to skip a failed route.
        print(f"Error processing node {node_id}: {e}")
        return None
-
|
| 177 |
-
def reassign_nums(tree, node_id=None, min_mol_size=6):
    """
    Process routes and reassign atom numbers.

    Args:
        tree: Synthesis tree
        node_id: Optional specific node ID to process. If None, processes all winning nodes
        min_mol_size: Minimum size for building blocks

    Returns:
        If node_id is None:
            dict: Dictionary mapping node IDs to their processed CGRs, sorted by node ID
        If node_id is specified:
            dict: Information about the processed route (or None on failure)
    """
    if node_id is not None:
        return process_single_route(tree, node_id, min_mol_size)

    # Removed the original's unused locals `reactions_dict` and `cgrs_list`.
    complex_cgr_dict = {}
    for node_id in set(tree.winning_nodes):
        result = process_single_route(tree, node_id, min_mol_size)
        if result:  # process_single_route returns None for failed routes
            complex_cgr_dict[node_id] = result['cgr']

    return dict(sorted(complex_cgr_dict.items()))
| 204 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|