SynPlanner

Sleeping

App Files Files Community

Gilmullin Almaz committed on Mar 21, 2025

Commit

57a9d9a

1 Parent(s): c45df67

added module codes to subcluster

Browse files

Files changed (4) hide show

cluster/clustering.py +3 -0
cluster/subcluster.py +33 -0
cluster/utils.py +37 -0
cluster/visualize.py +279 -0

cluster/clustering.py CHANGED Viewed

@@ -29,6 +29,9 @@ def tanimoto_similarity_continuous(matrix_1, matrix_2):
     result = x_dot / (np.array([x2] * len_y2).T + np.array([y2] * len_x2) - x_dot)
     result[np.isnan(result)] = 0
     return result
 def calculate_fingerprints(cgrs, fingerprint_method):

     result = x_dot / (np.array([x2] * len_y2).T + np.array([y2] * len_x2) - x_dot)
     result[np.isnan(result)] = 0
+    if matrix_1.shape == matrix_2.shape:
+        np.fill_diagonal(result, 1.0)
     return result
 def calculate_fingerprints(cgrs, fingerprint_method):

cluster/subcluster.py ADDED Viewed

	@@ -0,0 +1,33 @@

+from collections import defaultdict
def split_ids_by_length(ids, data):
    """Bucket ids by the length of their entry in ``data``.

    Args:
        ids: Iterable of ids to bucket.
        data: Mapping from id to a sized value (anything ``len()`` accepts).

    Returns:
        A ``defaultdict(list)`` mapping ``len(data[id])`` to the ids that
        have that length, in the order they appear in ``ids``. Ids missing
        from ``data`` are skipped.
    """
    buckets = defaultdict(list)
    present = (key for key in ids if key in data)
    for key in present:
        size = len(data[key])
        buckets[size].append(key)
    return buckets
def group_ids_by_intermediate_products(ids, reactions_dict):
    """Group ids whose routes share the same sequence of first products.

    The grouping key for an id is the tuple of ``reaction.products[0]`` taken
    over every reaction in ``reactions_dict[id]``, so the first products must
    be hashable.
    NOTE(review): presumably each value is a tuple of Reaction objects with an
    indexable ``products`` attribute - confirm against the caller.

    Args:
        ids: Iterable of ids; each must be a key of ``reactions_dict``.
        reactions_dict: Mapping from id to an iterable of reactions.

    Returns:
        A list of id lists, one per distinct key, in first-seen order.
    """
    buckets = {}
    for route_id in ids:
        signature = tuple(step.products[0] for step in reactions_dict[route_id])
        buckets.setdefault(signature, []).append(route_id)
    return list(buckets.values())
def sublcuster_all(cluster_dict, reactions_dict):
    """Split every cluster into sub-clusters by route length and first products.

    For each cluster the ids are first bucketed by the length of their entry
    in ``reactions_dict`` (``split_ids_by_length``), then each bucket is
    grouped with ``group_ids_by_intermediate_products``.

    Args:
        cluster_dict: Mapping from cluster number to an iterable of ids.
        reactions_dict: Mapping from id to its reactions.

    Returns:
        ``{cluster number: {length: [[id, ...], ...]}}``.
    """
    result = {}
    for num, cluster in cluster_dict.items():
        ids_by_steps = split_ids_by_length(cluster, reactions_dict)
        result[num] = {
            steps: group_ids_by_intermediate_products(step_ids, reactions_dict)
            for steps, step_ids in ids_by_steps.items()
        }
    return result

cluster/utils.py ADDED Viewed

	@@ -0,0 +1,37 @@

def extract_reactions(tree):
    """Collect the synthesis route of every distinct winning node.

    Args:
        tree: Object exposing ``winning_nodes`` (iterable of node ids) and
            ``synthesis_route(node_id)``.

    Returns:
        Dict mapping each unique winning node id to whatever
        ``tree.synthesis_route`` returns for it.
    """
    unique_winners = set(tree.winning_nodes)
    return {node_id: tree.synthesis_route(node_id) for node_id in unique_winners}
class TreeWrapper:
    """Pickle-friendly holder for a search tree.

    On pickling, the wrapped tree is replaced by a shallow copy of its
    ``__dict__`` in which attributes that cannot be pickled are neutralised.
    On unpickling, a tree instance is rebuilt from that dict without running
    the tree's ``__init__``.
    """

    def __init__(self, tree):
        self.tree = tree

    def __getstate__(self):
        """Return the wrapper state with the tree flattened to a plain dict."""
        state = self.__dict__.copy()
        # Drop the live tree instance; only its attribute dict is stored.
        tree = state.pop('tree')
        # Shallow copy so the live tree keeps its own networks/progress bar.
        tree_state = tree.__dict__.copy()
        # Reset or remove non-pickleable attributes.
        if '_tqdm' in tree_state:
            tree_state['_tqdm'] = True  # Reset to a simple flag
        for attr in ('policy_network', 'value_network'):
            if attr in tree_state:
                tree_state[attr] = None
        state['tree_state'] = tree_state
        # Remember the concrete class so __setstate__ can rebuild the tree
        # without depending on a ``Tree`` name being importable in this module.
        state['tree_class'] = type(tree)
        return state

    def __setstate__(self, state):
        """Rebuild the wrapped tree from the stored attribute dict."""
        state = dict(state)  # do not mutate the caller's dict
        tree_state = state.pop('tree_state')
        tree_class = state.pop('tree_class', None)
        if tree_class is None:
            # State produced before 'tree_class' was stored: fall back to the
            # module-level ``Tree`` name, as the previous implementation did.
            tree_class = Tree  # noqa: F821
        self.__dict__.update(state)
        # Create a new instance without calling __init__.
        new_tree = tree_class.__new__(tree_class)
        new_tree.__dict__.update(tree_state)
        self.tree = new_tree

cluster/visualize.py ADDED Viewed

	@@ -0,0 +1,279 @@

+import os
+import re
+import numpy as np
+import matplotlib.pyplot as plt
+from collections import Counter
+from synplan.utils.visualisation import get_route_svg
+import seaborn as sns
def pie_chart(cluster_sizes):
    """Draw a pie chart of cluster sizes and return the figure.

    Slices are annotated with their percentage; cluster names
    (``Cluster 1`` ... ``Cluster N``) appear only in a legend placed to the
    right of the pie. Also sets the global seaborn style to ``whitegrid``.

    Args:
        cluster_sizes: Sized sequence of slice values, one per cluster.

    Returns:
        The matplotlib figure holding the chart (it is not shown or saved).
    """
    legend_labels = [f'Cluster {number}' for number in range(1, len(cluster_sizes) + 1)]
    sns.set_style("whitegrid")
    fig, ax = plt.subplots(figsize=(6, 6))
    wedges, _texts, _pct_texts = ax.pie(
        cluster_sizes,
        labels=None,
        autopct='%1.1f%%',
        colors=sns.color_palette("pastel"),
        startangle=140,
        wedgeprops={'edgecolor': 'black'},
    )
    ax.legend(
        wedges,
        legend_labels,
        title="Clusters",
        loc="center left",
        bbox_to_anchor=(1, 0.5),
    )
    return fig
def distribution_by_depth(tree, complex_cgr_dict):
    """Return the route length of every node key in ``complex_cgr_dict``.

    Args:
        tree: Object exposing ``synthesis_route(node)`` returning a sized route.
        complex_cgr_dict: Mapping whose keys are node ids; values are unused.

    Returns:
        A float NumPy array with ``len(tree.synthesis_route(node))`` for each
        key in iteration order, or ``None`` (after printing an error) when the
        mapping is empty.
    """
    if not len(complex_cgr_dict):
        print('Error: Empty dictionary')
        return None
    route_lengths = [len(tree.synthesis_route(node)) for node in complex_cgr_dict.keys()]
    return np.array(route_lengths, dtype=float)
def histogram_by_depth(depths, mol_id, config):
    """Plot how often each route length occurs and save it as a PNG.

    The image is written to ``histograms/by_depth_mol{mol_id}_{config}.png``;
    the ``histograms`` directory is created if it does not exist.

    Args:
        depths: Sized iterable of hashable route lengths (e.g. the array
            returned by ``distribution_by_depth``).
        mol_id: Identifier interpolated into the output file name.
        config: Identifier interpolated into the output file name.

    Returns:
        The matplotlib figure that was saved, or ``None`` (after printing an
        error) when ``depths`` is empty. The figure is left open so the caller
        can display it; close it with ``plt.close(fig)`` when done.
    """
    # len() rather than truthiness: ``depths`` may be a NumPy array.
    if len(depths) == 0:
        print('Error no depths')
        return None
    # Count frequency of each depth
    counter = Counter(depths)
    bins, counts = zip(*sorted(counter.items()))
    # Use a fresh figure per call; drawing on the implicit current figure
    # would stack the bars of successive calls onto the same axes.
    fig, ax = plt.subplots()
    ax.bar(bins, counts, width=0.5, color='skyblue', edgecolor='black')
    ax.set_xlabel('Number of reactions')
    ax.set_ylabel('Frequency')
    ax.set_title(f'Frequency Histogram of Number of reactions in one tree of total {len(depths)}')
    ax.set_xticks(bins)
    # savefig does not create missing directories.
    os.makedirs('histograms', exist_ok=True)
    fig.savefig(f'histograms/by_depth_mol{mol_id}_{config}.png', dpi=100)
    return fig
def group_routes_by_depth(depths):
    """
    Bucket node ids by how many reactions their route contains.
    Args:
        depths: Dictionary mapping node_id to a sized collection of reactions
    Returns:
        dict: Maps each reaction count to the list of node_ids having that
        count, preserving the iteration order of ``depths``
    """
    grouped = {}
    for node_id, reactions in depths.items():
        grouped.setdefault(len(reactions), []).append(node_id)
    return grouped
def _max_svg_width_cm(route_svgs):
    """Return the largest ``width="<number>cm"`` found in the SVG headers (0 if none)."""
    widest = 0
    for svg in route_svgs:
        header = re.search(r'<svg[^>]*>', svg)
        if not header:
            continue
        width = re.search(r'width="([0-9.]+)cm"', header.group(0))
        if not width:
            continue
        try:
            widest = max(widest, float(width.group(1)))
        except ValueError:
            # The pattern also matches malformed numbers such as "1.2.3".
            continue
    return widest


def _compose_routes_svg(node_ids, route_svgs, depths, entry_padding):
    """Stack the route SVGs vertically into one master SVG document string."""
    cm_to_px = 37.8  # 1cm ≈ 37.8 pixels
    left_margin = 50
    right_margin = 100
    vertical_spacing = 20
    text_height = 20
    route_spacing = 250
    route_px_height = 200

    master_width = _max_svg_width_cm(route_svgs) * cm_to_px + left_margin + right_margin
    inner_y = text_height + 25
    total_entry_height = inner_y + route_px_height + entry_padding

    body = []
    current_y = 30
    for num, (node_id, route_svg) in enumerate(zip(node_ids, route_svgs), 1):
        # One entry: a caption line followed by the embedded route drawing.
        body.append(f'<g transform="translate({left_margin}, {current_y})">')
        body.append(
            f'  <text x="0" y="{text_height}" font-size="12" fill="black">'
            f'{num}  (Node ID: {node_id}, Number of reactions: {len(depths[node_id])})</text>'
        )
        body.append(f'  <g transform="translate(0, {inner_y})">{route_svg}</g>')
        body.append('</g>')
        # Horizontal separator drawn at the bottom edge of the entry.
        bottom_y = current_y + total_entry_height
        body.append(
            f'<line x1="0" y1="{bottom_y}" x2="{master_width}" y2="{bottom_y}" '
            f'stroke="black" stroke-width="1" />'
        )
        current_y = bottom_y + route_spacing - 50

    master_height = current_y + vertical_spacing
    header = (
        f'<svg xmlns="http://www.w3.org/2000/svg" width="{master_width}" '
        f'height="{master_height}" viewBox="0 0 {master_width} {master_height}">\n'
    )
    return header + "\n".join(body) + "\n</svg>"


def _save_routes_svg(tree, node_ids, depths, path_name, entry_padding):
    """Render every route once, compose the master SVG and write it to ``path_name``."""
    route_svgs = [get_route_svg(tree, node_id) for node_id in node_ids]
    master_svg = _compose_routes_svg(node_ids, route_svgs, depths, entry_padding)
    # Create the target directory so the function also works when called directly.
    os.makedirs(os.path.dirname(path_name), exist_ok=True)
    # Explicit encoding: the platform default may not be able to encode the SVG text.
    with open(path_name, "w", encoding="utf-8") as f:
        f.write(master_svg)
    print(f"Saved: {path_name}")


def create_route_svg(tree, node_ids, mol_id, config, depths, depth=None):
    """Write one SVG file that stacks the drawings of the given routes.

    Args:
        tree: Tree passed through to ``get_route_svg``.
        node_ids: Sequence of node ids whose routes are drawn, in order.
        mol_id: Identifier used in the output directory and file name.
        config: Identifier used in the output file name.
        depths: Mapping from node id to a sized collection of reactions; its
            length is shown in each route's caption.
        depth: If given, the file is named ``..._depth_{depth}_{n}.svg``;
            otherwise ``..._all_{n}.svg`` (``n`` = number of routes).

    The file is written under ``./routes_img/mol_{mol_id}/``.
    """
    size = len(node_ids)
    if depth is None:
        path_name = f"./routes_img/mol_{mol_id}/mol{mol_id}_{config}_all_{size}.svg"
    else:
        path_name = f"./routes_img/mol_{mol_id}/mol{mol_id}_{config}_depth_{depth}_{size}.svg"
    _save_routes_svg(tree, node_ids, depths, path_name, entry_padding=250)


def create_route_svg_cluster(tree, node_ids, mol_id, config, depths, cluster_num):
    """Write one SVG file that stacks the drawings of the routes of a cluster.

    Same layout as ``create_route_svg`` but with more vertical padding per
    entry, and the file is named ``..._cluster_{cluster_num}_{n}.svg``.

    Args:
        tree: Tree passed through to ``get_route_svg``.
        node_ids: Sequence of node ids whose routes are drawn, in order.
        mol_id: Identifier used in the output directory and file name.
        config: Identifier used in the output file name.
        depths: Mapping from node id to a sized collection of reactions; its
            length is shown in each route's caption.
        cluster_num: Cluster identifier used in the output file name.
    """
    size = len(node_ids)
    path_name = f"./routes_img/mol_{mol_id}/mol{mol_id}_{config}_cluster_{cluster_num}_{size}.svg"
    _save_routes_svg(tree, node_ids, depths, path_name, entry_padding=350)
def save_route_images(tree, depths, mol_id=1, config=1, cluster_dict=None):
    """
    Save route images: all routes, one file per depth, and optionally one per cluster.
    Args:
        tree: Synthesis tree handed to the SVG writers
        depths: Dictionary mapping node_id to a sized collection of reactions
        mol_id: Molecule ID used in the output directory and file names
        config: Configuration value used in the output file names
        cluster_dict: Optional dictionary mapping cluster numbers to lists of node_ids
    """
    # makedirs also creates the parent "./routes_img" directory when missing.
    os.makedirs(f"./routes_img/mol_{mol_id}", exist_ok=True)

    # One image containing every route.
    create_route_svg(tree, sorted(depths.keys()), mol_id, config, depths)

    # One image per route depth.
    for depth, node_ids in group_routes_by_depth(depths).items():
        create_route_svg(tree, sorted(node_ids), mol_id, config, depths, depth)

    if cluster_dict is None:
        return

    # One image per cluster, restricted to node ids that are present in depths.
    for cluster_num, node_ids in cluster_dict.items():
        members = [nid for nid in node_ids if nid in depths]
        if not members:
            continue
        create_route_svg_cluster(tree, sorted(members), mol_id, config, depths, cluster_num)