SynPlanner

Sleeping

App Files Files Community

Gilmullin Almaz committed on Apr 11, 2025

Commit

f2f3593

1 Parent(s): 2830c50

debugging

Browse files

Files changed (5) hide show

cluster/{super_cgr.py → generalized_cgr.py} +0 -0
cluster/reduced_g_cgr.py +159 -0
cluster/rs_cgr.py +0 -40
cluster/utils.py +285 -8
cluster/visualize.py +211 -9

cluster/{super_cgr.py → generalized_cgr.py} RENAMED Viewed

File without changes

cluster/reduced_g_cgr.py ADDED Viewed

	@@ -0,0 +1,159 @@

+from CGRtools.containers.bonds import DynamicBond
+def reducing_g_cgr(g_cgr):
+    """
+    Reduce a Generalized Condensed Graph of Reaction (G-CGR) to its reduced form (RG-CGR).
+    Steps performed:
+    1. Take the substructure of the first connected component of ``g_cgr`` as the working graph
+       (NOTE(review): presumably the component holding the target product - confirm the ordering
+       of ``connected_components``).
+    2. Walk over a snapshot of every bond of the working graph:
+       - ``p_order is None`` while ``order is not None``: the bond is deleted (treated by the
+         author as a leaving-group bond).
+       - both orders are plain ``int`` and differ (in either direction): the bond is replaced by
+         ``DynamicBond(p_order, p_order)``, i.e. both orders are set to ``p_order``.
+    3. Re-extract the first connected component of the modified graph; this is the RG-CGR.
+    4. If ``_p_charges`` differs from ``_charges``, set ``charge = 0`` on every atom whose
+       ``_charges`` entry is non-zero.
+    Args:
+        g_cgr: G-CGR object exposing ``connected_components`` and ``substructure`` (assumed to be a
+            CGRtools CGR container - TODO confirm).
+    Returns:
+        The reduced G-CGR (RG-CGR).
+    Raises:
+        IndexError: if ``g_cgr`` has no connected components.
+    """
+    # One substructure per connected component of the input G-CGR.
+    cgr_prods = [g_cgr.substructure(c) for c in g_cgr.connected_components]
+    target_cgr = cgr_prods[0]  # Work on the first component only; the others are discarded.
+    # Snapshot the adjacency mapping: bonds are deleted/re-added inside the loop.
+    bond_items = list(target_cgr._bonds.items())
+    for atom1, bond_set in bond_items:
+        # Snapshot the neighbours as well, for the same reason.
+        bond_set_items = list(bond_set.items())
+        for atom2, bond in bond_set_items:
+            # Bond has an ``order`` but no ``p_order``: treated as a leaving-group bond and removed.
+            if bond.p_order is None and bond.order is not None:
+                target_cgr.delete_bond(atom1, atom2)
+            # Both orders are ints and differ (any direction, not only a decrease):
+            # replace the bond so that both orders equal ``p_order``.
+            # ``type(...) is int`` also filters out ``None`` orders.
+            elif type(bond.p_order) is int and type(bond.order) is int and bond.p_order != bond.order:
+                p_order = int(bond.p_order)
+                target_cgr.delete_bond(atom1, atom2)
+                target_cgr.add_bond(atom1, atom2, DynamicBond(p_order, p_order))
+    # Deleting bonds may have split the graph; keep only its first connected component.
+    rg_cgr = [target_cgr.substructure(c) for c in target_cgr.connected_components][0]
+    # Neutralize: when the two charge mappings disagree, zero every non-zero ``_charges`` entry.
+    if rg_cgr._p_charges != rg_cgr._charges:
+        for num, charge in rg_cgr._charges.items():
+            if charge != 0:
+                rg_cgr._atoms[num].charge = 0
+    return rg_cgr
+def process_all_rg_cgrs(g_cgrs_dict):
+    """
+    Processes a collection (dictionary) of G-CGRs to generate their reduced forms (RG-CGRs).
+    Iterates over each G-CGR in the provided dictionary and applies the reducing_g_cgr function.
+    Note: There is an apparent bug in the code since it uses an undefined variable 'super_cgrs_dict'
+    and assigns to 'all_rs_cgrs' instead of 'all_rg_cgrs'. The intended behavior is to iterate over
+    the input dictionary (g_cgrs_dict) and store the reduced RG-CGR for each key.
+    Returns:
+        A dictionary where each key corresponds to the RG-CGR obtained from the input G-CGR.
+    """
+    all_rg_cgrs = dict()
+    for num, cgr in g_cgrs_dict.items():
+        all_rg_cgrs[num] = reducing_g_cgr(cgr)
+    return all_rg_cgrs
+def report_strategic_bonds(result, target_cgr):
+    """
+    Reports strategic bonds from a provided result list.
+    Each element in 'result' is expected to be a list with two elements:
+        - A tuple (atom pair) indicating the connected atoms.
+        - The primary bond order (p_order) associated with that bond.
+    The function prints out the atoms (accessed from target_cgr._atoms) and the bond order.
+    """
+    for value in result:
+        atom_pair = value[0]
+        # Print the two atoms and the associated primary bond order.
+        print('\t', target_cgr._atoms[atom_pair[0]], target_cgr._atoms[atom_pair[1]], value[1])
+def extract_strategic_bonds(target_cgr, report=True):
+    """
+    Extracts and optionally reports strategic bonds from a reduced G-CGR (RG-CGR).
+    Strategic bonds are defined as those with:
+        - No current bond order (order is None) but a defined primary bond order (p_order is not None).
+    The function goes through all bonds in the target_cgr, collects each unique bond (avoiding duplicates by using a set)
+    along with its primary bond order, and optionally prints them out.
+    Returns:
+        A list where each element is a pair: [bond_key (tuple of atom indices), primary bond order]
+    """
+    result = []
+    seen = set()
+    # Loop through all bonds in the RG-CGR.
+    for atom1, bond_set in target_cgr._bonds.items():
+        for atom2, bond in bond_set.items():
+            # Check for strategic bonds (order undefined but p_order defined).
+            if bond.order is None and bond.p_order is not None:
+                # Create a sorted tuple of the atom pair to ensure uniqueness.
+                bond_key = tuple(sorted((atom1, atom2)))
+                if bond_key not in seen:
+                    seen.add(bond_key)
+                    result.append([bond_key, bond.p_order])
+    # If reporting is enabled, print the strategic bonds.
+    if report:
+        print('Strategic bonds in RG-CGR:')
+        report_strategic_bonds(result, target_cgr)
+    return result
+def compare_rg_cgr_by_strategic_bonds(rg_cgr1, rg_cgr2, report=True):
+    """
+    Compares two reduced G-CGRs (RG-CGRs) based on their strategic bonds.
+    The function performs the following steps:
+    1. Extracts the list of strategic bonds for each RG-CGR.
+    2. Converts each list into a set of tuples (bond key and bond order) for easy set operations.
+    3. Identifies common bonds, and bonds unique to each RG-CGR.
+    4. Converts these sets back into lists for reporting.
+    5. Prints out the common bonds, bonds unique to the first RG-CGR, and bonds unique to the second RG-CGR.
+    The reporting uses the report_strategic_bonds function to output the atom details and bond orders.
+    """
+    # Extract strategic bonds from both RG-CGRs without reporting.
+    l1 = extract_strategic_bonds(rg_cgr1, report=False)
+    l2 = extract_strategic_bonds(rg_cgr2, report=False)
+    # Create sets of (atom pair, bond order) tuples for both RG-CGRs.
+    set_l1 = { (tuple(item[0]), item[1]) for item in l1 }
+    set_l2 = { (tuple(item[0]), item[1]) for item in l2 }
+    # Identify common bonds and bonds unique to each list.
+    common = set_l1 & set_l2
+    unique_l1 = set_l1 - set_l2
+    unique_l2 = set_l2 - set_l1
+    # Convert the sets back to list format for reporting.
+    common_list = [ [atom_pair, order] for atom_pair, order in common ]
+    unique_l1_list = [ [atom_pair, order] for atom_pair, order in unique_l1 ]
+    unique_l2_list = [ [atom_pair, order] for atom_pair, order in unique_l2 ]
+    if report:
+        print("Common:")
+        report_strategic_bonds(common_list, rg_cgr1)
+        print("Unique for first RG-CGR:")
+        report_strategic_bonds(unique_l1_list, rg_cgr1)
+        print("Unique for second RG-CGR:")
+        report_strategic_bonds(unique_l2_list, rg_cgr1)

cluster/rs_cgr.py DELETED Viewed

@@ -1,40 +0,0 @@
-from CGRtools.containers.bonds import DynamicBond
-def s_cgr2rs_cgr(s_cgr):
-    cgr_prods = [s_cgr.substructure(c) for c in s_cgr.connected_components]
-    target_cgr = cgr_prods[0]
-    bond_items = list(target_cgr._bonds.items())
-    for atom1, bond_set in bond_items:
-        bond_set_items = list(bond_set.items())
-        for atom2, bond in bond_set_items:
-            # Leaving groups removal
-            if bond.p_order == None and bond.order is not None:
-                # print(atom1, atom2)
-                # print(bond)
-                target_cgr.delete_bond(atom1, atom2)
-                # target_cgr.clean2d()
-                # display(SVG(target_cgr.depict()))
-            ## Modified bond, but not leaving group
-            elif type(bond.p_order) is int and type(bond.order) is int and bond.p_order < bond.order:
-                p_order = int(bond.p_order)
-                target_cgr.delete_bond(atom1, atom2)
-                target_cgr.add_bond(atom1, atom2, DynamicBond(p_order, p_order))
-    rs_cgr = [target_cgr.substructure(c) for c in target_cgr.connected_components][0]
-    # Charge neutralizer
-    if rs_cgr._p_charges != rs_cgr._charges:
-        for num, charge in rs_cgr._charges.items():
-            if charge != 0:
-                rs_cgr._atoms[num].charge = 0
-    return rs_cgr
-def process_all_rs_cgrs(super_cgrs_dict):
-    all_rs_cgrs = dict()
-    for num, cgr in super_cgrs_dict.items():
-        all_rs_cgrs[num] = s_cgr2rs_cgr(cgr)
-    return all_rs_cgrs

cluster/utils.py CHANGED Viewed

@@ -1,3 +1,9 @@
 def extract_reactions(tree):
     reactions_dict = {}
     for node_id in set(tree.winning_nodes):
@@ -5,14 +11,55 @@ def extract_reactions(tree):
         reactions_dict[node_id] = reactions
     return reactions_dict
 class TreeWrapper:
-    def __init__(self, tree):
         self.tree = tree
     def __getstate__(self):
         state = self.__dict__.copy()
-        # Save the state of the tree
         tree_state = self.tree.__dict__.copy()
         # Reset or remove non-pickleable attributes (e.g., _tqdm, policy_network, value_network)
         if '_tqdm' in tree_state:
@@ -20,18 +67,248 @@ class TreeWrapper:
         for attr in ['policy_network', 'value_network']:
             if attr in tree_state:
                 tree_state[attr] = None
-        # Store tree state separately
         state['tree_state'] = tree_state
-        # Remove the actual tree instance from the state
         del state['tree']
         return state
     def __setstate__(self, state):
-        # Retrieve the stored tree state
         tree_state = state.pop('tree_state')
-        # Update the instance state
         self.__dict__.update(state)
-        # Create a new Tree instance without calling __init__
         new_tree = Tree.__new__(Tree)
         new_tree.__dict__.update(tree_state)
-        self.tree = new_tree

+from synplan.mcts.tree import Tree
+from synplan.utils.visualisation import get_route_svg
+from CGRtools.containers import MoleculeContainer
+import pickle
+import os
 def extract_reactions(tree):
     reactions_dict = {}
     for node_id in set(tree.winning_nodes):
         reactions_dict[node_id] = reactions
     return reactions_dict
+def extract_rules_from_route(node_id, tree):
+    nodes = tree.route_to_node(node_id)
+    found_rules_ids = []
+    for i in range(len(nodes)):
+        precursor = nodes[i].new_precursors[0]
+        if len(precursor) != 0:
+            if 'reactor_id' in precursor.molecule.meta.keys():
+                found_rules_ids.append(precursor.molecule.meta['reactor_id'])
+    return found_rules_ids[::-1]
+def save_smarts(mol_id, config, reactions_dict):
+    with open(f'smarts/smarts_mol_{mol_id}_{config}.txt', "w") as file:
+        for node_id, reactions in reactions_dict.items():
+            file.write(f"{node_id}\n")
+            for reaction in reactions:
+                file.write(f"{reaction}\n")
+def get_highest_route_nodes(tree, node_dict):
+    highest_nodes = {}
+    for key, node_ids in node_dict.items():
+        max_score = float('-inf')
+        best_nodes = []
+        for node_id in node_ids:
+            score = round(tree.route_score(node_id), 3)
+            if score > max_score:
+                max_score = score
+                best_nodes = [node_id]
+            elif score == max_score:
+                best_nodes.append(node_id)
+        highest_nodes[key] = best_nodes
+    return highest_nodes
 class TreeWrapper:
+    BASE_DIR = 'forest'
+    def __init__(self, tree, mol_id, config):
+        """Initializes the TreeWrapper."""
         self.tree = tree
+        self.mol_id = mol_id
+        self.config = config
+        # Ensure the directory exists before creating the filename
+        os.makedirs(self.BASE_DIR, exist_ok=True)
+        self.filename = os.path.join(self.BASE_DIR, f'tree_{mol_id}_{config}.pkl')
     def __getstate__(self):
         state = self.__dict__.copy()
         tree_state = self.tree.__dict__.copy()
         # Reset or remove non-pickleable attributes (e.g., _tqdm, policy_network, value_network)
         if '_tqdm' in tree_state:
         for attr in ['policy_network', 'value_network']:
             if attr in tree_state:
                 tree_state[attr] = None
         state['tree_state'] = tree_state
         del state['tree']
         return state
     def __setstate__(self, state):
         tree_state = state.pop('tree_state')
         self.__dict__.update(state)
         new_tree = Tree.__new__(Tree)
         new_tree.__dict__.update(tree_state)
+        self.tree = new_tree
+    def save_tree(self):
+        """Saves the TreeWrapper instance (including the tree state) to a file."""
+        try:
+            with open(self.filename, 'wb') as f:
+                pickle.dump(self, f)
+            print(f"Tree wrapper for mol_id '{self.mol_id}', config '{self.config}' saved to '{self.filename}'.")
+        except Exception as e:
+            print(f"Error saving tree to {self.filename}: {e}")
+    @classmethod
+    def load_tree_from_id(cls, mol_id, config):
+        """
+        Loads a Tree object from a saved file using mol_id and config.
+        Args:
+            mol_id: The molecule ID used for saving.
+            config: The configuration used for saving.
+        Returns:
+            The loaded Tree object, or None if loading fails.
+        """
+        filename = os.path.join(cls.BASE_DIR, f'tree_{mol_id}_{config}.pkl')
+        print(f"Attempting to load tree from: {filename}")
+        try:
+            # Ensure the 'Tree' class is defined in the current scope
+            if 'Tree' not in globals() and 'Tree' not in locals():
+                 raise NameError("The 'Tree' class definition is required to load the object.")
+            with open(filename, 'rb') as f:
+                loaded_wrapper = pickle.load(f) # This implicitly calls __setstate__
+            # Check if the loaded object is indeed a TreeWrapper instance (optional sanity check)
+            if not isinstance(loaded_wrapper, cls):
+                print(f"Warning: Loaded object from {filename} is not a TreeWrapper instance.")
+                return None # Or raise an error
+            print(f"Tree object for mol_id '{mol_id}', config '{config}' successfully loaded from '{filename}'.")
+            # The __setstate__ method already reconstructed the tree inside the wrapper
+            return loaded_wrapper.tree
+        except FileNotFoundError:
+            print(f"Error: File not found at {filename}")
+            return None
+        except (pickle.UnpicklingError, EOFError) as e:
+            print(f"Error: Could not unpickle file {filename}. It might be corrupted or empty. Details: {e}")
+            return None
+        except NameError as e:
+             print(f"Error during loading: {e}. Ensure 'Tree' class is defined.")
+             return None
+        except Exception as e:
+            print(f"An unexpected error occurred loading tree from {filename}: {e}")
+            return None
+def generate_cluster_html(
+        tree: Tree,
+        cluster_node_ids: list,
+        cluster_num: int,
+        rg_cgrs_dict: dict,  # route node id -> RG-CGR
+        aam: bool = False,
+    ) -> str:
+    """
+    Generates an HTML page report for a specific cluster's synthesis routes.
+    The page lists the target molecule, the cluster number and size, a depiction of the
+    RG-CGR of the first valid route (taken as the cluster representative), a colour legend
+    and then, for every valid route, its SVG, its reaction steps and its rounded score.
+    Invalid input does not raise: an HTML string describing the problem is returned instead.
+    An error while processing one route is written into the table as a red row.
+    :param tree: The built MCTS tree.
+    :param cluster_node_ids: List of route node IDs belonging to this cluster. IDs that are
+        not in ``tree.nodes`` or whose node is not solved are ignored.
+    :param cluster_num: The identifier number for this cluster (used in title/header).
+    :param rg_cgrs_dict: Mapping from route node ID to its RG-CGR; only the entry of the first
+        valid route is used. ``clean2d()`` is called on that object before depicting it.
+    :param aam: If True, depict atom-to-atom mapping in route SVGs.
+        NOTE(review): this is set through ``MoleculeContainer.depict_settings``, which
+        presumably changes a process-wide setting - confirm.
+    :return: A string containing the complete HTML report.
+    """
+    # --- Depict Settings ---
+    # Best effort: a failure here is reported (or ignored for NameError) and the report is still built.
+    try:
+        if aam:
+            MoleculeContainer.depict_settings(aam=True)
+        else:
+            MoleculeContainer.depict_settings(aam=False)
+    except NameError:
+         # If MoleculeContainer isn't available/needed, just pass
+         pass
+    except Exception as e:
+         print(f"Warning: Error setting MoleculeContainer depict settings: {e}")
+    # --- Validate Input ---
+    if not isinstance(cluster_node_ids, list):
+        return "<html><body>Error: cluster_node_ids must be a list.</body></html>"
+    if not tree or not isinstance(tree, Tree):
+        return "<html><body>Error: Invalid tree object provided.</body></html>"
+    # Filter out node IDs not actually present or not solved in the tree
+    valid_routes_in_cluster = []
+    for node_id in cluster_node_ids:
+        if node_id in tree.nodes and tree.nodes[node_id].is_solved():
+             valid_routes_in_cluster.append(node_id)
+        # Invalid/unsolved node IDs are dropped silently.
+    if not valid_routes_in_cluster:
+        # Return a minimal HTML page indicating no valid routes
+        return f"""
+        <!doctype html><html lang="en"><head><meta charset="utf-8">
+        <title>Cluster {cluster_num} Report</title></head><body>
+        <h3>Cluster {cluster_num} Report</h3>
+        <p>No valid/solved routes found for this cluster.</p>
+        </body></html>"""
+    # --- HTML Templates & Tags ---
+    # NOTE(review): ``th`` is defined but not used anywhere in this function.
+    th = '<th style="text-align: left; background-color:#978785; border: 1px solid black; border-spacing: 0">'
+    td = '<td style="text-align: left; border: 1px solid black; border-spacing: 0">'
+    # font_red = "<font color='red' style='font-weight: bold'>" # Consider using CSS classes instead
+    # font_green = "<font color='light-green' style='font-weight: bold'>"
+    font_head = "<font style='font-weight: bold; font-size: 18px'>"
+    font_normal = "<font style='font-weight: normal; font-size: 18px'>"
+    font_close = "</font>"
+    template_begin = f"""
+    <!doctype html>
+    <html lang="en">
+    <head>
+    <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css"
+    rel="stylesheet"
+    integrity="sha384-1BmE4kWBq78iYhFldvKuhfTAU6auU8tT94WrHftjDbrCEXSU1oBoqyl2QvZ6jIW3"
+    crossorigin="anonymous">
+    <meta charset="utf-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1">
+    <title>Cluster {cluster_num} Routes Report</title>
+    <style>
+        /* Optional: Add some basic styling */
+        .table {{ border-collapse: collapse; width: 100%; }}
+        th, td {{ border: 1px solid #ddd; padding: 8px; text-align: left; }}
+        tr:nth-child(even) {{ background-color: #f2f2f2; }}
+        caption {{ caption-side: top; font-size: 1.5em; margin: 1em 0; }}
+        svg {{ max-width: 100%; height: auto; }} /* Make SVGs responsive */
+    </style>
+    </head>
+    <body>
+    <div class="container"> """
+    template_end = """
+    </div> <script
+    src="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/js/bootstrap.bundle.min.js"
+    integrity="sha384-ka7Sk0Gln4gmtz2MlQnikT1wXgYsOg+OMhuP+IlRH9sENBO0LRn5q+8nbTov4+1p"
+    crossorigin="anonymous">
+    </script>
+    </body>
+    </html>
+    """
+    # Legend marker; the "rgb()" placeholder is replaced with a concrete colour per legend entry.
+    box_mark = """
+    <svg width="30" height="30" viewBox="0 0 1 1" xmlns="http://www.w3.org/2000/svg" style="vertical-align: middle; margin-right: 5px;">
+    <circle cx="0.5" cy="0.5" r="0.5" fill="rgb()" fill-opacity="0.35" />
+    </svg>
+    """
+    # --- Build HTML Table ---
+    table = f"""
+    <table class="table table-striped table-hover caption-top">
+    <caption><h3>Retrosynthetic Routes Report - Cluster {cluster_num}</h3></caption>
+    <tbody>"""
+    # The target is read from node 1 of the tree; a failure only changes the displayed text.
+    try:
+        target_smiles_str = str(tree.nodes[1].curr_precursor) if 1 in tree.nodes else "N/A"
+    except Exception:
+        target_smiles_str = "Error retrieving target SMILES"
+    table += f"<tr>{td}{font_normal}Target Molecule: {target_smiles_str}{font_close}</td></tr>"
+    table += f"<tr>{td}{font_normal}Cluster Number: {cluster_num}{font_close}</td></tr>"
+    table += f"<tr>{td}{font_normal}Size of Cluster: {len(valid_routes_in_cluster)}{font_close} routes</td></tr>"
+    # --- Add RG-CGR Image ---
+    # Get the node_id of the first valid route in the cluster
+    first_route_id = valid_routes_in_cluster[0] if valid_routes_in_cluster else None
+    if first_route_id and rg_cgrs_dict and first_route_id in rg_cgrs_dict:
+        try:
+            rg_cgr = rg_cgrs_dict[first_route_id]
+            rg_cgr.clean2d()
+            rg_cgr_svg = rg_cgr.depict()
+            # Validate if it looks like SVG (basic check)
+            if rg_cgr_svg.strip().startswith("<svg"):
+                    table += f"<tr>{td}{font_normal}Cluster Representative RG-CGR (from Route {first_route_id}):{font_close}<br>{rg_cgr_svg}</td></tr>"
+            else:
+                    # Handle case where it's not SVG as expected
+                    table += f"<tr>{td}{font_normal}Cluster Representative RG-CGR (from Route {first_route_id}):{font_close}<br><i>Invalid SVG format retrieved.</i></td></tr>"
+                    print(f"Warning: Expected SVG for RG-CGR of node {first_route_id}, but got: {rg_cgr_svg[:100]}...") # Log a warning
+        except Exception as e:
+            table += f"<tr>{td}{font_normal}Cluster Representative RG-CGR (from Route {first_route_id}):{font_close}<br><i>Error retrieving/displaying RG-CGR: {e}</i></td></tr>"
+    else:
+        # Handle cases where RG-CGR data is missing
+        if first_route_id:
+                table += f"<tr>{td}{font_normal}Cluster Representative RG-CGR (from Route {first_route_id}):{font_close}<br><i>Not found in provided RG-CGR dictionary.</i></td></tr>"
+        else:
+                # This case shouldn't happen due to earlier check, but as fallback:
+                table += f"<tr>{td}{font_normal}Cluster Representative RG-CGR:{font_close}<br><i>No valid routes in cluster to select from.</i></td></tr>"
+    # --- Legend ---
+    table += f"""
+    <tr>{td}
+        <div style="display: flex; align-items: center; flex-wrap: wrap; gap: 15px;">
+            <span>{box_mark.replace("rgb()", "rgb(152, 238, 255)")} Target Molecule</span>
+            <span>{box_mark.replace("rgb()", "rgb(240, 171, 144)")} Molecule Not In Stock</span>
+            <span>{box_mark.replace("rgb()", "rgb(155, 250, 179)")} Molecule In Stock</span>
+        </div>
+    </td></tr>
+    """
+    # --- Add Routes for this Cluster ---
+    # Each route is handled independently so one failing route does not abort the report.
+    for route_id in valid_routes_in_cluster:
+        try:
+            svg = get_route_svg(tree, route_id)  # get SVG
+            full_route = tree.synthesis_route(route_id)  # get route steps
+            reactions = ""
+            for i, synth_step in enumerate(full_route):
+                reactions += f"<b>Step {i + 1}:</b> {str(synth_step)}<br>"
+            route_score = round(tree.route_score(route_id), 3)
+            table += (
+                f'<tr style="line-height: 1.8;">{td}{font_head}Route {route_id} | ' # Use | for separation
+                f"Steps: {len(full_route)} | "
+                f"Score: {route_score}{font_close}</td></tr>"
+            )
+            table += f"<tr>{td}{svg if svg else '<i>Error generating route visualization</i>'}</td></tr>"
+            table += f"<tr>{td}{reactions if reactions else '<i>No reaction steps found</i>'}</td></tr>"
+        except Exception as e:
+            table += f'<tr><td colspan="1" style="color: red;">Error processing route {route_id}: {e}</td></tr>' # Use colspan if needed based on final table structure
+    table += "</tbody></table>"
+    # --- Combine and Return Full HTML ---
+    full_html = template_begin + table + template_end
+    return full_html

cluster/visualize.py CHANGED Viewed

@@ -1,13 +1,187 @@
 import os
 import re
 import numpy as np
 import matplotlib.pyplot as plt
-from collections import Counter
-from synplan.utils.visualisation import get_route_svg
 import seaborn as sns
-def pie_chart(cluster_sizes):
-    labels = [f'Cluster {i+1}' for i in range(len(cluster_sizes))]
     sns.set_style("whitegrid")
@@ -16,11 +190,37 @@ def pie_chart(cluster_sizes):
         cluster_sizes, labels=None, autopct='%1.1f%%', colors=sns.color_palette("pastel"),
         startangle=140, wedgeprops={'edgecolor': 'black'}
     )
-    ax.legend(wedges, labels, title="Clusters", loc="center left", bbox_to_anchor=(1, 0.5))
     # plt.show()
     return fig
 def distribution_by_depth(tree, complex_cgr_dict):
     if len(complex_cgr_dict) == 0:
@@ -32,7 +232,7 @@ def distribution_by_depth(tree, complex_cgr_dict):
         depths[n] = len(reactions)
     return depths
-def histogram_by_depth(depths, mol_id, config):
     if len(depths) == 0:
         print('Error no depths')
@@ -47,8 +247,10 @@ def histogram_by_depth(depths, mol_id, config):
     plt.ylabel('Frequency')
     plt.title(f'Frequency Histogram of Number of reactions in one tree of total {len(depths)}')
     plt.xticks(bins)
-    # plt.show()
-    plt.savefig(f'histograms/by_depth_mol{mol_id}_{config}.png', dpi=100)
 def group_routes_by_depth(depths):
@@ -244,7 +446,7 @@ def create_route_svg_cluster(tree, node_ids, mol_id, config, depths, cluster_num
     print(f"Saved: {path_name}")
-def save_route_images(tree, depths, mol_id=1, config=1, cluster_dict=None):
     """
     Save route images grouped by depth and/or cluster.

 import os
 import re
+from collections import Counter
 import numpy as np
 import matplotlib.pyplot as plt
 import seaborn as sns
+from IPython.display import SVG, display
+import io
+import sys
+from synplan.utils.visualisation import get_route_svg
+from scipy.cluster.hierarchy import dendrogram
+from .reduced_g_cgr import extract_strategic_bonds, compare_rg_cgr_by_strategic_bonds
+def report_2_dissimilar(similarity_df, tree, rg_cgrs_dict):
+    min_index = similarity_df.stack().idxmin()
+    row_index, col_index = min_index
+    print(f'Most dissimilar routes are {row_index} and {col_index}, Tanimoto index = {"%.2f" % similarity_df[row_index][col_index]}')
+    print('Route ID', row_index)
+    rg_cgr_1 = rg_cgrs_dict[row_index]
+    rg_cgr_1.clean2d()
+    display(SVG(rg_cgr_1.depict()))
+    extract_strategic_bonds(rg_cgr_1)
+    display(SVG(get_route_svg(tree, row_index)))
+    print('Route ID', col_index)
+    rg_cgr_2 = rg_cgrs_dict[col_index]
+    rg_cgr_2.clean2d()
+    display(SVG(rg_cgr_2.depict()))
+    extract_strategic_bonds(rg_cgr_2)
+    display(SVG(get_route_svg(tree, col_index)))
+    print('Summary:')
+    compare_rg_cgr_by_strategic_bonds(rg_cgr_1, rg_cgr_2)
+def save_clusters_html(clusters, best_by_score, tree, rg_cgrs_dict, mol_id, config):
+    # Prepare a list to accumulate HTML parts for each cluster
+    os.makedirs("./final_clusters", exist_ok=True)
+    html_parts = []
+    # Loop over your clusters
+    for cluster_num, node_id_list in clusters.items():
+        parts = []  # to accumulate parts for this cluster
+        # Generate text output
+        best_route_in_cluster = best_by_score[cluster_num][0]
+        score = round(tree.route_score(best_route_in_cluster), 3)
+        parts.append(f"{cluster_num} ||| Size: {len(clusters[cluster_num])}\n")
+        parts.append(f"Example: {best_route_in_cluster}  Route score: {score}\n")
+        # Insert the first SVG immediately after its marker text
+        svg1 = get_route_svg(tree, best_route_in_cluster)
+        parts.append(svg1 + "\n")
+        # Continue with additional text and SVGs
+        parts.append("The RG-CGR:\n")
+        rg_cgr = rg_cgrs_dict[best_route_in_cluster]
+        rg_cgr.clean2d()
+        svg2 = rg_cgr.depict()
+        parts.append(svg2 + "\n")
+        # Capture output from extract_strategic_bonds, if it prints something
+        buf = io.StringIO()
+        old_stdout = sys.stdout
+        sys.stdout = buf
+        extract_strategic_bonds(rg_cgr)
+        sys.stdout = old_stdout
+        strategic_text = buf.getvalue()
+        parts.append(strategic_text + "\n")
+        # Wrap this cluster's output in a <pre> tag for formatting and add some spacing
+        cluster_html = f'<div class="cluster" style="margin-bottom: 2em;"><pre>{"".join(parts)}</pre></div>'
+        html_parts.append(cluster_html)
+    # Combine all parts into a full HTML document
+    html_content = f"""
+    <html>
+      <head>
+        <meta charset="utf-8">
+        <title>Captured Cluster Outputs</title>
+      </head>
+      <body>
+        {''.join(html_parts)}
+      </body>
+    </html>
+    """
+    # Write the HTML content to a file
+    with open(f"final_clusters/htmls/mol_{mol_id}_{config}.html", "w", encoding="utf-8") as f:
+        f.write(html_content)
+def report_2_dissimilar_to_html(similarity_df, tree, rg_cgrs_dict, mol_id=1, config=2,output_filename=None):
+    """Generates an HTML report of the two most dissimilar routes based on a similarity DataFrame."""
+    os.makedirs("./dissimilars", exist_ok=True)
+    output_filename=f"dissimilars/report_dissimilar_mol_{mol_id}_{config}.html"
+    # Identify the two most dissimilar routes
+    min_index = similarity_df.stack().idxmin()
+    row_index, col_index = min_index
+    # Capture text output in a buffer
+    buf = io.StringIO()
+    old_stdout = sys.stdout
+    sys.stdout = buf
+    print(f'Most dissimilar routes are {row_index} and {col_index}, Tanimoto index = {"%.2f" % similarity_df[row_index][col_index]}')
+    # Store HTML content
+    html_parts = []
+    # Function to capture and append text, SVGs, and function outputs
+    def capture_route_info(route_id):
+        rg_cgr = rg_cgrs_dict[route_id]
+        rg_cgr.clean2d()
+        # Capture the first SVG (RG-CGR depiction)
+        svg1 = rg_cgr.depict()
+        # Capture the second SVG (Route depiction)
+        svg2 = get_route_svg(tree, route_id)
+        # Capture output of extract_strategic_bonds
+        buf_extract = io.StringIO()
+        sys.stdout = buf_extract
+        extract_strategic_bonds(rg_cgr)
+        sys.stdout = old_stdout
+        extract_output = buf_extract.getvalue()
+        # Store text + SVGs in HTML format
+        html_parts.append(f"""
+        <div class="route-section">
+            <pre>{buf.getvalue()}</pre>
+            <div class="svg1">{svg1}</div>
+            <pre>{extract_output}</pre>
+            <div class="svg2">{svg2}</div>
+        </div>
+        """)
+        buf.truncate(0)  # Clear buffer for next route
+        buf.seek(0)
+    # Process the first route
+    capture_route_info(row_index)
+    # Process the second route
+    capture_route_info(col_index)
+    # Capture and store final summary
+    buf_summary = io.StringIO()
+    sys.stdout = buf_summary
+    compare_rg_cgr_by_strategic_bonds(rg_cgrs_dict[row_index], rg_cgrs_dict[col_index])
+    sys.stdout = old_stdout
+    summary_output = buf_summary.getvalue()
+    html_parts.append(f"<h2>Summary</h2><pre>{summary_output}</pre>")
+    # Restore standard stdout
+    sys.stdout = old_stdout
+    # Build the full HTML file
+    html_content = f"""
+    <html>
+    <head>
+        <meta charset="utf-8">
+        <title>Route Dissimilarity Report</title>
+    </head>
+    <body>
+        {''.join(html_parts)}
+    </body>
+    </html>
+    """
+    # Write the HTML file
+    with open(output_filename, "w", encoding="utf-8") as f:
+        f.write(html_content)
+    print(f"Report saved as {output_filename}")
+def pie_chart(cluster_sizes, sub='', input_cluster_num=1, input_step_nums=None):
+    labels = [f'{sub}Cluster {i+1}' for i in range(len(cluster_sizes))]
     sns.set_style("whitegrid")
         cluster_sizes, labels=None, autopct='%1.1f%%', colors=sns.color_palette("pastel"),
         startangle=140, wedgeprops={'edgecolor': 'black'}
     )
+    ax.legend(wedges, labels, title=f"{sub}Clusters", loc="center left", bbox_to_anchor=(1, 0.5))
+    if sub == '':
+        ax.set_title(f"{sub}Cluster Size Distribution for {sum(cluster_sizes)} routes")
+    else:
+        ax.set_title(f"{sub}cluster Size Distribution for {sum(cluster_sizes)} routes in cluster {input_cluster_num} with number of steps {input_step_nums}")
+    plt.close(fig)
     # plt.show()
     return fig
+def save_dendrogram(df, Z, mol_id, config):
+    plt.figure(figsize=(14, 7)) # figsize=(14, 7)
+    dendrogram(Z, labels=df.columns, leaf_rotation=90)
+    plt.title(f"Hierarchical Clustering Dendrogram for routes generated for molecule #{mol_id}")
+    plt.xlabel("Route node id")
+    plt.ylabel("Distance (1 - Similarity)")
+    # Get current y-axis limits and add a gap below zero
+    ax = plt.gca()
+    ymin, ymax = ax.get_ylim()
+    # Add a gap that is 5% of the current y-range below zero
+    gap = 0.05 * (ymax - ymin)
+    ax.set_ylim(ymin - gap, ymax)
+    ax.grid(False)
+    ax.autoscale(enable=None, axis="x", tight=True)
+    plt.tight_layout()
+    plt.savefig(f'dendrograms/av_link_mol{mol_id}_{config}.png', dpi=100)
 def distribution_by_depth(tree, complex_cgr_dict):
     if len(complex_cgr_dict) == 0:
         depths[n] = len(reactions)
     return depths
+def histogram_by_depth(depths, mol_id=1, config=1, save=False):
     if len(depths) == 0:
         print('Error no depths')
     plt.ylabel('Frequency')
     plt.title(f'Frequency Histogram of Number of reactions in one tree of total {len(depths)}')
     plt.xticks(bins)
+    if save:
+        plt.savefig(f'histograms/by_depth_mol{mol_id}_{config}.png', dpi=100)
+    else:
+        plt.show()
 def group_routes_by_depth(depths):
     print(f"Saved: {path_name}")
+def save_route_images(tree, depths, mol_id, config, cluster_dict=None):
     """
     Save route images grouped by depth and/or cluster.