Gilmullin Almaz committed on
Commit
1c0b880
·
2 Parent(s): 27a7101 a227a1b

Merge branch 'main' of https://huggingface.co/spaces/Protolaw/SynPlanner

Browse files
Files changed (1) hide show
  1. app.py +228 -2
app.py CHANGED
@@ -275,9 +275,235 @@ if st.session_state.planning_done and st.session_state.res is not None and st.se
275
  image_counter += 1
276
  num_steps = len(tree.synthesis_route(node_id))
277
  route_score = round(tree.route_score(node_id), 3)
278
- st.image(get_route_svg(tree, node_id),
279
- caption=f"Route {node_id}; {num_steps} steps; Route score: {route_score}")
280
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
281
  stat_col, download_col = st.columns(2, gap="medium")
282
  with stat_col:
283
  st.subheader("Statistics")
 
275
  image_counter += 1
276
  num_steps = len(tree.synthesis_route(node_id))
277
  route_score = round(tree.route_score(node_id), 3)
278
+ st.image(get_route_svg(tree, node_id), caption=f"Route {node_id}; {num_steps} steps; Route score: {route_score}")
279
+
280
 
281
+ ### Modified part
282
+ # cluster_box, z = st.columns(2, gap="medium")
283
+ # with cluster_box:
284
+ # num_clusters = st.slider('Number of clusters to display', min_value=2, max_value=10, value=2)
285
+
286
+ # submit_clustering = st.button('Start clustering')
287
+
288
+ # if submit_clustering:
289
+ # st.subheader("Examples of clusters")
290
+ # super_cgrs_dict = reassign_nums(tree)
291
+
292
+ # reduced_super_cgrs_dict = process_all_rs_cgrs(super_cgrs_dict)
293
+
294
+ # mfp = MorganFingerprint()
295
+
296
+ # results = cluster_molecules(reduced_super_cgrs_dict, mfp, max_clusters=num_clusters)
297
+ # cluster_box, z = st.columns(2, gap="medium")
298
+ # with cluster_box:
299
+ # # Initialize session state if not exists
300
+ # if 'memory_warning_shown' not in st.session_state:
301
+ # st.session_state.memory_warning_shown = False
302
+
303
+ # current_memory = psutil.Process().memory_info().rss / 1024 / 1024
304
+ # st.write(f"Current memory usage: {current_memory:.2f} MB")
305
+ # st.write(f"Number of winning nodes: {len(tree.winning_nodes)}")
306
+
307
+ # # Memory warning
308
+ # if current_memory > 1000 and not st.session_state.memory_warning_shown:
309
+ # st.warning("Memory usage is high. Consider reducing the number of routes or clearing cache.")
310
+ # st.session_state.memory_warning_shown = True
311
+
312
+ # # Store the previous value in session state
313
+ # if 'prev_num_clusters' not in st.session_state:
314
+ # st.session_state.prev_num_clusters = 2
315
+
316
+ # num_clusters = st.slider(
317
+ # 'Number of clusters to display',
318
+ # min_value=2,
319
+ # max_value=min(10, len(tree.winning_nodes)),
320
+ # value=st.session_state.prev_num_clusters
321
+ # )
322
+
323
+ # # Update the stored value only if it changed
324
+ # if num_clusters != st.session_state.prev_num_clusters:
325
+ # st.session_state.prev_num_clusters = num_clusters
326
+
327
+ # submit_clustering = st.button('Start clustering')
328
+
329
+ # if submit_clustering:
330
+ # try:
331
+ # with st.spinner("Processing clusters..."):
332
+ # # Clear memory before starting
333
+ # gc.collect()
334
+
335
+ # st.write("Starting clustering process...")
336
+ # memory_before = psutil.Process().memory_info().rss / 1024 / 1024
337
+ # st.write(f"Memory before clustering: {memory_before:.2f} MB")
338
+
339
+ # super_cgrs_dict = reassign_nums(tree)
340
+ # del tree # Free up memory from the tree object since we don't need it anymore
341
+ # gc.collect()
342
+
343
+ # reduced_super_cgrs_dict = process_all_rs_cgrs(super_cgrs_dict)
344
+ # del super_cgrs_dict # Free up memory
345
+ # gc.collect()
346
+
347
+ # memory_after = psutil.Process().memory_info().rss / 1024 / 1024
348
+ # st.write(f"Memory after CGR processing: {memory_after:.2f} MB")
349
+
350
+ # mfp = MorganFingerprint()
351
+ # results = cluster_molecules(reduced_super_cgrs_dict, mfp, max_clusters=num_clusters)
352
+ # del reduced_super_cgrs_dict # Free up memory
353
+ # gc.collect()
354
+
355
+ # st.write("Clustering completed")
356
+
357
+ # except Exception as e:
358
+ # st.error(f"Clustering failed with error: {str(e)}")
359
+ # st.write(f"Memory at error: {psutil.Process().memory_info().rss / 1024 / 1024:.2f} MB")
360
+ # raise e
361
+
362
+
363
+ # Access results
364
+ # clusters = results['clusters_dict']
365
+
366
+ # for cluster_num, node_id_list in clusters.items():
367
+ # st.markdown(f"Cluster's number: ``{cluster_num}``")
368
+ # node_id = node_id_list[0]
369
+ # num_steps = len(tree.synthesis_route(node_id))
370
+ # route_score = round(tree.route_score(node_id), 3)
371
+ # st.image(get_route_svg(tree, node_id), caption=f"Route {node_id}; {num_steps} steps; Route score: {route_score}")
372
+
373
@st.cache_data(hash_funcs={Tree: lambda _: None})
def prepare_clustering_data(tree):
    """Build the reduced super-CGR dictionary used for route clustering.

    Walks ``tree.winning_nodes`` in chunks of ``chunk_size``, extracting each
    node's synthesis route and converting the chunk via ``reassign_nums_chunk``;
    the resulting super-CGRs are then reduced chunk-wise with
    ``process_all_rs_cgrs``. Chunking plus explicit ``gc.collect()`` keeps peak
    memory down on large trees.

    NOTE(review): ``hash_funcs={Tree: lambda _: None}`` makes every Tree hash
    to the same cache key, so calling this with a *different* tree later will
    return the first tree's cached result — confirm this is intended, or clear
    ``st.cache_data`` whenever the tree changes.

    :param tree: search tree exposing ``winning_nodes``, ``synthesis_route``.
    :return: dict mapping node id -> reduced super-CGR, or ``None`` on error
        (the error is also surfaced via ``st.error``).
    """
    try:
        # Log the start and basic info from the Tree
        print("Starting clustering data preparation.")
        total_nodes = len(tree.winning_nodes)
        print(f"Total winning nodes: {total_nodes}")
        print(f"Tree id: {id(tree)}")

        chunk_size = 10
        super_cgrs_dict = {}

        # Materialize the node list ONCE; the original rebuilt
        # list(tree.winning_nodes) on every chunk iteration.
        winning_nodes = list(tree.winning_nodes)

        # Process winning nodes in chunks
        for i in range(0, total_nodes, chunk_size):
            current_chunk = winning_nodes[i:i + chunk_size]
            print(f"Processing chunk {i // chunk_size + 1}: Nodes {current_chunk}")

            temp_dict = {}
            for node in current_chunk:
                try:
                    # Log before processing each node
                    print(f"Processing node {node}")
                    route = tree.synthesis_route(node)
                    temp_dict[node] = route
                    print(f"Node {node} processed successfully (route length: {len(route)}).")
                except Exception as e:
                    # Best-effort: a node whose route extraction fails is
                    # skipped rather than aborting the whole preparation.
                    print(f"Error processing node {node}: {e}")

            # Log before calling reassign_nums_chunk
            print(f"Calling reassign_nums_chunk for nodes: {list(temp_dict.keys())}")
            chunk_super_cgrs = reassign_nums_chunk(temp_dict)
            super_cgrs_dict.update(chunk_super_cgrs)
            print(f"Chunk {i // chunk_size + 1} processed. Keys: {list(chunk_super_cgrs.keys())}")

            del temp_dict
            gc.collect()

        # Process reduced CGRs in chunks; hoist the key list out of the loop
        # (the original rebuilt list(super_cgrs_dict.keys()) per iteration).
        all_keys = list(super_cgrs_dict.keys())
        reduced_super_cgrs_dict = {}
        for i in range(0, len(all_keys), chunk_size):
            keys = all_keys[i:i + chunk_size]
            chunk_dict = {k: super_cgrs_dict[k] for k in keys}
            print(f"Reducing chunk for keys: {keys}")
            reduced_chunk = process_all_rs_cgrs(chunk_dict)
            reduced_super_cgrs_dict.update(reduced_chunk)
            print(f"Reduced chunk processed for keys: {list(reduced_chunk.keys())}")

            del chunk_dict
            gc.collect()

        print("Clustering data preparation complete.")
        return reduced_super_cgrs_dict
    except Exception as e:
        print(f"Error in prepare_clustering_data: {str(e)}")
        st.error(f"Error in prepare_clustering_data: {str(e)}")
        return None
429
+
430
+
431
def memory_status():
    """Return a human-readable summary of this process's RSS, in MB."""
    rss_mb = psutil.Process().memory_info().rss / (1024 * 1024)
    return f"Memory usage: {rss_mb:.2f} MB"
436
+
437
# Initialize session state for the tree and the three-step clustering flow.
if 'tree_data' not in st.session_state:
    st.session_state.tree_data = tree
if 'clustering_state' not in st.session_state:
    st.session_state.clustering_state = {
        'prepared': False,   # set True only after data is actually prepared
        'data': None,        # reduced super-CGR dict from prepare_clustering_data
        'num_clusters': 2,   # persisted slider value
    }

cluster_box, z = st.columns(2, gap="medium")
with cluster_box:
    st.write(memory_status())
    st.write(f"Number of winning nodes: {len(st.session_state.tree_data.winning_nodes)}")

    # Step 1: Prepare Data Button
    if not st.session_state.clustering_state['prepared']:
        if st.button('Step 1: Prepare clustering data'):
            with st.spinner("Preparing data..."):
                try:
                    # BUGFIX: prepare_clustering_data() swallows its own
                    # exceptions and returns None, so the except branch here
                    # never fired — the old code marked 'prepared' True and
                    # showed success even on failure, and Step 3 then tried
                    # to cluster None. Only mark prepared on a real result.
                    prepared_data = prepare_clustering_data(st.session_state.tree_data)
                    if prepared_data is not None:
                        st.session_state.clustering_state['data'] = prepared_data
                        st.session_state.clustering_state['prepared'] = True
                        st.success("Data prepared! Now you can proceed to Step 2.")
                    else:
                        st.error("Preparation failed: no clustering data was produced.")
                except Exception as e:
                    st.error(f"Preparation failed: {str(e)}")

    # Step 2: Only show clustering controls if data is prepared
    if st.session_state.clustering_state['prepared']:
        st.markdown("### Step 2: Select number of clusters")
        # Guard: st.slider raises if max_value < min_value, which happens
        # when there are fewer than 2 winning nodes.
        slider_max = max(2, min(10, len(st.session_state.tree_data.winning_nodes)))
        # Store slider value in session state so it survives reruns.
        st.session_state.clustering_state['num_clusters'] = st.slider(
            'Number of clusters',
            min_value=2,
            max_value=slider_max,
            value=min(st.session_state.clustering_state['num_clusters'], slider_max),
        )

        # Step 3: Generate Clusters Button
        if st.button('Step 3: Generate clusters'):
            with st.spinner("Clustering..."):
                try:
                    results = perform_clustering(
                        st.session_state.clustering_state['data'],
                        st.session_state.clustering_state['num_clusters']
                    )

                    if results:
                        st.success("Clustering complete!")
                        # Show one representative route per cluster.
                        for cluster_num, node_ids in results['clusters_dict'].items():
                            with st.expander(f"Cluster {cluster_num}"):
                                if node_ids:
                                    node_id = node_ids[0]
                                    num_steps = len(st.session_state.tree_data.synthesis_route(node_id))
                                    route_score = round(st.session_state.tree_data.route_score(node_id), 3)
                                    st.image(
                                        get_route_svg(st.session_state.tree_data, node_id),
                                        caption=f"Route {node_id}; {num_steps} steps; Score: {route_score}"
                                    )
                except Exception as e:
                    st.error(f"Clustering failed: {str(e)}")

    # Clear memory button
    if st.button('Clear memory and start over'):
        st.cache_data.clear()
        del st.session_state.clustering_state
        del st.session_state.tree_data
        gc.collect()
        st.success("Memory cleared! Please refresh the page to start over.")
        st.rerun()
506
+
507
  stat_col, download_col = st.columns(2, gap="medium")
508
  with stat_col:
509
  st.subheader("Statistics")