Spaces:
Running
Running
Gilmullin Almaz
committed on
Commit
·
2e221c0
1
Parent(s):
1cc1548
bugs fixed
Browse files
- app.py +16 -15
- cluster/clustering.py +2 -2
- cluster/rs_cgr.py +2 -0
app.py
CHANGED
|
@@ -250,27 +250,28 @@ if submit_planning:
|
|
| 250 |
cluster_box, z = st.columns(2, gap="medium")
|
| 251 |
with cluster_box:
|
| 252 |
num_clusters = st.slider('Number of clusters to display', min_value=2, max_value=10, value=2)
|
| 253 |
-
# submit_clustering = st.button('Start clustering')
|
| 254 |
|
| 255 |
-
|
| 256 |
-
st.subheader("Examples of clusters")
|
| 257 |
-
super_cgrs_dict = reassign_nums(tree)
|
| 258 |
|
| 259 |
-
|
|
|
|
|
|
|
| 260 |
|
| 261 |
-
|
| 262 |
|
| 263 |
-
|
| 264 |
|
| 265 |
-
|
| 266 |
-
clusters = results['clusters_dict']
|
| 267 |
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
|
|
|
|
|
|
|
|
|
|
| 274 |
|
| 275 |
stat_col, download_col = st.columns(2, gap="medium")
|
| 276 |
|
|
|
|
| 250 |
cluster_box, z = st.columns(2, gap="medium")
|
| 251 |
with cluster_box:
|
| 252 |
num_clusters = st.slider('Number of clusters to display', min_value=2, max_value=10, value=2)
|
|
|
|
| 253 |
|
| 254 |
+
submit_clustering = st.button('Start clustering')
|
|
|
|
|
|
|
| 255 |
|
| 256 |
+
if submit_clustering:
|
| 257 |
+
st.subheader("Examples of clusters")
|
| 258 |
+
super_cgrs_dict = reassign_nums(tree)
|
| 259 |
|
| 260 |
+
reduced_super_cgrs_dict = process_all_rs_cgrs(super_cgrs_dict)
|
| 261 |
|
| 262 |
+
mfp = MorganFingerprint()
|
| 263 |
|
| 264 |
+
results = cluster_molecules(reduced_super_cgrs_dict, mfp, max_clusters=num_clusters)
|
|
|
|
| 265 |
|
| 266 |
+
# Access results
|
| 267 |
+
clusters = results['clusters_dict']
|
| 268 |
+
|
| 269 |
+
for cluster_num, node_id_list in clusters.items():
|
| 270 |
+
st.markdown(f"Cluster's number: ``{cluster_num}``")
|
| 271 |
+
node_id = node_id_list[0]
|
| 272 |
+
num_steps = len(tree.synthesis_route(node_id))
|
| 273 |
+
route_score = round(tree.route_score(node_id), 3)
|
| 274 |
+
st.image(get_route_svg(tree, node_id), caption=f"Route {node_id}; {num_steps} steps; Route score: {route_score}")
|
| 275 |
|
| 276 |
stat_col, download_col = st.columns(2, gap="medium")
|
| 277 |
|
cluster/clustering.py
CHANGED
|
@@ -95,7 +95,7 @@ def optimal_cluster_num(Z, distance_matrix, max_clusters=10):
|
|
| 95 |
|
| 96 |
return cluster_range[np.argmax(silhouette_scores)]
|
| 97 |
|
| 98 |
-
def perform_clustering(Z, threshold=0.0, max_clusters=10):
|
| 99 |
"""Perform hierarchical clustering with automatic cluster number optimization.
|
| 100 |
|
| 101 |
Args:
|
|
@@ -158,7 +158,7 @@ def cluster_molecules(cgrs, fingerprint_method, threshold=0.0, max_clusters=10,
|
|
| 158 |
Z = calculate_linkage(similarity_df, method=linkage_method)
|
| 159 |
|
| 160 |
# Perform clustering
|
| 161 |
-
cluster_labels = perform_clustering(Z, threshold, max_clusters)
|
| 162 |
|
| 163 |
# Create clusters dictionary
|
| 164 |
clusters_dict = create_clusters_dict(cluster_labels, np.array(labels))
|
|
|
|
| 95 |
|
| 96 |
return cluster_range[np.argmax(silhouette_scores)]
|
| 97 |
|
| 98 |
+
def perform_clustering(Z, similarity_df, threshold=0.0, max_clusters=10):
|
| 99 |
"""Perform hierarchical clustering with automatic cluster number optimization.
|
| 100 |
|
| 101 |
Args:
|
|
|
|
| 158 |
Z = calculate_linkage(similarity_df, method=linkage_method)
|
| 159 |
|
| 160 |
# Perform clustering
|
| 161 |
+
cluster_labels = perform_clustering(Z, similarity_df, threshold, max_clusters)
|
| 162 |
|
| 163 |
# Create clusters dictionary
|
| 164 |
clusters_dict = create_clusters_dict(cluster_labels, np.array(labels))
|
cluster/rs_cgr.py
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
|
|
|
|
|
| 1 |
def s_cgr2rs_cgr(s_cgr):
|
| 2 |
cgr_prods = [s_cgr.substructure(c) for c in s_cgr.connected_components]
|
| 3 |
target_cgr = cgr_prods[0]
|
|
|
|
| 1 |
+
from CGRtools.containers.bonds import DynamicBond
|
| 2 |
+
|
| 3 |
def s_cgr2rs_cgr(s_cgr):
|
| 4 |
cgr_prods = [s_cgr.substructure(c) for c in s_cgr.connected_components]
|
| 5 |
target_cgr = cgr_prods[0]
|