Spaces:
Sleeping
Sleeping
Gilmullin Almaz
committed on
Commit
·
511ff8c
1
Parent(s):
52cfb6f
rerun bugs fix try
Browse files
app.py
CHANGED
|
@@ -454,102 +454,82 @@ if submit_planning:
|
|
| 454 |
st.error(f"Error in prepare_clustering_data: {str(e)}")
|
| 455 |
return None
|
| 456 |
|
| 457 |
-
@st.cache_data
def perform_clustering(_reduced_super_cgrs_dict, num_clusters, chunk_size=10):
    """Fingerprint every entry in chunks and cluster the fingerprints.

    Args:
        _reduced_super_cgrs_dict: Mapping whose values are passed one by one
            to ``MorganFingerprint.calculate``; the keys are reused as the
            keys of the fingerprint mapping.
            NOTE(review): per Streamlit's caching docs, arguments whose name
            starts with an underscore are left out of the cache key, so a
            cached result may be reused for different data when
            ``num_clusters`` and ``chunk_size`` are unchanged -- confirm this
            is intended.
        num_clusters: Forwarded to ``cluster_molecules_optimized`` as
            ``max_clusters``.
        chunk_size: Number of entries fingerprinted between two
            ``gc.collect()`` calls.

    Returns:
        Whatever ``cluster_molecules_optimized`` returns, or ``None`` when any
        step raises (the error is reported through ``st.error``).
    """
    try:
        mfp = MorganFingerprint()

        # Snapshot the keys once instead of rebuilding the full key list on
        # every chunk iteration.
        keys = list(_reduced_super_cgrs_dict)

        # Process fingerprints in chunks
        all_fingerprints = {}
        for start in range(0, len(keys), chunk_size):
            for key in keys[start:start + chunk_size]:
                all_fingerprints[key] = mfp.calculate(_reduced_super_cgrs_dict[key])
            # Collect garbage after each chunk, as the chunked design intends.
            gc.collect()

        return cluster_molecules_optimized(all_fingerprints, max_clusters=num_clusters)
    except Exception as e:
        # UI boundary: report the failure instead of crashing the app.
        st.error(f"Error in perform_clustering: {str(e)}")
        return None
|
| 477 |
-
|
| 478 |
def memory_status():
    """Return a one-line report of this process's resident memory in MB."""
    rss_bytes = psutil.Process().memory_info().rss
    # Two divisions by 1024 convert bytes to megabytes.
    rss_mb = rss_bytes / 1024 / 1024
    return f"Memory usage: {rss_mb:.2f} MB"
|
| 483 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 484 |
cluster_box, z = st.columns(2, gap="medium")
|
| 485 |
with cluster_box:
|
| 486 |
-
# Initialize session state
|
| 487 |
-
if 'clustering_state' not in st.session_state:
|
| 488 |
-
st.session_state.clustering_state = {
|
| 489 |
-
'prepared': False,
|
| 490 |
-
'data': None,
|
| 491 |
-
'last_memory': 0
|
| 492 |
-
}
|
| 493 |
-
|
| 494 |
st.write(memory_status())
|
| 495 |
-
st.write(f"Number of winning nodes: {len(
|
| 496 |
-
|
| 497 |
-
# Memory management controls
|
| 498 |
-
if st.button('Clear memory'):
|
| 499 |
-
st.cache_data.clear()
|
| 500 |
-
st.session_state.clustering_state = {
|
| 501 |
-
'prepared': False,
|
| 502 |
-
'data': None,
|
| 503 |
-
'last_memory': 0
|
| 504 |
-
}
|
| 505 |
-
gc.collect()
|
| 506 |
-
st.success("Memory cleared!")
|
| 507 |
-
st.rerun()
|
| 508 |
|
| 509 |
-
#
|
| 510 |
if not st.session_state.clustering_state['prepared']:
|
| 511 |
-
if st.button('Prepare clustering data'):
|
| 512 |
with st.spinner("Preparing data..."):
|
| 513 |
try:
|
| 514 |
-
|
| 515 |
-
st.session_state.clustering_state['data'] = prepare_clustering_data(tree)
|
| 516 |
st.session_state.clustering_state['prepared'] = True
|
| 517 |
-
|
| 518 |
-
st.success("Data prepared!")
|
| 519 |
except Exception as e:
|
| 520 |
st.error(f"Preparation failed: {str(e)}")
|
| 521 |
|
| 522 |
-
#
|
| 523 |
if st.session_state.clustering_state['prepared']:
|
| 524 |
-
|
|
|
|
|
|
|
| 525 |
'Number of clusters',
|
| 526 |
min_value=2,
|
| 527 |
-
max_value=min(10, len(
|
| 528 |
-
value=
|
| 529 |
)
|
| 530 |
|
| 531 |
-
|
|
|
|
| 532 |
with st.spinner("Clustering..."):
|
| 533 |
try:
|
| 534 |
results = perform_clustering(
|
| 535 |
st.session_state.clustering_state['data'],
|
| 536 |
-
num_clusters
|
| 537 |
)
|
| 538 |
|
| 539 |
if results:
|
|
|
|
| 540 |
for cluster_num, node_ids in results['clusters_dict'].items():
|
| 541 |
with st.expander(f"Cluster {cluster_num}"):
|
| 542 |
if node_ids:
|
| 543 |
node_id = node_ids[0]
|
| 544 |
-
num_steps = len(
|
| 545 |
-
route_score = round(
|
| 546 |
st.image(
|
| 547 |
-
get_route_svg(
|
| 548 |
caption=f"Route {node_id}; {num_steps} steps; Score: {route_score}"
|
| 549 |
)
|
| 550 |
except Exception as e:
|
| 551 |
st.error(f"Clustering failed: {str(e)}")
|
| 552 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 553 |
stat_col, download_col = st.columns(2, gap="medium")
|
| 554 |
|
| 555 |
with stat_col:
|
|
|
|
| 454 |
st.error(f"Error in prepare_clustering_data: {str(e)}")
|
| 455 |
return None
|
| 456 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 457 |
def memory_status():
    """Describe how much resident memory the running process uses.

    Returns:
        A string of the form ``"Memory usage: <value> MB"`` with the value
        rounded to two decimals.
    """
    usage_mb = psutil.Process().memory_info().rss / 1024 / 1024
    return "Memory usage: {:.2f} MB".format(usage_mb)
|
| 462 |
|
| 463 |
+
# Initialize session state for tree and clustering data.
# Both are kept in st.session_state so the multi-step flow below can pick up
# where it left off on a later script run.
if 'tree_data' not in st.session_state:
    st.session_state.tree_data = tree
if 'clustering_state' not in st.session_state:
    st.session_state.clustering_state = {
        'prepared': False,
        'data': None,
        'num_clusters': 2,
    }

cluster_box, z = st.columns(2, gap="medium")
with cluster_box:
    # Short aliases; `clustering_state` is the same dict object that lives in
    # session state, so writes through it persist.
    clustering_state = st.session_state.clustering_state
    tree_data = st.session_state.tree_data
    n_winning = len(tree_data.winning_nodes)

    st.write(memory_status())
    st.write(f"Number of winning nodes: {n_winning}")

    # Step 1: Prepare Data Button
    if not clustering_state['prepared']:
        if st.button('Step 1: Prepare clustering data'):
            with st.spinner("Preparing data..."):
                try:
                    prepared_data = prepare_clustering_data(tree_data)
                    # prepare_clustering_data reports its own failure via
                    # st.error and returns None; only advance to Step 2 when
                    # it actually produced data.
                    if prepared_data is not None:
                        clustering_state['data'] = prepared_data
                        clustering_state['prepared'] = True
                        st.success("Data prepared! Now you can proceed to Step 2.")
                except Exception as e:
                    st.error(f"Preparation failed: {e}")

    # Step 2: Only show clustering controls if data is prepared
    if clustering_state['prepared']:
        st.markdown("### Step 2: Select number of clusters")
        max_clusters = min(10, n_winning)

        if max_clusters < 2:
            # With min_value=2 the slider range would be invalid here.
            st.warning("At least 2 winning nodes are required for clustering.")
        else:
            if max_clusters == 2:
                # Only one admissible value, so there is nothing to select.
                clustering_state['num_clusters'] = 2
                st.write("Number of clusters: 2")
            else:
                # Store slider value in session state; clamp the remembered
                # value so it always lies inside the current bounds.
                remembered = clustering_state['num_clusters']
                clustering_state['num_clusters'] = st.slider(
                    'Number of clusters',
                    min_value=2,
                    max_value=max_clusters,
                    value=max(2, min(remembered, max_clusters)),
                )

            # Step 3: Generate Clusters Button
            if st.button('Step 3: Generate clusters'):
                with st.spinner("Clustering..."):
                    try:
                        results = perform_clustering(
                            clustering_state['data'],
                            clustering_state['num_clusters'],
                        )

                        if results:
                            st.success("Clustering complete!")
                            for cluster_num, node_ids in results['clusters_dict'].items():
                                with st.expander(f"Cluster {cluster_num}"):
                                    if node_ids:
                                        # Show the first route of the cluster
                                        # as its representative.
                                        node_id = node_ids[0]
                                        num_steps = len(tree_data.synthesis_route(node_id))
                                        route_score = round(tree_data.route_score(node_id), 3)
                                        st.image(
                                            get_route_svg(tree_data, node_id),
                                            caption=f"Route {node_id}; {num_steps} steps; Score: {route_score}",
                                        )
                    except Exception as e:
                        st.error(f"Clustering failed: {e}")

    # Clear memory button
    # NOTE(review): nesting level reconstructed from a flattened diff; placed
    # inside `with cluster_box:` like the previous version -- confirm.
    if st.button('Clear memory and start over'):
        st.cache_data.clear()
        del st.session_state.clustering_state
        del st.session_state.tree_data
        gc.collect()
        # NOTE(review): st.rerun() restarts the script right after this
        # message is queued, so the message is presumably not visible for
        # long -- confirm whether it should be shown after the rerun instead.
        st.success("Memory cleared! Please refresh the page to start over.")
        st.rerun()
|
| 532 |
+
|
| 533 |
stat_col, download_col = st.columns(2, gap="medium")
|
| 534 |
|
| 535 |
with stat_col:
|