# PETIMOT Explorer landing page (Streamlit).
#
# Top-to-bottom script: page config, sidebar (weights picker + links), hero
# banner, quick search, dataset metrics, feature cards, featured proteins with
# distribution histograms, and a weights download prompt.
import os
import sys

import numpy as np
import streamlit as st

# ── Page Config ──
st.set_page_config(
    page_title="PETIMOT Explorer",
    page_icon="🧬",
    layout="wide",
    initial_sidebar_state="expanded",
)

# ── Ensure PETIMOT is importable ──
# The repository root is two directory levels above this file.
PETIMOT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if PETIMOT_ROOT not in sys.path:
    sys.path.insert(0, PETIMOT_ROOT)


# ── Helpers ──
def _find_weight_files(weights_dir):
    """Return every ``.pt`` file below *weights_dir*, sorted by path.

    ``os.walk`` yields nothing when the directory does not exist, so a missing
    ``weights/`` folder simply produces an empty list. Sorting keeps the
    selectbox order (and therefore its default entry) stable between runs.
    """
    return sorted(
        os.path.join(root, fname)
        for root, _dirs, files in os.walk(weights_dir)
        for fname in files
        if fname.endswith(".pt")
    )


def _featured_rows_html(frame):
    """Build the markup for a ranked protein list, one entry per row of *frame*.

    Reads the ``name``, ``mean_disp_m0`` and ``seq_len`` columns; ranks start
    at 1 and follow the row order of *frame*.
    """
    return "".join(
        f"""
#{rank} {row['name']} {row['mean_disp_m0']:.3f} Å · {int(row['seq_len'])} res
"""
        for rank, (_, row) in enumerate(frame.iterrows(), start=1)
    )


def make_sparkline(values, title, color, unit=""):
    """Return a compact, legend-free Plotly histogram of *values*.

    Args:
        values: Data passed straight to ``go.Histogram(x=...)``.
        title: Text shown as the figure title.
        color: Bar colour.
        unit: Currently unused; kept so existing calls stay valid.
    """
    # Imported lazily so a missing plotly install surfaces inside the caller's
    # error handling instead of breaking the whole page at import time.
    import plotly.graph_objects as go

    fig = go.Figure()
    fig.add_trace(go.Histogram(
        x=values,
        nbinsx=40,
        marker_color=color,
        marker_line_width=0,
        opacity=0.85,
    ))
    fig.update_layout(
        template="plotly_dark",
        height=140,
        margin=dict(l=0, r=0, t=30, b=0),
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
        showlegend=False,
        title=dict(text=f"{title}", font=dict(size=13, color="#a5b4fc"), x=0.02),
        xaxis=dict(showgrid=False, showticklabels=True, color="#6366f1",
                   tickfont=dict(size=9)),
        yaxis=dict(showgrid=False, showticklabels=False),
    )
    return fig


def _render_featured_proteins(df):
    """Show the five most flexible and five most rigid proteins side by side."""
    st.markdown("---")
    st.markdown("### 🏆 Featured Proteins")
    col_flex, col_rigid = st.columns(2)
    with col_flex:
        st.markdown("**🔴 Most Flexible** (highest mean displacement)")
        st.markdown(_featured_rows_html(df.nlargest(5, "mean_disp_m0")),
                    unsafe_allow_html=True)
    with col_rigid:
        st.markdown("**🔵 Most Rigid** (lowest mean displacement)")
        st.markdown(_featured_rows_html(df.nsmallest(5, "mean_disp_m0")),
                    unsafe_allow_html=True)


def _render_sparklines(df):
    """Show three small histograms: length, mean and max displacement."""
    st.markdown("---")
    st.markdown("### 📈 At a Glance")
    panels = (
        ("seq_len", "Sequence Length", "#6366f1", "spark_len"),
        ("mean_disp_m0", "Mean Displacement (Å)", "#10b981", "spark_mean"),
        ("max_disp_m0", "Max Displacement (Å)", "#f59e0b", "spark_max"),
    )
    for column, (field, title, color, key) in zip(st.columns(3), panels):
        with column:
            st.plotly_chart(make_sparkline(df[field], title, color),
                            use_container_width=True, key=key)


# ── Custom CSS ──
# NOTE(review): the stylesheet payload is blank here — confirm whether the
# CSS was lost before this file was captured.
st.markdown(""" """, unsafe_allow_html=True)

# ── Sidebar ──
with st.sidebar:
    st.markdown("""
🧬
PETIMOT
Protein Motion from Sparse Data
SE(3)-Equivariant GNNs
""", unsafe_allow_html=True)
    st.divider()

    # Global settings
    st.markdown("### ⚙️ Settings")
    weights_dir = os.path.join(PETIMOT_ROOT, "weights")
    pt_files = _find_weight_files(weights_dir)
    if pt_files:
        # The choice is also stored in st.session_state["weights"] via `key`.
        selected_weights = st.selectbox(
            "Model weights",
            pt_files,
            format_func=os.path.basename,
            key="weights",
        )
    else:
        selected_weights = None
        st.warning("No weights found in `weights/`")

    st.divider()
    st.markdown(
        "**Links**\n"
        "- [Paper](https://arxiv.org/abs/2504.02839)\n"
        "- [GitHub](https://github.com/PhyloSofS-Team/PETIMOT)\n"
        "- [Data](https://figshare.com/s/ab400d852b4669a83b64)\n"
    )
    st.caption("GPL-3.0 · Lombard, Grudinin & Laine")

# ── Hero Section ──
st.markdown("""

🧬 PETIMOT Explorer

Explore protein motion predictions at scale — 36K+ proteins analyzed with SE(3)-Equivariant Graph Neural Networks

🔬 36K+ Proteins 🧠 SE(3)-Equivariant 📊 4 Motion Modes ⚡ CPU Inference
""", unsafe_allow_html=True)

# ── Data Status ──
# Project-local imports must come after PETIMOT_ROOT has been put on sys.path.
from app.utils.download import check_data_status, ensure_weights
from app.utils.data_loader import find_predictions_dir, load_prediction_index

status = check_data_status(PETIMOT_ROOT)

# ── Quick Search ──
st.markdown("### 🔍 Quick Search")
quick_search = st.text_input(
    "Search proteins by name",
    placeholder="e.g. 1ake, 4ake, lysozyme...",
    label_visibility="collapsed",
    key="home_search",
)
if quick_search:
    # Stored under "explorer_search" in session state, presumably for the
    # Explorer page to read — verify against that page.
    st.session_state["explorer_search"] = quick_search
    st.info(f"🔍 Navigate to the **Explorer** page to see results for **\"{quick_search}\"**")

# ── Metrics Row ──
st.markdown("### 📊 Dataset Overview")
col1, col2, col3, col4 = st.columns(4)
with col1:
    n_pred = status["predictions"]
    # A negative count falls back to the fixed dataset size
    # (presumably "count unknown" — confirm in check_data_status).
    pred_display = "36,675" if n_pred < 0 else f"{n_pred:,}"
    st.metric("🧬 Proteins", pred_display,
              delta="✅" if status["has_predictions"] else "⚠️ No data")
with col2:
    st.metric("🎯 Ground Truth", f"{status['ground_truth']:,}",
              delta="✅" if status["has_gt"] else "Not loaded")
with col3:
    st.metric("⚖️ Model Weights", "4.7M params",
              delta="✅" if status["has_weights"] else "Missing")
with col4:
    st.metric("🔮 Motion Modes", "4 per protein",
              delta="Normal Modes" if status["has_predictions"] else "—")

# ── Feature Cards ──
st.markdown("---")
col1, col2, col3 = st.columns(3)
with col1:
    st.markdown("""
🔍
Explorer
Browse pre-computed predictions for 36K+ proteins. Filter by sequence length, displacement, and view 3D motion visualizations with interactive controls.
""", unsafe_allow_html=True)
with col2:
    st.markdown("""
🔮
Inference
Predict motion modes for any protein structure. Upload a PDB file or fetch from RCSB. Runs on CPU in 5–30 seconds.
""", unsafe_allow_html=True)
with col3:
    st.markdown("""
📊
Statistics
Dataset-wide analysis with interactive charts: displacement distributions, correlation heatmaps, leaderboards, and length-stratified analysis.
""", unsafe_allow_html=True)

# ── Featured Proteins ──
if status["has_predictions"]:
    pred_dir = find_predictions_dir(PETIMOT_ROOT)
    if pred_dir:
        # Deliberately broad: this section is optional decoration, so any
        # failure degrades to a warning instead of breaking the page.
        try:
            df = load_prediction_index(pred_dir)
            if not df.empty:
                _render_featured_proteins(df)
                # ── Sparkline overview ──
                _render_sparklines(df)
        except Exception as e:
            st.warning(f"Could not load featured proteins: {e}")

# ── Auto-download if missing ──
if not status["has_weights"]:
    st.divider()
    st.warning("⚠️ Model weights not found.")
    if st.button("⬇️ Download weights from Figshare (18 MB)", type="primary"):
        with st.spinner("Downloading..."):
            wt = ensure_weights(PETIMOT_ROOT)
        if wt:
            st.success(f"✅ Weights downloaded: {os.path.basename(wt)}")
            # Re-run the script so the page picks up the new weights.
            st.rerun()
        else:
            st.error("Download failed. Please manually download from "
                     "[Figshare](https://figshare.com/s/ab400d852b4669a83b64) "
                     "and place in `weights/`")

if not status["has_predictions"] and status["has_weights"]:
    st.info("💡 No pre-computed predictions yet. "
            "Use the **Inference** page to predict "
            "individual proteins, or run batch inference from the Colab notebook.")