import streamlit as st
import os, sys
import numpy as np
# ── Page Config ──
# Browser-tab title and icon, wide layout, sidebar open on first load.
st.set_page_config(
    page_title="PETIMOT Explorer",
    page_icon="🧬",
    layout="wide",
    initial_sidebar_state="expanded",
)
# ── Ensure PETIMOT is importable ──
# PETIMOT_ROOT is the parent of the directory that contains this file.
# It is put at the front of sys.path (once) so that packages located
# there can be imported further down.
_this_file = os.path.abspath(__file__)
PETIMOT_ROOT = os.path.dirname(os.path.dirname(_this_file))
if PETIMOT_ROOT not in sys.path:
    sys.path.insert(0, PETIMOT_ROOT)
# ── Custom CSS ──
# Raw-HTML injection point for page styling. The payload visible here is
# a single newline, i.e. no style rules.
st.markdown("\n", unsafe_allow_html=True)
# ── Sidebar ──
with st.sidebar:
    # Branding header.
    st.markdown("""
🧬
PETIMOT
Protein Motion from Sparse Data
SE(3)-Equivariant GNNs
""", unsafe_allow_html=True)
    st.divider()

    # Global settings
    # NOTE(review): the gear emoji is inferred from a mis-decoded byte
    # sequence — confirm the intended glyph.
    st.markdown("### ⚙️ Settings")

    # Collect every *.pt file below <PETIMOT_ROOT>/weights, recursively.
    # os.walk yields directory entries in arbitrary order, so the result
    # is sorted to keep the option order (and thus the default selection)
    # stable across runs and machines.
    weights_dir = os.path.join(PETIMOT_ROOT, "weights")
    pt_files = []
    if os.path.isdir(weights_dir):
        pt_files = sorted(
            os.path.join(root, filename)
            for root, _dirs, files in os.walk(weights_dir)
            for filename in files
            if filename.endswith(".pt")
        )

    if pt_files:
        # Options are full paths; only the file name is displayed.
        selected_weights = st.selectbox(
            "Model weights",
            pt_files,
            format_func=os.path.basename,
            key="weights",
        )
    else:
        selected_weights = None
        st.warning("No weights found in `weights/`")

    st.divider()
    st.markdown("""
**Links**
- [Paper](https://arxiv.org/abs/2504.02839)
- [GitHub](https://github.com/PhyloSofS-Team/PETIMOT)
- [Data](https://figshare.com/s/ab400d852b4669a83b64)
""")
    st.caption("GPL-3.0 · Lombard, Grudinin & Laine")
# ── Hero Section ──
# Page headline, tagline and four highlight badges.
# NOTE(review): the text was mis-decoded; the DNA, microscope and
# lightning glyphs are recoverable from the surviving bytes, while the
# dash and the "SE(3)" / "Motion Modes" emoji are inferred from context —
# confirm the intended glyphs.
st.markdown("""
🧬 PETIMOT Explorer
Explore protein motion predictions at scale — 36K+ proteins analyzed with SE(3)-Equivariant Graph Neural Networks
🔬 36K+ Proteins
🧠 SE(3)-Equivariant
📊 4 Motion Modes
⚡ CPU Inference
""", unsafe_allow_html=True)
# ── Data Status ──
# These imports sit below the sys.path setup earlier in the file;
# presumably the `app` package is resolved through that entry.
from app.utils.download import (
    check_data_status,
    ensure_weights,
)
from app.utils.data_loader import (
    find_predictions_dir,
    load_prediction_index,
)

# `status` is read throughout the rest of the page by key:
# 'predictions', 'ground_truth', 'has_predictions', 'has_gt', 'has_weights'.
status = check_data_status(PETIMOT_ROOT)
# ── Quick Search ──
# NOTE(review): both emoji in this section are inferred from mis-decoded
# bytes — confirm the intended glyphs.
st.markdown("### 🔍 Quick Search")
quick_search = st.text_input(
    "Search proteins by name",
    placeholder="e.g. 1ake, 4ake, lysozyme...",
    label_visibility="collapsed",
    key="home_search",
)
if quick_search:
    # Stash the query in session state under "explorer_search"
    # (presumably picked up by the Explorer page) and point the user there.
    st.session_state["explorer_search"] = quick_search
    st.info(f"👉 Navigate to the **Explorer** page to see results for **\"{quick_search}\"**")
# ── Metrics Row ──
# Four headline metrics driven by `status`.
# NOTE(review): the check mark, warning sign, DNA and target glyphs are
# recoverable from the surviving bytes; the section, "Model Weights" and
# "Motion Modes" emoji and the fallback dash are inferred — confirm.
st.markdown("### 📊 Dataset Overview")
col1, col2, col3, col4 = st.columns(4)

with col1:
    n_pred = status['predictions']
    # A negative count is displayed as the fixed figure "36,675".
    pred_display = "36,675" if n_pred < 0 else f"{n_pred:,}"
    st.metric(
        "🧬 Proteins",
        pred_display,
        delta="✅" if status['has_predictions'] else "⚠️ No data",
    )

with col2:
    st.metric(
        "🎯 Ground Truth",
        f"{status['ground_truth']:,}",
        delta="✅" if status['has_gt'] else "Not loaded",
    )

with col3:
    st.metric(
        "⚙️ Model Weights",
        "4.7M params",
        delta="✅" if status['has_weights'] else "Missing",
    )

with col4:
    st.metric(
        "🔮 Motion Modes",
        "4 per protein",
        delta="Normal Modes" if status['has_predictions'] else "—",
    )
# ── Feature Cards ──
# One descriptive card per page, rendered left to right in three columns.
# NOTE(review): the three card emoji are inferred from mis-decoded bytes —
# confirm the intended glyphs.
st.markdown("---")
feature_cards = (
    """
🔍
Explorer
Browse pre-computed predictions for 36K+ proteins. Filter by sequence length,
displacement, and view 3D motion visualizations with interactive controls.
""",
    """
🔮
Inference
Predict motion modes for any protein structure. Upload a PDB file or fetch
from RCSB. Runs on CPU in 5–30 seconds.
""",
    """
📊
Statistics
Dataset-wide analysis with interactive charts: displacement distributions,
correlation heatmaps, leaderboards, and length-stratified analysis.
""",
)
for card_col, card_text in zip(st.columns(3), feature_cards):
    with card_col:
        st.markdown(card_text, unsafe_allow_html=True)
# ── Featured Proteins ──
import html


def _ranked_rows_html(rows):
    """Return the markup listing the rows of DataFrame *rows* as ranked entries.

    For each row, in order, the output contains its 1-based rank, its
    ``name``, its ``mean_disp_m0`` value with three decimals followed by
    "Å", and its ``seq_len`` as an integer residue count.

    The name is HTML-escaped because the result is rendered with
    ``unsafe_allow_html=True``.
    """
    return "".join(
        f"""
#{rank}
{html.escape(str(row['name']))}
{row['mean_disp_m0']:.3f} Å
· {int(row['seq_len'])} res
"""
        for rank, (_, row) in enumerate(rows.iterrows(), start=1)
    )


def make_sparkline(values, title, color, unit=""):
    """Build a compact 40-bin histogram figure of *values*.

    Args:
        values: Data to bin along the x axis.
        title: Text shown as the chart title.
        color: Bar colour.
        unit: Currently unused; kept so existing calls stay valid.

    Returns:
        A 140-px-high dark-themed Plotly figure with transparent
        backgrounds, no legend, no grid and hidden y tick labels.

    Relies on ``plotly.graph_objects`` being bound to the module-level
    name ``go`` before the first call.
    """
    fig = go.Figure()
    fig.add_trace(go.Histogram(
        x=values,
        nbinsx=40,
        marker_color=color,
        marker_line_width=0,
        opacity=0.85,
    ))
    fig.update_layout(
        template="plotly_dark",
        height=140,
        margin=dict(l=0, r=0, t=30, b=0),
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
        showlegend=False,
        title=dict(text=str(title), font=dict(size=13, color="#a5b4fc"), x=0.02),
        xaxis=dict(showgrid=False, showticklabels=True, color="#6366f1",
                   tickfont=dict(size=9)),
        yaxis=dict(showgrid=False, showticklabels=False),
    )
    return fig


if status['has_predictions']:
    pred_dir = find_predictions_dir(PETIMOT_ROOT)
    if pred_dir:
        # Anything that goes wrong while loading or plotting is reported as
        # a warning so the rest of the home page still renders.
        try:
            df = load_prediction_index(pred_dir)
            if not df.empty:
                st.markdown("---")
                # NOTE(review): the heading emoji here and on "At a Glance"
                # are inferred from mis-decoded bytes — confirm the glyphs.
                st.markdown("### 🌟 Featured Proteins")
                col_flex, col_rigid = st.columns(2)

                with col_flex:
                    st.markdown("**🔴 Most Flexible** (highest mean displacement)")
                    st.markdown(
                        _ranked_rows_html(df.nlargest(5, "mean_disp_m0")),
                        unsafe_allow_html=True,
                    )

                with col_rigid:
                    st.markdown("**🔵 Most Rigid** (lowest mean displacement)")
                    st.markdown(
                        _ranked_rows_html(df.nsmallest(5, "mean_disp_m0")),
                        unsafe_allow_html=True,
                    )

                # ── Sparkline overview ──
                st.markdown("---")
                st.markdown("### 📈 At a Glance")
                # Imported lazily so a missing plotly only disables this
                # section (caught by the handler below).
                import plotly.graph_objects as go

                # (DataFrame column, chart title, bar colour, widget key)
                sparkline_specs = (
                    ("seq_len", "Sequence Length", "#6366f1", "spark_len"),
                    ("mean_disp_m0", "Mean Displacement (Å)", "#10b981", "spark_mean"),
                    ("max_disp_m0", "Max Displacement (Å)", "#f59e0b", "spark_max"),
                )
                for spark_col, (field, title, color, key) in zip(
                    st.columns(3), sparkline_specs
                ):
                    with spark_col:
                        fig = make_sparkline(df[field], title, color)
                        st.plotly_chart(fig, use_container_width=True, key=key)
        except Exception as e:
            st.warning(f"Could not load featured proteins: {e}")
# ── Auto-download if missing ──
# Offer a one-click download when no weights are available.
if not status['has_weights']:
    st.divider()
    st.warning("⚠️ Model weights not found.")
    if st.button("⬇️ Download weights from Figshare (18 MB)", type="primary"):
        with st.spinner("Downloading..."):
            wt = ensure_weights(PETIMOT_ROOT)
        if wt:
            # A truthy result is treated as the path of the downloaded file.
            st.success(f"✅ Weights downloaded: {os.path.basename(wt)}")
            # Re-run the script so the page picks up the new weights.
            st.rerun()
        else:
            st.error("Download failed. Please manually download from "
                     "[Figshare](https://figshare.com/s/ab400d852b4669a83b64) "
                     "and place in `weights/`")
# Weights are present but nothing has been predicted yet: explain how to
# produce predictions.
if status['has_weights'] and not status['has_predictions']:
    st.info("💡 No pre-computed predictions yet. Use the **Inference** page to predict "
            "individual proteins, or run batch inference from the Colab notebook.")