Spaces:

Valmbd
/

Petimot

Running

App Files Files Community

Valmbd committed 21 days ago

Commit

2d07a5e

verified ·

1 Parent(s): ee42d0e

[10:35 UTC] AA sequence viewer + rich correlation + fix fillcolor/statsmodels/disp_profile

Browse files

Files changed (4) hide show

app/pages/1_🔍_Explorer.py +16 -4
app/pages/3_📊_Statistics.py +109 -40
app/pages/5_🔬_Protein_Detail.py +16 -3
app/utils/bio_api.py +156 -32

app/pages/1_🔍_Explorer.py CHANGED Viewed

@@ -14,7 +14,7 @@ from app.utils.data_loader import (
     find_predictions_dir, load_prediction_index, load_modes, load_embeddings,
     load_ground_truth, load_pdb_text, PETIMOT_ROOT
 )
-from app.utils.bio_api import get_protein_mutations
 from app.components.embedding_viewer import render_embedding_viewer
 from app.components.viewer_3d import render_motion_viewer, render_mode_comparison, render_deformed_viewer, render_animated_viewer, render_pred_vs_gt_viewer
 from app.components.sequence_viewer import render_sequence_viewer, render_displacement_chart
@@ -68,11 +68,21 @@ def render_protein_detail(pred_dir, gt_dir, protein_name, key_suffix="", compact
         return
     n_res = len(list(modes.values())[0])
-    seq = gt.get("seq", "X" * n_res) if gt else "X" * n_res
     ca = gt["bb"][:, 1] if gt and "bb" in gt else np.zeros((n_res, 3))
     coverage = gt.get("coverage", np.ones(n_res)) if gt else np.ones(n_res)
     eigenvalues = gt.get("eigvals", None) if gt else None
-    pdb_text = None
     pdb_path = os.path.join(PETIMOT_ROOT, "pdbs", f"{protein_name}.pdb")
     if os.path.exists(pdb_path):
@@ -355,7 +365,9 @@ with col_dl:
 # ═══════════════════════════════════════
 if view_mode == "🔍 Browse":
     cols_to_show = ["name", "seq_len", "n_modes", "mean_disp_m0", "max_disp_m0", "top_residue"]
-    if "disp_profile" in df_filtered.columns:
         cols_to_show.append("disp_profile")
     selected_idx = st.dataframe(

     find_predictions_dir, load_prediction_index, load_modes, load_embeddings,
     load_ground_truth, load_pdb_text, PETIMOT_ROOT
 )
+from app.utils.bio_api import get_protein_mutations, get_sequence_from_pdb, render_sequence_aa
 from app.components.embedding_viewer import render_embedding_viewer
 from app.components.viewer_3d import render_motion_viewer, render_mode_comparison, render_deformed_viewer, render_animated_viewer, render_pred_vs_gt_viewer
 from app.components.sequence_viewer import render_sequence_viewer, render_displacement_chart
         return
     n_res = len(list(modes.values())[0])
+    seq = gt.get("seq", "") if gt else ""
     ca = gt["bb"][:, 1] if gt and "bb" in gt else np.zeros((n_res, 3))
     coverage = gt.get("coverage", np.ones(n_res)) if gt else np.ones(n_res)
     eigenvalues = gt.get("eigvals", None) if gt else None
+    # ── Amino acid sequence strip ────────────────────────────────────
+    if not seq or len(seq) < 3:
+        with st.spinner("Fetching sequence from RCSB..."):
+            seq = get_sequence_from_pdb(protein_name) or "X" * n_res
+    mode0_vecs = list(modes.values())[0]
+    mode0_mags = np.linalg.norm(mode0_vecs, axis=1) if mode0_vecs.ndim > 1 else np.abs(mode0_vecs)
+    mutations = get_protein_mutations(protein_name, n_res)
+    render_sequence_aa(seq[:n_res], displacements=mode0_mags, mutations=mutations,
+                       title=f"🔤 {protein_name} — sequence (opacity = mode 0 displacement)")
+    st.divider()
     pdb_path = os.path.join(PETIMOT_ROOT, "pdbs", f"{protein_name}.pdb")
     if os.path.exists(pdb_path):
 # ═══════════════════════════════════════
 if view_mode == "🔍 Browse":
     cols_to_show = ["name", "seq_len", "n_modes", "mean_disp_m0", "max_disp_m0", "top_residue"]
+    has_profiles = ("disp_profile" in df_filtered.columns and
+                    df_filtered["disp_profile"].apply(lambda x: len(x) if isinstance(x, list) else 0).max() > 0)
+    if has_profiles:
         cols_to_show.append("disp_profile")
     selected_idx = st.dataframe(

app/pages/3_📊_Statistics.py CHANGED Viewed

@@ -185,69 +185,138 @@ fig.update_layout(**PLOT_LAYOUT, height=400, showlegend=False)
 st.plotly_chart(fig, use_container_width=True, key="violins")
-# ═══════════════════════════════════════
-# SECTION 2: Correlation Heatmap + Scatter
-# ═══════════════════════════════════════
-st.markdown('<div class="section-header">🔗 Prediction Correlation Analysis <span style="color:#6366f1">[PREDICTIONS]</span></div>', unsafe_allow_html=True)
-col_heat, col_scatter = st.columns([1, 2])
-with col_heat:
-    _all_corr = {"seq_len": "Seq Length", "mean_disp_m0": "Mean Δ",
-                  "max_disp_m0": "Max Δ", "top_residue": "Top Residue",
-                  "n_modes": "# Modes"}
-    corr_cols = [c for c in _all_corr if c in df.columns and df[c].notna().sum() > 5]
-    labels = [_all_corr[c] for c in corr_cols]
-    corr_matrix = df[corr_cols].dropna().corr().fillna(0)
     fig_h = go.Figure(data=go.Heatmap(
-        z=corr_matrix.values,
-        x=labels, y=labels,
         colorscale=[[0, "#1e1b4b"], [0.5, "#4338ca"], [1, "#ec4899"]],
-        text=np.round(corr_matrix.values, 2),
         texttemplate="%{text}",
-        textfont=dict(size=13, color="white"),
         hovertemplate="<b>%{x}</b> vs <b>%{y}</b><br>r = %{z:.3f}<extra></extra>",
         zmin=-1, zmax=1,
         colorbar=dict(title="r", tickfont=dict(color="#a5b4fc")),
     ))
-    fig_h.update_layout(**PLOT_LAYOUT, height=380, title="Feature Correlation")
     st.plotly_chart(fig_h, use_container_width=True, key="heatmap")
-with col_scatter:
-    fig_s = go.Figure()
-    # 2D density contour
     fig_s.add_trace(go.Histogram2dContour(
         x=df.seq_len, y=df.mean_disp_m0,
         colorscale=[[0, "rgba(30,27,75,0)"], [0.3, "rgba(99,102,241,0.3)"],
                      [0.6, "rgba(139,92,246,0.5)"], [1, "rgba(236,72,153,0.7)"]],
-        ncontours=15,
-        showscale=False,
-        hoverinfo="skip",
     ))
-    # Scatter overlay
     fig_s.add_trace(go.Scattergl(
-        x=df.seq_len, y=df.mean_disp_m0,
-        mode="markers",
-        marker=dict(
-            size=3, color=df.max_disp_m0,
-            colorscale="Viridis", showscale=True,
-            colorbar=dict(title="Max Δ (Å)", tickfont=dict(color="#a5b4fc")),
-            opacity=0.6,
-        ),
         text=df.name,
         hovertemplate="<b>%{text}</b><br>Length: %{x}<br>Mean Δ: %{y:.3f} Å<extra></extra>",
     ))
-    fig_s.update_layout(
-        **PLOT_LAYOUT, height=380, showlegend=False,
-        title="Sequence Length vs Mean Displacement",
-        xaxis_title="Sequence Length",
-        yaxis_title="Mean Displacement (Å)",
-    )
     st.plotly_chart(fig_s, use_container_width=True, key="scatter")
 # ═══════════════════════════════════════
 # SECTION 3: Top-10 Leaderboards
 # ═══════════════════════════════════════

 st.plotly_chart(fig, use_container_width=True, key="violins")
+# Load the richer merged dataset (protein_stats.csv has 19 structural features + PETIMOT metrics)
+_stats_path = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "data", "protein_stats.csv")
+try:
+    import pandas as _pd_st
+    df_stats = _pd_st.read_csv(_stats_path)
+    _has_stats = len(df_stats) > 100
+except Exception:
+    df_stats = None
+    _has_stats = False
+if _has_stats:
+    # ── Rich correlation heatmap using 7k merged proteins ──
+    _rich_cols = {
+        "rmsip_sq":    "RMSIP²",
+        "nsse_min":    "NSSE",
+        "ref_len":     "Seq Length",
+        "rmsd_mean":   "RMSD mean",
+        "percent_id":  "Seq %id",
+        "%var_1st":    "%var mode0",
+        "cov":         "Coverage",
+        "global_quality": "Cluster quality",
+        "nb_members":  "Cluster size",
+    }
+    _rc = [c for c in _rich_cols if c in df_stats.columns and df_stats[c].notna().sum() > 50]
+    _rl = [_rich_cols[c] for c in _rc]
+    _cm = df_stats[_rc].dropna().corr().round(2)
     fig_h = go.Figure(data=go.Heatmap(
+        z=_cm.values, x=_rl, y=_rl,
         colorscale=[[0, "#1e1b4b"], [0.5, "#4338ca"], [1, "#ec4899"]],
+        text=_cm.values.round(2),
         texttemplate="%{text}",
+        textfont=dict(size=11, color="white"),
         hovertemplate="<b>%{x}</b> vs <b>%{y}</b><br>r = %{z:.3f}<extra></extra>",
         zmin=-1, zmax=1,
         colorbar=dict(title="r", tickfont=dict(color="#a5b4fc")),
     ))
+    fig_h.update_layout(**PLOT_LAYOUT, height=420,
+        title=f"Feature Correlation — {len(df_stats):,} proteins (structural stats + PETIMOT metrics)")
     st.plotly_chart(fig_h, use_container_width=True, key="heatmap")
+    # ── Success vs Failure analysis ──
+    st.markdown("#### 🎯 What separates PETIMOT successes from failures?")
+    st.caption("Success = RMSIP² > 0.5 (PETIMOT outperforms NMA on directionality)")
+    _df_sv = df_stats.dropna(subset=["rmsip_sq", "ref_len"])
+    _df_sv = _df_sv.copy()
+    _df_sv["outcome"] = _df_sv["rmsip_sq"].apply(lambda x: "✅ Success" if x > 0.5 else "❌ Failure")
+    _violin_features = [
+        ("ref_len",    "Sequence Length (residues)"),
+        ("%var_1st",   "% Variance in NMA mode 0"),
+        ("rmsd_mean",  "Intra-cluster RMSD (Å)"),
+        ("percent_id", "Sequence identity"),
+    ]
+    _vcols = st.columns(2)
+    for i, (feat, label) in enumerate(_violin_features):
+        if feat not in _df_sv.columns: continue
+        fig_v = go.Figure()
+        for outcome, color in [("✅ Success", "#10b981"), ("❌ Failure", "#ef4444")]:
+            vals = _df_sv[_df_sv["outcome"] == outcome][feat].dropna()
+            fig_v.add_trace(go.Violin(
+                y=vals, name=outcome,
+                line_color=color,
+                fillcolor={"#10b981": "rgba(16,185,129,0.2)", "#ef4444": "rgba(239,68,68,0.2)"}.get(color, "rgba(99,102,241,0.2)"),
+                box_visible=True, meanline_visible=True, showlegend=(i == 0),
+            ))
+        fig_v.update_layout(**PLOT_LAYOUT, height=280, title=label,
+            yaxis_title=label, margin=dict(l=40, r=10, t=45, b=30))
+        with _vcols[i % 2]:
+            st.plotly_chart(fig_v, use_container_width=True, key=f"violin_sv_{feat}")
+    # ── Top predictors (sorted abs correlation with rmsip_sq) ──
+    _target = "rmsip_sq"
+    _predictor_cols = [c for c in _rc if c != _target]
+    _predictor_corrs = {_rich_cols[c]: abs(_cm.loc[_target, c]) for c in _predictor_cols if _target in _cm.index}
+    _predictor_corrs = dict(sorted(_predictor_corrs.items(), key=lambda x: -x[1]))
+    fig_imp = go.Figure(go.Bar(
+        x=list(_predictor_corrs.values()),
+        y=list(_predictor_corrs.keys()),
+        orientation="h",
+        marker_color=["#6366f1" if v > 0.15 else "#4338ca" for v in _predictor_corrs.values()],
+        text=[f"{v:.3f}" for v in _predictor_corrs.values()],
+        textposition="outside",
+    ))
+    fig_imp.update_layout(**PLOT_LAYOUT, height=280,
+        title="Absolute correlation with RMSIP² (feature importance proxy)",
+        xaxis=dict(title="|r|", range=[0, max(_predictor_corrs.values()) * 1.3]),
+        margin=dict(l=140, r=40, t=50, b=30))
+    st.plotly_chart(fig_imp, use_container_width=True, key="feat_imp")
+else:
+    # Fallback: basic correlations from predictions only
+    _all_corr = {"seq_len": "Seq Length", "mean_disp_m0": "Mean Δ",
+                  "max_disp_m0": "Max Δ"}
+    _rc = [c for c in _all_corr if c in df.columns and df[c].notna().sum() > 5]
+    _rl = [_all_corr[c] for c in _rc]
+    _cm = df[_rc].dropna().corr().fillna(0)
+    fig_h = go.Figure(data=go.Heatmap(
+        z=_cm.values, x=_rl, y=_rl,
+        colorscale=[[0, "#1e1b4b"], [0.5, "#4338ca"], [1, "#ec4899"]],
+        text=np.round(_cm.values, 2), texttemplate="%{text}",
+        zmin=-1, zmax=1,
+    ))
+    fig_h.update_layout(**PLOT_LAYOUT, height=300, title="Feature Correlation (predictions only)")
+    st.plotly_chart(fig_h, use_container_width=True, key="heatmap")
+col_scatter_dummy = st.container()
+with col_scatter_dummy:
+    fig_s = go.Figure()
     fig_s.add_trace(go.Histogram2dContour(
         x=df.seq_len, y=df.mean_disp_m0,
         colorscale=[[0, "rgba(30,27,75,0)"], [0.3, "rgba(99,102,241,0.3)"],
                      [0.6, "rgba(139,92,246,0.5)"], [1, "rgba(236,72,153,0.7)"]],
+        ncontours=15, showscale=False, hoverinfo="skip",
     ))
     fig_s.add_trace(go.Scattergl(
+        x=df.seq_len, y=df.mean_disp_m0, mode="markers",
+        marker=dict(size=3, color=df.max_disp_m0, colorscale="Viridis", showscale=True,
+                    colorbar=dict(title="Max Δ (Å)", tickfont=dict(color="#a5b4fc")), opacity=0.6),
         text=df.name,
         hovertemplate="<b>%{text}</b><br>Length: %{x}<br>Mean Δ: %{y:.3f} Å<extra></extra>",
     ))
+    fig_s.update_layout(**PLOT_LAYOUT, height=350, showlegend=False,
+        title="Sequence Length vs Mean Displacement (36k proteins)",
+        xaxis_title="Sequence Length", yaxis_title="Mean Displacement (Å)")
     st.plotly_chart(fig_s, use_container_width=True, key="scatter")
 # ═══════════════════════════════════════
 # SECTION 3: Top-10 Leaderboards
 # ═══════════════════════════════════════

app/pages/5_🔬_Protein_Detail.py CHANGED Viewed

@@ -7,6 +7,7 @@ import plotly.graph_objects as go
 import plotly.express as px
 from plotly.subplots import make_subplots
 from pathlib import Path
 from app.utils.data_loader import find_predictions_dir, load_modes, load_ground_truth, PETIMOT_ROOT
@@ -121,15 +122,27 @@ with st.expander(f"📦 Cluster info (n={int(row['nb_members'])} members)", expa
                         margin=dict(l=40,r=20,t=40,b=30))
     st.plotly_chart(fig_q, use_container_width=True)
-# ── Mode displacement ─────────────────────────────────────────────
-st.divider()
-st.markdown("### 🌊 Predicted Normal Modes (PETIMOT)")
 pred_dir = find_predictions_dir(PETIMOT_ROOT)
 modes = load_modes(pred_dir, selected_key) if pred_dir else {}
 gt_dir = os.path.join(PETIMOT_ROOT, "ground_truth")
 gt = load_ground_truth(gt_dir, selected_key)
 if modes:
     n_modes = len(modes)
     mode_tabs = st.tabs([f"Mode {k}" for k in sorted(modes.keys())])

 import plotly.express as px
 from plotly.subplots import make_subplots
 from pathlib import Path
+from app.utils.bio_api import get_sequence_from_pdb, render_sequence_aa, get_protein_mutations
 from app.utils.data_loader import find_predictions_dir, load_modes, load_ground_truth, PETIMOT_ROOT
                         margin=dict(l=40,r=20,t=40,b=30))
     st.plotly_chart(fig_q, use_container_width=True)
+# ── Amino acid sequence strip ─────────────────────────────────────────
 pred_dir = find_predictions_dir(PETIMOT_ROOT)
 modes = load_modes(pred_dir, selected_key) if pred_dir else {}
 gt_dir = os.path.join(PETIMOT_ROOT, "ground_truth")
 gt = load_ground_truth(gt_dir, selected_key)
+with st.spinner("Fetching sequence from RCSB..."):
+    seq = get_sequence_from_pdb(selected_key)
+_mode0_vecs = list(modes.values())[0] if modes else None
+_mode0_mags = (np.linalg.norm(_mode0_vecs, axis=1) if _mode0_vecs is not None and _mode0_vecs.ndim > 1
+               else (np.abs(_mode0_vecs) if _mode0_vecs is not None else None))
+_muts = get_protein_mutations(selected_key, int(row["ref_len"]))
+render_sequence_aa(seq or "", displacements=_mode0_mags, mutations=_muts,
+                   title=f"🔤 {selected_key} — AA sequence (opacity = mode 0 displacement)")
+# ── Mode displacement ─────────────────────────────────────────────
+st.divider()
+st.markdown("### 🌊 Predicted Normal Modes (PETIMOT)")
 if modes:
     n_modes = len(modes)
     mode_tabs = st.tabs([f"Mode {k}" for k in sorted(modes.keys())])

app/utils/bio_api.py CHANGED Viewed

@@ -1,4 +1,4 @@
-"""External biology APIs (EBI, UniProt) for mutation and sequence tracking."""
 import requests
 import numpy as np
 import streamlit as st
@@ -6,60 +6,184 @@ import logging
 logger = logging.getLogger(__name__)
 @st.cache_data(ttl=86400, show_spinner=False)
 def get_uniprot_id_from_pdb(pdb_id: str) -> str | None:
     """Map a 4-letter PDB ID to its primary UniProt accession using PDBe API."""
     pdb_id = pdb_id[:4].lower()
-    url = f"https://www.ebi.ac.uk/pdbe/api/mappings/uniprot/{pdb_id}"
     try:
-        r = requests.get(url, timeout=10)
-        if not r.ok:
-            return None
-        data = r.json()
-        if pdb_id in data and "UniProt" in data[pdb_id]:
-            # Just take the first UniProt accession mapped
-            return list(data[pdb_id]["UniProt"].keys())[0]
     except Exception as e:
         logger.warning(f"PDBe Mapping failed for {pdb_id}: {e}")
     return None
 @st.cache_data(ttl=86400, show_spinner="Fetching evolutionary mutations...")
-def fetch_mutation_frequency(uniprot_id: str, seq_length: int) -> np.ndarray | None:
-    """Fetch known natural variants from EBI and return frequency per residue."""
-    url = f"https://www.ebi.ac.uk/proteins/api/variation/{uniprot_id}"
     try:
-        r = requests.get(url, headers={"Accept": "application/json"}, timeout=15)
         if not r.ok:
             return None
-        data = r.json()
-        features = data.get("features", [])
-        # Array to store mutation counts per position
         freqs = np.zeros(seq_length)
-        for f in features:
             if f.get("type") == "VARIANT":
                 try:
-                    begin = int(f.get("begin", -1))
-                    # 1-indexed to 0-indexed
-                    if 1 <= begin <= seq_length:
-                        freqs[begin - 1] += 1
                 except ValueError:
                     continue
         return freqs
     except Exception as e:
         logger.warning(f"Variation API failed for {uniprot_id}: {e}")
     return None
 @st.cache_data(ttl=86400, show_spinner=False)
-def get_protein_mutations(protein_name: str, seq_length: int) -> np.ndarray | None:
-    """End-to-end: PDB Name -> UniProt -> Mutation Frequencies."""
-    # Assuming protein_name format corresponds to a PDB ID in its first 4 chars
     if len(protein_name) >= 4:
-        pdb_id = protein_name[:4]
-        uniprot_id = get_uniprot_id_from_pdb(pdb_id)
-        if uniprot_id:
-            return fetch_mutation_frequency(uniprot_id, seq_length)
     return None

+"""External biology APIs (EBI, UniProt, RCSB) for mutation and sequence tracking."""
 import requests
 import numpy as np
 import streamlit as st
 logger = logging.getLogger(__name__)
+# ── Amino acid property colours ──────────────────────────────────────
+# One-letter residue code → hex colour of its sequence tile. Entries are
+# grouped by the property named in each comment below; shades vary within a
+# group. render_sequence_aa falls back to "#475569" (the "X" colour) for any
+# code that is missing from this table.
+AA_COLORS = {
+    # Hydrophobic
+    "A": "#7c3aed", "V": "#7c3aed", "I": "#6d28d9", "L": "#6d28d9",
+    "M": "#7c3aed", "F": "#5b21b6", "W": "#4c1d95", "P": "#8b5cf6",
+    # Charged positive
+    "K": "#0891b2", "R": "#0e7490", "H": "#06b6d4",
+    # Charged negative
+    "D": "#e11d48", "E": "#be123c",
+    # Polar
+    "S": "#0d9488", "T": "#0f766e", "N": "#115e59", "Q": "#134e4a",
+    # Special
+    "C": "#d97706", "G": "#b45309", "Y": "#92400e",
+    # Unknown
+    "X": "#475569",
+}
+# One-letter residue code → three-letter abbreviation used in tile tooltips;
+# render_sequence_aa shows the raw one-letter code when a code is not listed.
+AA_LABELS = {
+    "A": "Ala", "V": "Val", "I": "Ile", "L": "Leu", "M": "Met",
+    "F": "Phe", "W": "Trp", "P": "Pro", "K": "Lys", "R": "Arg",
+    "H": "His", "D": "Asp", "E": "Glu", "S": "Ser", "T": "Thr",
+    "N": "Asn", "Q": "Gln", "C": "Cys", "G": "Gly", "Y": "Tyr", "X": "Unk",
+}
+# ── Sequence fetching ─────────────────────────────────────────────────
+@st.cache_data(ttl=86400, show_spinner=False)
+def get_sequence_from_pdb(protein_name: str) -> str | None:
+    """Fetch amino acid sequence from RCSB for a protein name like '1HO5A'."""
+    if len(protein_name) < 4:
+        return None
+    pdb_id = protein_name[:4].upper()
+    chain  = protein_name[4].upper() if len(protein_name) >= 5 else "A"
+    # Try RCSB REST: entity instance → entity → sequence
+    try:
+        r = requests.get(
+            f"https://data.rcsb.org/rest/v1/core/polymer_entity_instance/{pdb_id}/{chain}",
+            timeout=10)
+        if r.ok:
+            entity_id = (r.json()
+                         .get("rcsb_polymer_entity_instance_container_identifiers", {})
+                         .get("entity_id"))
+            if entity_id:
+                r2 = requests.get(
+                    f"https://data.rcsb.org/rest/v1/core/polymer_entity/{pdb_id}/{entity_id}",
+                    timeout=10)
+                if r2.ok:
+                    seq = (r2.json()
+                           .get("entity_poly", {})
+                           .get("pdbx_seq_one_letter_code_can", ""))
+                    seq = seq.replace("\n", "").strip()
+                    if seq:
+                        return seq
+    except Exception as e:
+        logger.warning(f"RCSB entity fetch failed for {protein_name}: {e}")
+    # Fallback: FASTA endpoint, pick the right chain
+    try:
+        r3 = requests.get(f"https://www.rcsb.org/fasta/entry/{pdb_id}/download", timeout=10)
+        if r3.ok:
+            seq, capture = "", False
+            for line in r3.text.strip().split("\n"):
+                if line.startswith(">"):
+                    capture = f"|Chain {chain}|" in line or f"Chain {chain}" in line
+                elif capture:
+                    seq += line.strip()
+            if seq:
+                return seq
+    except Exception as e:
+        logger.warning(f"RCSB FASTA fetch failed for {protein_name}: {e}")
+    return None
+# ── Coloured AA sequence renderer ────────────────────────────────────
+def render_sequence_aa(
+    sequence: str,
+    displacements: "np.ndarray | None" = None,
+    mutations: "np.ndarray | None" = None,
+    title: str = "Amino Acid Sequence",
+) -> None:
+    """
+    Render a coloured amino-acid strip in Streamlit.
+    - Tile colour = AA physicochemical property
+    - Opacity = predicted displacement magnitude (if provided)
+    - Red border = known mutation site (if provided)
+    """
+    if not sequence:
+        st.info("Sequence not available — fetching from RCSB failed.")
+        return
+    n = len(sequence)
+    max_disp = float(np.max(displacements)) if displacements is not None and len(displacements) > 0 else 1.0
+    st.markdown(f"**{title}** — {n} residues")
+    st.markdown("""
+<style>
+.seq-strip{display:flex;flex-wrap:wrap;gap:2px;margin-bottom:8px;}
+.aa-tile{width:22px;height:22px;border-radius:4px;display:flex;align-items:center;
+  justify-content:center;font-size:10px;font-weight:700;color:white;cursor:default;
+  border:2px solid transparent;transition:transform .1s;}
+.aa-tile:hover{transform:scale(1.35);z-index:10;}
+.aa-mut{border:2px solid #f43f5e!important;}
+</style>""", unsafe_allow_html=True)
+    tiles = []
+    for i, aa in enumerate(sequence):
+        color  = AA_COLORS.get(aa, "#475569")
+        h      = color.lstrip("#")
+        r, g, b = int(h[0:2], 16), int(h[2:4], 16), int(h[4:6], 16)
+        alpha  = (0.35 + 0.65 * float(displacements[i]) / (max_disp + 1e-8)
+                  if displacements is not None and i < len(displacements) else 0.85)
+        bg     = f"rgba({r},{g},{b},{alpha:.2f})"
+        mut_cls = " aa-mut" if (mutations is not None and i < len(mutations) and mutations[i] > 0) else ""
+        tip    = f"{AA_LABELS.get(aa,aa)}{i+1}"
+        if displacements is not None and i < len(displacements):
+            tip += f" Δ={displacements[i]:.2f}Å"
+        if mutations is not None and i < len(mutations) and mutations[i] > 0:
+            tip += f" [{int(mutations[i])} variant(s)]"
+        tiles.append(f'<div class="aa-tile{mut_cls}" style="background:{bg}" title="{tip}">{aa}</div>')
+    st.markdown(f'<div class="seq-strip">{"".join(tiles)}</div>', unsafe_allow_html=True)
+    st.markdown("""
+<div style="display:flex;gap:12px;flex-wrap:wrap;font-size:11px;color:#94a3b8;margin-top:2px;">
+<span><span style="background:#7c3aed;padding:1px 5px;border-radius:3px;color:white">■</span> Hydrophobic</span>
+<span><span style="background:#0891b2;padding:1px 5px;border-radius:3px;color:white">■</span> (+) charged</span>
+<span><span style="background:#e11d48;padding:1px 5px;border-radius:3px;color:white">■</span> (−) charged</span>
+<span><span style="background:#0d9488;padding:1px 5px;border-radius:3px;color:white">■</span> Polar</span>
+<span><span style="background:#d97706;padding:1px 5px;border-radius:3px;color:white">■</span> Special</span>
+<span style="color:#f43f5e;">🔴 border = mutation site · opacity = predicted Δ</span>
+</div>""", unsafe_allow_html=True)
+# ── UniProt / EBI mutation fetching ──────────────────────────────────
 @st.cache_data(ttl=86400, show_spinner=False)
 def get_uniprot_id_from_pdb(pdb_id: str) -> str | None:
     """Map a 4-letter PDB ID to its primary UniProt accession using PDBe API."""
+    # Only the first four characters are used, lower-cased, both in the URL
+    # and as the key looked up in the JSON response.
+    # Returns None on a non-OK response, a missing mapping, or any exception
+    # (logged as a warning). Results are cached by Streamlit for 24 hours.
     pdb_id = pdb_id[:4].lower()
     try:
+        r = requests.get(f"https://www.ebi.ac.uk/pdbe/api/mappings/uniprot/{pdb_id}", timeout=10)
+        if r.ok:
+            data = r.json()
+            if pdb_id in data and "UniProt" in data[pdb_id]:
+                # "Primary" here means the first key of the UniProt mapping;
+                # an empty mapping raises IndexError, handled by the except below.
+                return list(data[pdb_id]["UniProt"].keys())[0]
     except Exception as e:
         logger.warning(f"PDBe Mapping failed for {pdb_id}: {e}")
     return None
 @st.cache_data(ttl=86400, show_spinner="Fetching evolutionary mutations...")
+def fetch_mutation_frequency(uniprot_id: str, seq_length: int) -> "np.ndarray | None":
+    """Fetch known natural variants from EBI and return count per residue."""
+    # Returns an array of length seq_length whose entry i counts the VARIANT
+    # features with begin == i + 1, or None on a non-OK response or any
+    # exception (logged as a warning).
     try:
+        r = requests.get(
+            f"https://www.ebi.ac.uk/proteins/api/variation/{uniprot_id}",
+            headers={"Accept": "application/json"}, timeout=15)
         if not r.ok:
             return None
         freqs = np.zeros(seq_length)
+        for f in r.json().get("features", []):
             if f.get("type") == "VARIANT":
                 try:
+                    # "begin" is 1-based; positions outside 1..seq_length are ignored.
+                    # NOTE(review): a null "begin" makes int() raise TypeError, which
+                    # the ValueError handler below does not catch, so the outer except
+                    # discards the whole result. Confirm whether that is intended.
+                    pos = int(f.get("begin", -1))
+                    if 1 <= pos <= seq_length:
+                        freqs[pos - 1] += 1
                 except ValueError:
                     continue
         return freqs
     except Exception as e:
         logger.warning(f"Variation API failed for {uniprot_id}: {e}")
     return None
 @st.cache_data(ttl=86400, show_spinner=False)
+def get_protein_mutations(protein_name: str, seq_length: int) -> "np.ndarray | None":
+    """End-to-end: PDB Name → UniProt → Mutation Frequencies."""
+    # The first four characters of protein_name are treated as the PDB ID.
+    # Names shorter than four characters, or with no UniProt mapping, yield None.
+    # NOTE(review): variant positions come from the UniProt variation record and
+    # are used directly as indices into an array of length seq_length. Nothing
+    # here aligns UniProt numbering with the structure's residues; confirm
+    # callers can rely on the two matching.
     if len(protein_name) >= 4:
+        uid = get_uniprot_id_from_pdb(protein_name[:4])
+        if uid:
+            return fetch_mutation_frequency(uid, seq_length)
     return None