ATLAS integration: RCSB on-demand PDB + pair metadata (query/target conformations)
Browse files- app/pages/1_π_Explorer.py +31 -1
- app/utils/bio_api.py +61 -0
app/pages/1_π_Explorer.py
CHANGED
|
@@ -14,7 +14,10 @@ from app.utils.data_loader import (
|
|
| 14 |
find_predictions_dir, load_prediction_index, load_modes, load_embeddings,
|
| 15 |
load_ground_truth, load_pdb_text, PETIMOT_ROOT
|
| 16 |
)
|
| 17 |
-
from app.utils.bio_api import
|
|
|
|
|
|
|
|
|
|
| 18 |
from app.components.embedding_viewer import render_embedding_viewer
|
| 19 |
from app.components.viewer_3d import render_motion_viewer, render_mode_comparison, render_deformed_viewer, render_animated_viewer, render_pred_vs_gt_viewer
|
| 20 |
from app.components.sequence_viewer import render_sequence_viewer, render_displacement_chart
|
|
@@ -89,6 +92,33 @@ def render_protein_detail(pred_dir, gt_dir, protein_name, key_suffix="", compact
|
|
| 89 |
if os.path.exists(pdb_path):
|
| 90 |
with open(pdb_path) as f:
|
| 91 |
pdb_text = f.read()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
|
| 93 |
# Controls
|
| 94 |
mode_idx = st.slider("Mode", 0, len(modes) - 1, 0, key=f"mode_{key_suffix}")
|
|
|
|
| 14 |
find_predictions_dir, load_prediction_index, load_modes, load_embeddings,
|
| 15 |
load_ground_truth, load_pdb_text, PETIMOT_ROOT
|
| 16 |
)
|
| 17 |
+
from app.utils.bio_api import (
|
| 18 |
+
get_protein_mutations, get_sequence_from_pdb, render_sequence_aa,
|
| 19 |
+
fetch_pdb_structure, fetch_atlas_pair_info
|
| 20 |
+
)
|
| 21 |
from app.components.embedding_viewer import render_embedding_viewer
|
| 22 |
from app.components.viewer_3d import render_motion_viewer, render_mode_comparison, render_deformed_viewer, render_animated_viewer, render_pred_vs_gt_viewer
|
| 23 |
from app.components.sequence_viewer import render_sequence_viewer, render_displacement_chart
|
|
|
|
| 92 |
if os.path.exists(pdb_path):
|
| 93 |
with open(pdb_path) as f:
|
| 94 |
pdb_text = f.read()
|
| 95 |
+
else:
|
| 96 |
+
# Fallback: download from RCSB on-demand
|
| 97 |
+
# protein_name is like "1A3RH_5W23J" β use the query part (first)
|
| 98 |
+
query_part = protein_name.split("_")[0] if "_" in protein_name else protein_name
|
| 99 |
+
with st.spinner(f"Fetching structure {query_part[:4]} from RCSB..."):
|
| 100 |
+
pdb_text = fetch_pdb_structure(query_part)
|
| 101 |
+
|
| 102 |
+
# Show ATLAS pair metadata
|
| 103 |
+
if "_" in protein_name:
|
| 104 |
+
with st.expander("𧬠ATLAS conformational pair", expanded=False):
|
| 105 |
+
pair_info = fetch_atlas_pair_info(protein_name)
|
| 106 |
+
if pair_info:
|
| 107 |
+
_qc, _tc = st.columns(2)
|
| 108 |
+
q, t = pair_info.get("query", {}), pair_info.get("target", {})
|
| 109 |
+
with _qc:
|
| 110 |
+
st.markdown(f"**Query β {q.get('pdb','')} chain {q.get('chain','')}**")
|
| 111 |
+
st.caption(q.get('title','') or 'N/A')
|
| 112 |
+
st.caption(f"Resolution: {q.get('resolution','N/A')} Γ
| {q.get('year','')}")
|
| 113 |
+
st.markdown(f"[Open in RCSB π](https://www.rcsb.org/structure/{q.get('pdb','')})",
|
| 114 |
+
unsafe_allow_html=False)
|
| 115 |
+
with _tc:
|
| 116 |
+
st.markdown(f"**Target (ground truth) β {t.get('pdb','')} chain {t.get('chain','')}**")
|
| 117 |
+
st.caption(t.get('title','') or 'N/A')
|
| 118 |
+
st.caption(f"Resolution: {t.get('resolution','N/A')} Γ
| {t.get('year','')}")
|
| 119 |
+
st.markdown(f"[Open in RCSB π](https://www.rcsb.org/structure/{t.get('pdb','')})",
|
| 120 |
+
unsafe_allow_html=False)
|
| 121 |
+
|
| 122 |
|
| 123 |
# Controls
|
| 124 |
mode_idx = st.slider("Mode", 0, len(modes) - 1, 0, key=f"mode_{key_suffix}")
|
app/utils/bio_api.py
CHANGED
|
@@ -187,3 +187,64 @@ def get_protein_mutations(protein_name: str, seq_length: int) -> "np.ndarray | N
|
|
| 187 |
if uid:
|
| 188 |
return fetch_mutation_frequency(uid, seq_length)
|
| 189 |
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 187 |
if uid:
|
| 188 |
return fetch_mutation_frequency(uid, seq_length)
|
| 189 |
return None
|
| 190 |
+
|
| 191 |
+
|
| 192 |
+
# ── PDB structure fetching (for 3D viewer & ATLAS pairs) ─────────────
|
| 193 |
+
@st.cache_data(ttl=86400, show_spinner=False)
def fetch_pdb_structure(protein_name: str) -> "str | None":
    """
    Download PDB text for a protein like '1A3RH' from RCSB.

    The first four characters of ``protein_name`` are used as the PDB entry
    ID and the optional fifth character as the chain ID. When a chain is
    given, only that chain's ATOM records are kept, together with every
    record starting with HEADER, TITLE, REMARK or END.

    Args:
        protein_name: Entry ID with an optional chain suffix,
            e.g. '1A3R' or '1A3RH'.

    Returns:
        The (possibly chain-filtered) PDB text, or None when the name is
        missing or shorter than four characters, the download fails, or the
        requested chain has no ATOM records in the entry.
    """
    # Guard against None / empty input as well as too-short names.
    if not protein_name or len(protein_name) < 4:
        return None

    pdb_id = protein_name[:4].upper()
    # NOTE(review): PDB chain IDs are case-sensitive; upper-casing assumes the
    # incoming names always encode upper-case chains -- confirm against data.
    chain = protein_name[4].upper() if len(protein_name) >= 5 else None

    # Keep the try body limited to the network call so that programming
    # errors in the filtering below are not silently converted into None.
    try:
        r = requests.get(f"https://files.rcsb.org/download/{pdb_id}.pdb", timeout=20)
    except requests.RequestException as e:
        logger.warning("PDB download failed for %s: %s", pdb_id, e)
        return None

    if not r.ok:
        # NOTE(review): the None returned here is presumably cached by
        # st.cache_data for the full TTL as well -- confirm this is acceptable
        # for transient server errors.
        logger.warning("PDB download for %s returned HTTP %s", pdb_id, r.status_code)
        return None

    pdb_text = r.text
    if not chain:
        return pdb_text

    passthrough_records = ("HEADER", "TITLE", "REMARK", "END")
    kept_lines = []
    atom_count = 0
    for line in pdb_text.split("\n"):
        if line.startswith("ATOM"):
            # Column 22 (index 21) of an ATOM record holds the chain ID.
            if len(line) > 21 and line[21] == chain:
                kept_lines.append(line)
                atom_count += 1
        elif line.startswith(passthrough_records):
            kept_lines.append(line)

    if atom_count == 0:
        # Without this check the caller would receive a header-only string
        # that is truthy but contains no coordinates to display.
        logger.warning("Chain %s not found in PDB entry %s", chain, pdb_id)
        return None

    return "\n".join(kept_lines)
|
| 217 |
+
|
| 218 |
+
|
| 219 |
+
@st.cache_data(ttl=86400, show_spinner=False)
def fetch_atlas_pair_info(pair_name: str) -> dict:
    """
    Parse an ATLAS-style pair '1A3RH_5W23J' into two conformations
    and fetch metadata from RCSB for each.

    The part before the first underscore is the query, the part after it the
    target. For each part the first four characters are the PDB ID and the
    fifth character the chain (defaulting to "A" when absent).

    Args:
        pair_name: Pair identifier such as '1A3RH_5W23J'.

    Returns:
        An empty dict when ``pair_name`` is empty or contains no underscore.
        Otherwise a dict with keys "pair", "query" and "target"; the latter
        two are dicts holding "pdb", "chain" and "name", plus "title",
        "resolution", "method" and "year" when the RCSB lookup succeeded.
        Metadata keys are simply absent when the lookup failed, so callers
        should read them with ``.get``.
    """
    if not pair_name:
        return {}
    parts = pair_name.split("_")
    if len(parts) < 2:
        return {}

    result = {"pair": pair_name}
    for role, name in (("query", parts[0]), ("target", parts[1])):
        result[role] = {
            "pdb": name[:4].upper(),
            "chain": name[4].upper() if len(name) >= 5 else "A",
            "name": name,
        }

    # Query and target may share a PDB entry (different chains); fetch once.
    summaries = {}
    for role in ("query", "target"):
        pdb_id = result[role]["pdb"]
        if len(pdb_id) < 4:
            # Cannot be a valid entry ID; skip the pointless network call.
            continue
        if pdb_id not in summaries:
            summaries[pdb_id] = _fetch_rcsb_entry_summary(pdb_id)
        result[role].update(summaries[pdb_id])
    return result


def _fetch_rcsb_entry_summary(pdb_id: str) -> dict:
    """Return title/resolution/method/year for one RCSB entry, or {} on failure."""
    try:
        r = requests.get(f"https://data.rcsb.org/rest/v1/core/entry/{pdb_id}", timeout=8)
        if not r.ok:
            return {}
        d = r.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a non-JSON response body.
        logger.warning("RCSB metadata fetch failed for %s: %s", pdb_id, e)
        return {}
    if not isinstance(d, dict):
        return {}

    # `or {}` / `or ""` tolerate JSON nulls, which a plain .get() default
    # does not replace.
    struct = d.get("struct") or {}
    entry_info = d.get("rcsb_entry_info") or {}
    accession = d.get("rcsb_accession_info") or {}
    return {
        "title": (struct.get("title") or "")[:60],
        "resolution": (entry_info.get("resolution_combined") or [None])[0],
        "method": entry_info.get("experimental_method") or "",
        "year": (accession.get("deposit_date") or "")[:4],
    }
|