Valmbd committed on
Commit
7cff683
·
verified ·
1 Parent(s): d925573

ATLAS integration: RCSB on-demand PDB + pair metadata (query/target conformations)

Browse files
app/pages/1_🔍_Explorer.py CHANGED
@@ -14,7 +14,10 @@ from app.utils.data_loader import (
14
  find_predictions_dir, load_prediction_index, load_modes, load_embeddings,
15
  load_ground_truth, load_pdb_text, PETIMOT_ROOT
16
  )
17
- from app.utils.bio_api import get_protein_mutations, get_sequence_from_pdb, render_sequence_aa
 
 
 
18
  from app.components.embedding_viewer import render_embedding_viewer
19
  from app.components.viewer_3d import render_motion_viewer, render_mode_comparison, render_deformed_viewer, render_animated_viewer, render_pred_vs_gt_viewer
20
  from app.components.sequence_viewer import render_sequence_viewer, render_displacement_chart
@@ -89,6 +92,33 @@ def render_protein_detail(pred_dir, gt_dir, protein_name, key_suffix="", compact
89
  if os.path.exists(pdb_path):
90
  with open(pdb_path) as f:
91
  pdb_text = f.read()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
 
93
  # Controls
94
  mode_idx = st.slider("Mode", 0, len(modes) - 1, 0, key=f"mode_{key_suffix}")
 
14
  find_predictions_dir, load_prediction_index, load_modes, load_embeddings,
15
  load_ground_truth, load_pdb_text, PETIMOT_ROOT
16
  )
17
+ from app.utils.bio_api import (
18
+ get_protein_mutations, get_sequence_from_pdb, render_sequence_aa,
19
+ fetch_pdb_structure, fetch_atlas_pair_info
20
+ )
21
  from app.components.embedding_viewer import render_embedding_viewer
22
  from app.components.viewer_3d import render_motion_viewer, render_mode_comparison, render_deformed_viewer, render_animated_viewer, render_pred_vs_gt_viewer
23
  from app.components.sequence_viewer import render_sequence_viewer, render_displacement_chart
 
92
  if os.path.exists(pdb_path):
93
  with open(pdb_path) as f:
94
  pdb_text = f.read()
95
+ else:
96
+ # Fallback: download from RCSB on-demand
97
+ # protein_name is like "1A3RH_5W23J" — use the query part (first)
98
+ query_part = protein_name.split("_")[0] if "_" in protein_name else protein_name
99
+ with st.spinner(f"Fetching structure {query_part[:4]} from RCSB..."):
100
+ pdb_text = fetch_pdb_structure(query_part)
101
+
102
+ # Show ATLAS pair metadata
103
+ if "_" in protein_name:
104
+ with st.expander("🧬 ATLAS conformational pair", expanded=False):
105
+ pair_info = fetch_atlas_pair_info(protein_name)
106
+ if pair_info:
107
+ _qc, _tc = st.columns(2)
108
+ q, t = pair_info.get("query", {}), pair_info.get("target", {})
109
+ with _qc:
110
+ st.markdown(f"**Query β€” {q.get('pdb','')} chain {q.get('chain','')}**")
111
+ st.caption(q.get('title','') or 'N/A')
112
+ st.caption(f"Resolution: {q.get('resolution','N/A')} Γ… | {q.get('year','')}")
113
+ st.markdown(f"[Open in RCSB πŸ”—](https://www.rcsb.org/structure/{q.get('pdb','')})",
114
+ unsafe_allow_html=False)
115
+ with _tc:
116
+ st.markdown(f"**Target (ground truth) β€” {t.get('pdb','')} chain {t.get('chain','')}**")
117
+ st.caption(t.get('title','') or 'N/A')
118
+ st.caption(f"Resolution: {t.get('resolution','N/A')} Γ… | {t.get('year','')}")
119
+ st.markdown(f"[Open in RCSB πŸ”—](https://www.rcsb.org/structure/{t.get('pdb','')})",
120
+ unsafe_allow_html=False)
121
+
122
 
123
  # Controls
124
  mode_idx = st.slider("Mode", 0, len(modes) - 1, 0, key=f"mode_{key_suffix}")
app/utils/bio_api.py CHANGED
@@ -187,3 +187,64 @@ def get_protein_mutations(protein_name: str, seq_length: int) -> "np.ndarray | N
187
  if uid:
188
  return fetch_mutation_frequency(uid, seq_length)
189
  return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
  if uid:
188
  return fetch_mutation_frequency(uid, seq_length)
189
  return None
190
+
191
+
192
+ # ── PDB structure fetching (for 3D viewer & ATLAS pairs) ─────────────
def _filter_pdb_chain(pdb_text: str, chain: str) -> "str | None":
    """Keep header records plus one chain's ATOM records from the first model.

    Args:
        pdb_text: Full PDB-format text.
        chain: Single-character chain identifier, compared against column 22
            (index 21) of each ATOM record.

    Returns:
        The filtered PDB text terminated by an ``END`` record, or None when
        the chain has no ATOM records.
    """
    kept = []
    has_atoms = False
    # splitlines() also copes with CRLF line endings, which split("\n") would
    # leave as trailing "\r" characters on every record.
    for line in pdb_text.splitlines():
        if line.startswith("ENDMDL"):
            # Multi-model entry (e.g. NMR ensemble): stop after the first
            # model so each residue appears exactly once in the output.
            break
        if line.startswith("ATOM"):
            if len(line) > 21 and line[21] == chain:
                kept.append(line)
                has_atoms = True
        elif line.startswith(("HEADER", "TITLE", "REMARK")):
            kept.append(line)
    if not has_atoms:
        return None
    kept.append("END")
    return "\n".join(kept)


@st.cache_data(ttl=86400, show_spinner=False)
def fetch_pdb_structure(protein_name: str) -> "str | None":
    """Download PDB text for a protein such as '1A3RH' from RCSB.

    The first four characters are the PDB id; an optional fifth character is
    the chain, in which case only that chain's ATOM records (plus HEADER /
    TITLE / REMARK lines) from the first model are returned.

    Args:
        protein_name: PDB id, optionally followed by a chain letter.

    Returns:
        PDB text, or None when the name is invalid, the download fails, or
        the requested chain has no ATOM records.

    NOTE: the result is cached by ``st.cache_data`` for 24 h, and that
    includes None results from transient failures.
    """
    name = (protein_name or "").strip()
    if len(name) < 4:
        return None
    pdb_id = name[:4].upper()
    # PDB ids are four ASCII alphanumerics; refuse anything else rather than
    # interpolating arbitrary characters into the download URL.
    if not (pdb_id.isascii() and pdb_id.isalnum()):
        return None
    # NOTE(review): the chain is upper-cased as before; a lower-case chain id
    # in the file would therefore never match - confirm against the dataset.
    chain = name[4].upper() if len(name) >= 5 else None

    try:
        r = requests.get(f"https://files.rcsb.org/download/{pdb_id}.pdb", timeout=20)
    except requests.RequestException as e:
        logger.warning("PDB download failed for %s: %s", pdb_id, e)
        return None
    if not r.ok:
        return None

    if chain is None:
        return r.text
    filtered = _filter_pdb_chain(r.text, chain)
    if filtered is None:
        logger.warning("No ATOM records for chain %s in %s", chain, pdb_id)
    return filtered
217
+
218
+
def _fetch_rcsb_entry_summary(pdb_id: str) -> dict:
    """Fetch title, resolution, method and deposit year for one RCSB entry.

    Returns an empty dict when the id is not four ASCII alphanumerics, the
    request fails, or the response is not a JSON object. The ``resolution``
    key is only present when RCSB reports a value.
    """
    if len(pdb_id) != 4 or not (pdb_id.isascii() and pdb_id.isalnum()):
        return {}
    try:
        r = requests.get(f"https://data.rcsb.org/rest/v1/core/entry/{pdb_id}", timeout=8)
        if not r.ok:
            return {}
        d = r.json()
    except (requests.RequestException, ValueError) as e:
        # Metadata is best-effort: log and carry on without it.
        logger.warning("RCSB metadata lookup failed for %s: %s", pdb_id, e)
        return {}
    if not isinstance(d, dict):
        return {}

    # "or {}" / "or ''" guard against explicit nulls in the JSON, which would
    # otherwise raise on .get() or on slicing.
    entry_info = d.get("rcsb_entry_info") or {}
    summary = {"title": ((d.get("struct") or {}).get("title") or "")[:60]}
    resolutions = entry_info.get("resolution_combined") or []
    if resolutions and resolutions[0] is not None:
        summary["resolution"] = resolutions[0]
    summary["method"] = entry_info.get("experimental_method") or ""
    summary["year"] = ((d.get("rcsb_accession_info") or {}).get("deposit_date") or "")[:4]
    return summary


@st.cache_data(ttl=86400, show_spinner=False)
def fetch_atlas_pair_info(pair_name: str) -> dict:
    """Describe both conformations of an ATLAS-style pair such as '1A3RH_5W23J'.

    The text before the first underscore is the query, the next segment is
    the target. For each, the first four characters are the PDB id and the
    fifth (if present) the chain, defaulting to "A".

    Args:
        pair_name: Pair identifier containing at least one underscore.

    Returns:
        ``{}`` when ``pair_name`` has no underscore. Otherwise a dict with
        ``pair``, ``query`` and ``target``; each conformation dict always has
        ``pdb``, ``chain`` and ``name``, plus ``title``, ``method``, ``year``
        and (when known) ``resolution`` if the RCSB lookup succeeded. Missing
        metadata keys are omitted so callers can supply their own defaults
        via ``.get``.
    """
    parts = pair_name.split("_")
    if len(parts) < 2:
        return {}

    result = {"pair": pair_name}
    for role, name in (("query", parts[0]), ("target", parts[1])):
        result[role] = {
            "pdb": name[:4].upper(),
            "chain": name[4].upper() if len(name) >= 5 else "A",
            "name": name,
        }

    # Both conformations may come from the same entry; look each id up once.
    summaries = {}
    for role in ("query", "target"):
        pdb_id = result[role]["pdb"]
        if pdb_id not in summaries:
            summaries[pdb_id] = _fetch_rcsb_entry_summary(pdb_id)
        result[role].update(summaries[pdb_id])
    return result