"""External biology APIs (EBI, UniProt, RCSB) for mutation and sequence tracking."""
import html
import logging

import numpy as np
import requests
import streamlit as st
|
|
# Module-level logger, named after this module so handlers can filter on it.
logger = logging.getLogger(__name__)
|
|
| |
# One-letter residue code -> tile colour (hex string).
# The groups mirror the legend drawn by render_sequence_aa; shades vary
# slightly within a group so neighbouring residues stay distinguishable.
AA_COLORS = {
    # Hydrophobic
    "A": "#7c3aed",
    "V": "#7c3aed",
    "I": "#6d28d9",
    "L": "#6d28d9",
    "M": "#7c3aed",
    "F": "#5b21b6",
    "W": "#4c1d95",
    "P": "#8b5cf6",
    # Positively charged
    "K": "#0891b2",
    "R": "#0e7490",
    "H": "#06b6d4",
    # Negatively charged
    "D": "#e11d48",
    "E": "#be123c",
    # Polar
    "S": "#0d9488",
    "T": "#0f766e",
    "N": "#115e59",
    "Q": "#134e4a",
    # Special
    "C": "#d97706",
    "G": "#b45309",
    "Y": "#92400e",
    # Unknown residue
    "X": "#475569",
}
|
|
# One-letter residue code -> three-letter abbreviation used in tile tooltips.
AA_LABELS = {
    "A": "Ala",
    "V": "Val",
    "I": "Ile",
    "L": "Leu",
    "M": "Met",
    "F": "Phe",
    "W": "Trp",
    "P": "Pro",
    "K": "Lys",
    "R": "Arg",
    "H": "His",
    "D": "Asp",
    "E": "Glu",
    "S": "Ser",
    "T": "Thr",
    "N": "Asn",
    "Q": "Gln",
    "C": "Cys",
    "G": "Gly",
    "Y": "Tyr",
    "X": "Unk",
}
|
|
|
|
| |
def _parse_fasta_chain_ids(header: str) -> tuple[set[str], set[str]]:
    """Return ``(label_ids, author_ids)`` named in one FASTA header line.

    The chain list is expected in the second ``|``-separated field, written
    as ``Chain A`` or ``Chains A, B[auth H]``. An ID without an ``[auth ...]``
    suffix counts as both label and author ID. Headers that do not follow
    this layout yield two empty sets.
    """
    fields = header.split("|")
    if len(fields) < 2:
        return set(), set()
    chain_field = fields[1].strip()
    # Test the plural first: "Chains ..." also starts with "Chain".
    for prefix in ("Chains", "Chain"):
        if chain_field.startswith(prefix):
            chain_field = chain_field[len(prefix):]
            break
    else:
        return set(), set()

    label_ids: set[str] = set()
    author_ids: set[str] = set()
    for item in chain_field.split(","):
        label, _, auth = item.strip().partition("[auth ")
        label = label.strip()
        if not label:
            continue
        label_ids.add(label)
        author_ids.add(auth.rstrip("]").strip() if auth else label)
    return label_ids, author_ids


def _sequence_for_chain(fasta_text: str, chain: str) -> str | None:
    """Return the sequence of the first FASTA record that lists *chain*."""
    # Each record: (label_ids, author_ids, sequence lines).
    records: list[tuple[set[str], set[str], list[str]]] = []
    for raw_line in fasta_text.splitlines():
        line = raw_line.strip()
        if line.startswith(">"):
            label_ids, author_ids = _parse_fasta_chain_ids(line)
            records.append((label_ids, author_ids, []))
        elif line and records:
            records[-1][2].append(line)

    # Label IDs are tried first because the REST lookup in the caller also
    # treats the chain as a label ID; author IDs are the second chance.
    for id_slot in (0, 1):
        for record in records:
            if chain in record[id_slot] and record[2]:
                return "".join(record[2])
    return None


@st.cache_data(ttl=86400, show_spinner=False)
def get_sequence_from_pdb(protein_name: str) -> str | None:
    """Fetch the amino-acid sequence from RCSB for a name like '1HO5A'.

    The first four characters are the PDB ID and the optional fifth is the
    chain (default "A"); both are upper-cased.

    Two routes are tried in order:
      1. RCSB Data API: chain instance -> entity ID -> canonical one-letter
         sequence of that entity.
      2. The entry's FASTA download, picking the record whose header lists
         the chain (as a label ID or as an ``[auth X]`` author ID).

    Returns:
        The one-letter sequence, or None when the name is shorter than four
        characters or neither route yields a sequence. Network and parsing
        errors are logged as warnings, never raised.
    """
    if len(protein_name) < 4:
        return None
    pdb_id = protein_name[:4].upper()
    chain = protein_name[4].upper() if len(protein_name) >= 5 else "A"

    # Route 1: REST lookup. Best-effort by design, so any failure falls
    # through to the FASTA route instead of propagating.
    try:
        instance = requests.get(
            f"https://data.rcsb.org/rest/v1/core/polymer_entity_instance/{pdb_id}/{chain}",
            timeout=10)
        if instance.ok:
            identifiers = (instance.json()
                           .get("rcsb_polymer_entity_instance_container_identifiers") or {})
            entity_id = identifiers.get("entity_id")
            if entity_id:
                entity = requests.get(
                    f"https://data.rcsb.org/rest/v1/core/polymer_entity/{pdb_id}/{entity_id}",
                    timeout=10)
                if entity.ok:
                    entity_poly = entity.json().get("entity_poly") or {}
                    seq = entity_poly.get("pdbx_seq_one_letter_code_can") or ""
                    seq = seq.replace("\n", "").strip()
                    if seq:
                        return seq
    except Exception as e:
        logger.warning("RCSB entity fetch failed for %s: %s", protein_name, e)

    # Route 2: FASTA download for the whole entry, filtered by chain.
    try:
        fasta = requests.get(f"https://www.rcsb.org/fasta/entry/{pdb_id}/download", timeout=10)
        if fasta.ok:
            seq = _sequence_for_chain(fasta.text, chain)
            if seq:
                return seq
    except Exception as e:
        logger.warning("RCSB FASTA fetch failed for %s: %s", protein_name, e)
    return None
|
|
|
|
| |
def render_sequence_aa(
    sequence: str,
    displacements: "np.ndarray | None" = None,
    mutations: "np.ndarray | None" = None,
    title: str = "Amino Acid Sequence",
) -> None:
    """Render a coloured amino-acid strip in Streamlit.

    - Tile colour  = residue class (looked up in AA_COLORS)
    - Tile opacity = displacement relative to the largest one (if provided);
      0.85 for residues without a displacement value
    - Red border   = mutation count > 0 at that position (if provided)

    Args:
        sequence: One-letter amino-acid sequence; one tile per character.
        displacements: Optional per-residue values indexed like ``sequence``.
            Positions past its end are drawn without displacement info.
        mutations: Optional per-residue variant counts indexed like
            ``sequence``.
        title: Heading shown above the strip.

    Shows an info box and returns early when ``sequence`` is empty.
    """
    if not sequence:
        st.info("Sequence not available — fetching from RCSB failed.")
        return

    n = len(sequence)
    n_disp = len(displacements) if displacements is not None else 0
    n_mut = len(mutations) if mutations is not None else 0
    max_disp = float(np.max(displacements)) if n_disp > 0 else 1.0

    st.markdown(f"**{title}** — {n} residues")
    # Built from adjacent literals (no leading whitespace) so Markdown can
    # never mistake indented HTML for a code block.
    st.markdown(
        "<style>"
        ".seq-strip{display:flex;flex-wrap:wrap;gap:2px;margin-bottom:8px;}"
        ".aa-tile{width:22px;height:22px;border-radius:4px;display:flex;align-items:center;"
        "justify-content:center;font-size:10px;font-weight:700;color:white;cursor:default;"
        "border:2px solid transparent;transition:transform .1s;}"
        ".aa-tile:hover{transform:scale(1.35);z-index:10;}"
        ".aa-mut{border:2px solid #f43f5e!important;}"
        "</style>",
        unsafe_allow_html=True,
    )

    tiles = []
    for i, aa in enumerate(sequence):
        hex_code = AA_COLORS.get(aa, "#475569").lstrip("#")
        red, green, blue = (int(hex_code[k:k + 2], 16) for k in (0, 2, 4))

        tip = f"{AA_LABELS.get(aa, aa)}{i + 1}"
        if i < n_disp:
            disp = float(displacements[i])
            # 0.35 floor keeps static residues visible; epsilon avoids 0/0.
            alpha = 0.35 + 0.65 * disp / (max_disp + 1e-8)
            tip += f" Δ={disp:.2f}Å"
        else:
            alpha = 0.85

        mutated = i < n_mut and mutations[i] > 0
        if mutated:
            tip += f" [{int(mutations[i])} variant(s)]"

        # The sequence comes from an external service and is emitted with
        # unsafe_allow_html, so escape everything derived from it.
        tiles.append(
            f'<div class="aa-tile{" aa-mut" if mutated else ""}" '
            f'style="background:rgba({red},{green},{blue},{alpha:.2f})" '
            f'title="{html.escape(tip)}">{html.escape(aa)}</div>'
        )

    st.markdown(f'<div class="seq-strip">{"".join(tiles)}</div>', unsafe_allow_html=True)

    legend = (
        ("#7c3aed", "Hydrophobic"),
        ("#0891b2", "(+) charged"),
        ("#e11d48", "(−) charged"),
        ("#0d9488", "Polar"),
        ("#d97706", "Special"),
    )
    swatches = "".join(
        f'<span><span style="background:{swatch};padding:1px 5px;'
        f'border-radius:3px;color:white">■</span> {label}</span>'
        for swatch, label in legend
    )
    st.markdown(
        '<div style="display:flex;gap:12px;flex-wrap:wrap;font-size:11px;'
        'color:#94a3b8;margin-top:2px;">'
        f"{swatches}"
        '<span style="color:#f43f5e;">🔴 border = mutation site · opacity = predicted Δ</span>'
        "</div>",
        unsafe_allow_html=True,
    )
|
|
|
|
| |
@st.cache_data(ttl=86400, show_spinner=False)
def get_uniprot_id_from_pdb(pdb_id: str) -> str | None:
    """Map a 4-letter PDB ID to a UniProt accession using the PDBe API.

    Only the first four characters of ``pdb_id`` are used, lower-cased.

    Returns:
        The first accession listed under the entry's "UniProt" mapping, or
        None when the request fails, the entry is absent, or the mapping is
        empty. Errors are logged as warnings, never raised.
    """
    pdb_id = pdb_id[:4].lower()
    try:
        r = requests.get(f"https://www.ebi.ac.uk/pdbe/api/mappings/uniprot/{pdb_id}", timeout=10)
        if r.ok:
            accessions = (r.json().get(pdb_id) or {}).get("UniProt") or {}
            # next(..., None) takes the first key without building a list and
            # without raising when the mapping is empty.
            return next(iter(accessions), None)
    except Exception as e:
        logger.warning("PDBe Mapping failed for %s: %s", pdb_id, e)
    return None
|
|
|
|
@st.cache_data(ttl=86400, show_spinner="Fetching evolutionary mutations...")
def fetch_mutation_frequency(uniprot_id: str, seq_length: int) -> "np.ndarray | None":
    """Fetch known variants from the EBI Proteins API and count them per residue.

    Args:
        uniprot_id: Accession passed to the variation endpoint.
        seq_length: Length of the returned array; variant positions outside
            ``1..seq_length`` are ignored.

    Returns:
        A float array of length ``seq_length`` where element ``i`` is the
        number of "VARIANT" features whose ``begin`` is ``i + 1``, or None
        when the request fails. Errors are logged as warnings, never raised.
    """
    try:
        r = requests.get(
            f"https://www.ebi.ac.uk/proteins/api/variation/{uniprot_id}",
            headers={"Accept": "application/json"}, timeout=15)
        if not r.ok:
            return None
        freqs = np.zeros(seq_length)
        for feature in r.json().get("features") or []:
            if feature.get("type") != "VARIANT":
                continue
            try:
                pos = int(feature.get("begin", -1))
            except (TypeError, ValueError):
                # Missing/null or non-numeric position: skip this feature
                # only, rather than losing the whole result.
                continue
            if 1 <= pos <= seq_length:
                freqs[pos - 1] += 1
        return freqs
    except Exception as e:
        logger.warning("Variation API failed for %s: %s", uniprot_id, e)
        return None
|
|
|
|
@st.cache_data(ttl=86400, show_spinner=False)
def get_protein_mutations(protein_name: str, seq_length: int) -> "np.ndarray | None":
    """End-to-end lookup: PDB name -> UniProt accession -> mutation counts.

    Returns None when the name has fewer than four characters or no UniProt
    accession is found; otherwise whatever fetch_mutation_frequency returns.
    """
    if len(protein_name) < 4:
        return None
    accession = get_uniprot_id_from_pdb(protein_name[:4])
    if not accession:
        return None
    return fetch_mutation_frequency(accession, seq_length)
|
|
|
|
| |
@st.cache_data(ttl=86400, show_spinner=False)
def fetch_pdb_structure(protein_name: str) -> "str | None":
    """Download PDB text for a name like '1A3RH' from RCSB.

    The first four characters are the PDB ID. When a fifth character is
    present it is taken as the chain and the text is filtered down to:
      * ATOM records whose chain column (index 21) equals that chain, and
      * HEADER / TITLE / REMARK / MODEL / END* records.
    HETATM and all other record types are dropped by the filter. Without a
    chain character the file is returned unfiltered.

    Returns:
        The (possibly filtered) PDB text, or None when the name is shorter
        than four characters or the download fails. Errors are logged as
        warnings, never raised.
    """
    if len(protein_name) < 4:
        return None
    pdb_id = protein_name[:4].upper()
    chain = protein_name[4].upper() if len(protein_name) >= 5 else None
    try:
        r = requests.get(f"https://files.rcsb.org/download/{pdb_id}.pdb", timeout=20)
        if not r.ok:
            return None
        if chain is None:
            return r.text

        # "END" also matches ENDMDL, so MODEL must be kept as well; otherwise
        # multi-model entries end up with unbalanced model delimiters.
        passthrough = ("HEADER", "TITLE", "REMARK", "MODEL", "END")
        kept = [
            record for record in r.text.split("\n")
            if (record.startswith("ATOM") and len(record) > 21 and record[21] == chain)
            or record.startswith(passthrough)
        ]
        return "\n".join(kept)
    except Exception as e:
        logger.warning("PDB download failed for %s: %s", pdb_id, e)
        return None
|
|
|
|
@st.cache_data(ttl=86400, show_spinner=False)
def fetch_atlas_pair_info(pair_name: str) -> dict:
    """Parse an ATLAS-style pair like '1A3RH_5W23J' and fetch RCSB metadata.

    The name is split on "_"; the first two parts are the query and target.
    For each, the first four characters are the PDB ID and the fifth (if any)
    the chain, defaulting to "A"; both are upper-cased.

    Returns:
        ``{}`` when the name has fewer than two parts. Otherwise a dict with
        "pair", "query" and "target"; the latter two always hold "pdb",
        "chain" and "name", plus "title" (max 60 chars), "resolution",
        "method" and "year" when the RCSB entry request succeeds. A failed
        metadata request is logged and leaves those extra keys out.
    """
    parts = pair_name.split("_")
    if len(parts) < 2:
        return {}

    result = {"pair": pair_name}
    for key, name in (("query", parts[0]), ("target", parts[1])):
        result[key] = {
            "pdb": name[:4].upper(),
            "chain": name[4].upper() if len(name) >= 5 else "A",
            "name": name,
        }

    for key in ("query", "target"):
        pdb_id = result[key]["pdb"]
        try:
            r = requests.get(f"https://data.rcsb.org/rest/v1/core/entry/{pdb_id}", timeout=8)
            if not r.ok:
                continue
            entry = r.json()
            entry_info = entry.get("rcsb_entry_info") or {}
            accession_info = entry.get("rcsb_accession_info") or {}
            # "or" fallbacks keep a null field from aborting the later ones.
            result[key]["title"] = ((entry.get("struct") or {}).get("title") or "")[:60]
            result[key]["resolution"] = (entry_info.get("resolution_combined") or [None])[0]
            result[key]["method"] = entry_info.get("experimental_method", "")
            result[key]["year"] = (accession_info.get("deposit_date") or "")[:4]
        except Exception as e:
            # Metadata is optional, so report the failure and carry on with
            # the other entry.
            logger.warning("RCSB entry metadata fetch failed for %s: %s", pdb_id, e)
    return result
|
|