| |
import json
from datetime import datetime

import altair as alt
import pandas as pd
import requests
import streamlit as st
|
|
# Use the wide layout so the charts below can span the full browser width.
st.set_page_config(layout="wide")
# The leading emoji had been corrupted into an unrelated CJK glyph (and had
# swallowed the following space); restore the intended DNA emoji.
st.title("🧬 RCSB Protein Data Bank Visual Explorer")
|
|
| |
# Column order of the frame returned by fetch_entry_details(); also used to
# give an empty result a usable schema.
_ENTRY_COLUMNS = ["id", "release_date", "method", "resolution", "institution"]

# Seconds to wait on any single HTTP request before giving up.
_REQUEST_TIMEOUT = 30


def _entry_row(eid, d):
    """Flatten one entry document into the row shape used by the dashboard.

    Sections that are missing, ``null`` or empty fall back to ``"Unknown"`` /
    ``None`` instead of raising ``IndexError`` / ``TypeError``.
    """
    accession = d.get("rcsb_accession_info") or {}
    experiments = d.get("exptl") or [{}]
    resolutions = (d.get("rcsb_entry_info") or {}).get("resolution_combined") or [None]
    return {
        "id": eid,
        "release_date": accession.get("initial_release_date"),
        "method": experiments[0].get("method") or "Unknown",
        "resolution": resolutions[0],
        # NOTE(review): the deposit site may actually be published under
        # "pdbx_database_status" rather than "rcsb_accession_info" -- confirm
        # against the Data API schema; the lookup is left as it was.
        "institution": accession.get("deposit_site", "Unknown"),
    }


@st.cache_data(ttl=3600)
def fetch_entry_details() -> pd.DataFrame:
    """Fetch metadata for up to 100 PDB entries and return it as a DataFrame.

    The search service is asked for entries that have an initial release
    date; each returned identifier is then looked up individually on the
    data service. The result is cached by Streamlit for one hour.

    Returns:
        A DataFrame with the columns ``id``, ``release_date``, ``method``,
        ``resolution`` and ``institution``. The columns are present even
        when no entry could be retrieved.

    Raises:
        requests.HTTPError: if the search request returns an error status.
        requests.RequestException: if the search request cannot be completed.
    """
    search_url = "https://search.rcsb.org/rcsbsearch/v2/query"
    entry_query = {
        "query": {
            "type": "terminal",
            "service": "text",
            "parameters": {
                "attribute": "rcsb_accession_info.initial_release_date",
                "operator": "exists",
            },
        },
        "return_type": "entry",
        "request_options": {
            "paginate": {"start": 0, "rows": 100},
            "results_content_type": ["experimental"],
            "scoring_strategy": "combined",
        },
    }

    entry_details = []
    # One session reuses the connection across the ~100 per-entry lookups.
    with requests.Session() as session:
        # Serialise with json.dumps and let requests URL-encode the value,
        # instead of patching quotes in the dict's repr.
        res = session.get(
            search_url,
            params={"json": json.dumps(entry_query)},
            timeout=_REQUEST_TIMEOUT,
        )
        res.raise_for_status()
        # The search service may answer with an empty body when nothing
        # matches; treat that as "no results" rather than a JSON error.
        hits = res.json().get("result_set", []) if res.content else []
        entry_ids = [hit["identifier"] for hit in hits]

        for eid in entry_ids:
            detail_url = f"https://data.rcsb.org/rest/v1/core/entry/{eid}"
            try:
                r = session.get(detail_url, timeout=_REQUEST_TIMEOUT)
            except requests.RequestException:
                # Best effort, like the non-200 case below: one unreachable
                # entry should not discard the rest of the batch.
                continue
            if r.status_code == 200:
                entry_details.append(_entry_row(eid, r.json()))

    return pd.DataFrame(entry_details, columns=_ENTRY_COLUMNS)
|
|
# Manual refresh: drop every cached result and re-run the script so that
# fetch_entry_details() hits the network again.
if st.button("π Refresh Data"):
    st.cache_data.clear()
    # st.experimental_rerun is deprecated and absent from newer Streamlit
    # releases; prefer st.rerun and fall back only on older installs.
    rerun = getattr(st, "rerun", None) or st.experimental_rerun
    rerun()
|
|
# Load the (cached) entry table and keep only rows with a release date.
df = fetch_entry_details()
if "release_date" in df.columns:
    df = df.dropna(subset=["release_date"])
if df.empty:
    # Every section below indexes into df; stop early with a message instead
    # of failing with a KeyError / IndexError when nothing was retrieved.
    st.warning("No entry data could be loaded from RCSB. Use the refresh button to try again.")
    st.stop()
# Release year, used as the x-axis / grouping key by several charts below.
df["year"] = pd.to_datetime(df["release_date"]).dt.year
|
|
| |
# Section: number of entries per release year, optionally restricted to one
# experimental method chosen by the user.
st.header("π Yearly Growth of Protein Structure Submissions")
method_filter = st.selectbox(
    "Filter by Experimental Method",
    ["All", *sorted(df["method"].unique())],
)
if method_filter == "All":
    plot_df = df
else:
    plot_df = df[df["method"] == method_filter]

# One row per year with the number of matching entries.
yearly_counts = plot_df.groupby("year").size().reset_index(name="count")
line_chart = (
    alt.Chart(yearly_counts)
    .mark_line(point=True)
    .encode(x="year:O", y="count:Q", tooltip=["year", "count"])
    .properties(height=300)
)
st.altair_chart(line_chart, use_container_width=True)
|
|
| |
# Section: the ten most frequent institutions, ignoring the "Unknown"
# placeholder value.
st.header("π Top Contributing Institutions")
known_institutions = df.loc[df["institution"] != "Unknown", "institution"]
top_insts = known_institutions.value_counts().nlargest(10).reset_index()
# Normalise the column labels produced by reset_index().
top_insts.columns = ["institution", "count"]
bar_chart = (
    alt.Chart(top_insts)
    .mark_bar()
    .encode(
        x="count:Q",
        y=alt.Y("institution:N", sort="-x"),
        color="institution:N",
        tooltip=["institution", "count"],
    )
    .properties(height=300)
)
st.altair_chart(bar_chart, use_container_width=True)
|
|
| |
# Section: interactive scatter of resolution per release year, coloured by
# experimental method. Entries without a resolution are left out.
# The header emoji had been corrupted into an unrelated CJK glyph (and had
# swallowed the following space); restore the intended DNA emoji.
st.header("🧬 Resolution vs Method")
scatter_df = df.dropna(subset=["resolution"])
scatter = (
    alt.Chart(scatter_df)
    .mark_circle(size=80)
    .encode(
        x=alt.X("year:O"),
        y=alt.Y("resolution:Q"),
        color="method:N",
        tooltip=["id", "resolution", "method", "year"],
    )
    .interactive()
    .properties(height=300)
)
st.altair_chart(scatter, use_container_width=True)
|
|
| |
# Section: mean resolution per method, drawn with a user-selected mark type.
st.header("π Select Chart Type for Resolution Analysis")
chart_type = st.selectbox("Choose Chart Type", ["Bar", "Area", "Line"])
res_df = df.dropna(subset=["resolution"])
agg_df = res_df.groupby("method")["resolution"].mean().reset_index()

# All three variants share the data and encoding; only the mark differs.
base_chart = alt.Chart(agg_df)
mark_builders = {"Bar": base_chart.mark_bar, "Area": base_chart.mark_area}
build_mark = mark_builders.get(chart_type)
if build_mark is not None:
    marked_chart = build_mark()
else:
    # Any other selection (i.e. "Line") gets a line with point markers.
    marked_chart = base_chart.mark_line(point=True)
chart = marked_chart.encode(x="method:N", y="resolution:Q", color="method:N")
st.altair_chart(chart, use_container_width=True)
|
|
| |
# Section: table of the 20 entries with the greatest release_date values.
# The header emoji had been corrupted by a wrong-codepage decode; restore the
# intended receipt emoji.
st.header("🧾 Latest Structures Table")
latest_df = df.sort_values("release_date", ascending=False).head(20)
table_columns = ["id", "release_date", "method", "resolution", "institution"]
# Reset the index so the displayed row numbers start at 0.
st.dataframe(latest_df[table_columns].reset_index(drop=True))
|
|
| |
# Section: entries per year, one line per experimental method.
st.header("π Submissions Over Time by Method")
per_year_and_method = df.groupby(["year", "method"]).size()
method_trend = per_year_and_method.reset_index(name="count")
trend_chart = (
    alt.Chart(method_trend)
    .mark_line(point=True)
    .encode(
        x="year:O",
        y="count:Q",
        color="method:N",
        tooltip=["year", "method", "count"],
    )
    .properties(height=300)
)
st.altair_chart(trend_chart, use_container_width=True)
|
|
| |
# Section: share of entries per experimental method, drawn as a pie (arc) chart.
st.header("π¦ Method Usage Distribution")
method_counts = df["method"].value_counts()
method_dist = method_counts.reset_index()
# Normalise the column labels produced by reset_index().
method_dist.columns = ["method", "count"]
pie_chart = (
    alt.Chart(method_dist)
    .mark_arc()
    .encode(theta="count:Q", color="method:N", tooltip=["method", "count"])
)
st.altair_chart(pie_chart, use_container_width=True)
|
|
| |
# Section: box plot of resolution values for each experimental method.
# Entries without a resolution are left out.
st.header("π― Resolution Distribution by Method")
box_data = df.dropna(subset=["resolution"])
box = (
    alt.Chart(box_data)
    .mark_boxplot()
    .encode(x="method:N", y="resolution:Q", color="method:N")
)
st.altair_chart(box, use_container_width=True)
|
|
| |
# Section: show the rows of two user-selected entries side by side.
st.header("π Compare Two Entries")
entry_ids = df["id"].tolist()
if entry_ids:
    col1, col2 = st.columns(2)
    entry1 = col1.selectbox("Select Entry 1", entry_ids, index=0)
    # Default the second picker to the second entry, clamped so that a
    # single-row table does not ask for an out-of-range index.
    entry2 = col2.selectbox(
        "Select Entry 2", entry_ids, index=min(1, len(entry_ids) - 1)
    )
    # First matching row for each selected identifier.
    info1 = df[df["id"] == entry1].iloc[0]
    info2 = df[df["id"] == entry2].iloc[0]

    col1.subheader(f"Entry: {entry1}")
    col1.write(info1)
    col2.subheader(f"Entry: {entry2}")
    col2.write(info2)
else:
    # Nothing to pick from; avoid the IndexError that .iloc[0] would raise.
    st.info("No entries are available to compare.")
|
|
| |
# Section: entries per calendar month over the trailing twelve months.
# The header literal had been split across two lines by a corrupted emoji
# byte (a syntax error); it is rejoined here with the calendar emoji.
st.header("📅 Monthly Submissions (Last 12 Months)")
# release_date holds strings, so parse it once and compare timestamps with
# timestamps. utc=True keeps both sides of the comparison timezone-aware.
release_ts = pd.to_datetime(df["release_date"], utc=True)
df["month"] = release_ts.dt.strftime("%Y-%m")
cutoff = pd.Timestamp.now(tz="UTC") - pd.DateOffset(months=12)
last_year_df = df[release_ts >= cutoff]
monthly = last_year_df.groupby("month").size().reset_index(name="count")
month_chart = (
    alt.Chart(monthly)
    .mark_bar()
    .encode(x="month:O", y="count:Q", tooltip=["month", "count"])
    .properties(height=300)
)
st.altair_chart(month_chart, use_container_width=True)
|
|