# rcsd / app.py
# SANIDHYAG's picture
# Update app.py
# ea6493b verified
# app.py
import json
from datetime import datetime

import altair as alt
import pandas as pd
import requests
import streamlit as st
# Configure the page to use Streamlit's wide layout, then render the app title.
st.set_page_config(layout="wide")
st.title("🧬 RCSB Protein Data Bank Visual Explorer")
# ------------------ DATA LOADING & REFRESH BUTTON ------------------ #
@st.cache_data(ttl=3600)
def fetch_entry_details():
    """Fetch release metadata for up to 100 experimental PDB entries.

    Queries the RCSB search service for entries whose
    ``rcsb_accession_info.initial_release_date`` attribute exists, then
    requests each hit from the core-entry endpoint of the RCSB data
    service. The result is cached by Streamlit for one hour (``ttl=3600``).

    Returns:
        pandas.DataFrame: one row per entry whose detail request returned
        HTTP 200, with columns ``id``, ``release_date``, ``method``,
        ``resolution`` and ``institution``. The columns are present even
        when no rows could be retrieved.

    Raises:
        requests.RequestException: if the search request cannot be
            completed or returns an HTTP error status.
    """
    search_url = "https://search.rcsb.org/rcsbsearch/v2/query"
    columns = ["id", "release_date", "method", "resolution", "institution"]
    # requests applies no timeout by default, so a stalled server would
    # otherwise block the app indefinitely.
    timeout = 30
    entry_query = {
        "query": {
            "type": "terminal",
            "service": "text",
            "parameters": {
                "attribute": "rcsb_accession_info.initial_release_date",
                "operator": "exists",
            },
        },
        "return_type": "entry",
        "request_options": {
            "paginate": {"start": 0, "rows": 100},
            "results_content_type": ["experimental"],
            "scoring_strategy": "combined",
        },
    }

    # One session reuses the connection across the per-entry requests.
    with requests.Session() as session:
        # Serialise with json.dumps and let requests URL-encode the value;
        # swapping quote characters in str(dict) is not valid JSON in general.
        res = session.get(
            search_url,
            params={"json": json.dumps(entry_query)},
            timeout=timeout,
        )
        # Fail loudly instead of returning (and caching) an empty result.
        res.raise_for_status()
        # An empty body (e.g. HTTP 204) cannot be parsed as JSON; treat it
        # as "no hits".
        if not res.content:
            return pd.DataFrame(columns=columns)
        entry_ids = [r["identifier"] for r in res.json().get("result_set", [])]

        entry_details = []
        for eid in entry_ids:
            detail_url = f"https://data.rcsb.org/rest/v1/core/entry/{eid}"
            try:
                r = session.get(detail_url, timeout=timeout)
            except requests.RequestException:
                # Best effort per entry, same as skipping non-200 responses.
                continue
            if r.status_code != 200:
                continue
            d = r.json()
            # "or" fallbacks also cover keys that are present but null/empty,
            # which would otherwise break the [0] lookups below.
            accession = d.get("rcsb_accession_info") or {}
            exptl = d.get("exptl") or [{}]
            entry_info = d.get("rcsb_entry_info") or {}
            resolutions = entry_info.get("resolution_combined") or [None]
            # NOTE(review): the PDBx dictionary defines deposit_site under
            # pdbx_database_status; the original lookup location is tried
            # first. Confirm against the RCSB Data API schema.
            deposit_site = (
                accession.get("deposit_site")
                or (d.get("pdbx_database_status") or {}).get("deposit_site")
                or "Unknown"
            )
            entry_details.append({
                "id": eid,
                "release_date": accession.get("initial_release_date"),
                "method": exptl[0].get("method") or "Unknown",
                "resolution": resolutions[0],
                "institution": deposit_site,
            })

    # Passing columns keeps the schema stable when entry_details is empty.
    return pd.DataFrame(entry_details, columns=columns)
# Clear the cached API results and rerun the script so data is fetched again.
if st.button("🔁 Refresh Data"):
    st.cache_data.clear()
    # st.experimental_rerun was superseded by st.rerun; prefer the stable
    # name and fall back only on Streamlit releases that predate it.
    if hasattr(st, "rerun"):
        st.rerun()
    else:
        st.experimental_rerun()

df = fetch_entry_details()
# Rows without a release date cannot be placed on any time axis.
if not df.empty:
    df.dropna(subset=["release_date"], inplace=True)
# Every section below needs at least one row; stop with a message instead
# of failing later on missing columns or out-of-range selections.
if df.empty:
    st.warning("No entries could be loaded from RCSB. Try refreshing the data.")
    st.stop()
df["year"] = pd.to_datetime(df["release_date"]).dt.year
# ------------------ VISUALIZATION 1 ------------------ #
# Line chart of entries per release year, optionally restricted to the
# experimental method chosen in the select box.
st.header("📈 Yearly Growth of Protein Structure Submissions")
method_options = ["All"] + sorted(df["method"].unique())
method_filter = st.selectbox("Filter by Experimental Method", method_options)
plot_df = df if method_filter == "All" else df[df["method"] == method_filter]
yearly_counts = plot_df.groupby("year").size().reset_index(name="count")
line_chart = (
    alt.Chart(yearly_counts)
    .mark_line(point=True)
    .encode(x="year:O", y="count:Q", tooltip=["year", "count"])
    .properties(height=300)
)
st.altair_chart(line_chart, use_container_width=True)
# ------------------ VISUALIZATION 2 ------------------ #
# Horizontal bar chart of the ten most frequent institution values,
# ignoring the "Unknown" placeholder.
st.header("🌍 Top Contributing Institutions")
known_institutions = df.loc[df["institution"] != "Unknown", "institution"]
top_insts = known_institutions.value_counts().nlargest(10).reset_index()
top_insts.columns = ["institution", "count"]
bar_chart = (
    alt.Chart(top_insts)
    .mark_bar()
    .encode(
        x="count:Q",
        y=alt.Y("institution:N", sort="-x"),
        color="institution:N",
        tooltip=["institution", "count"],
    )
    .properties(height=300)
)
st.altair_chart(bar_chart, use_container_width=True)
# ------------------ VISUALIZATION 3 ------------------ #
# Interactive scatter plot of resolution against release year, coloured by
# method; rows without a resolution value are left out.
st.header("🧬 Resolution vs Method")
scatter_df = df[df["resolution"].notna()]
scatter_points = alt.Chart(scatter_df).mark_circle(size=80)
scatter = (
    scatter_points.encode(
        x=alt.X("year:O"),
        y=alt.Y("resolution:Q"),
        color="method:N",
        tooltip=["id", "resolution", "method", "year"],
    )
    .interactive()
    .properties(height=300)
)
st.altair_chart(scatter, use_container_width=True)
# ------------------ VISUALIZATION 4 ------------------ #
# Mean resolution per method, drawn with the mark type the user selects.
st.header("📊 Select Chart Type for Resolution Analysis")
chart_type = st.selectbox("Choose Chart Type", ["Bar", "Area", "Line"])
res_df = df.dropna(subset=["resolution"])
agg_df = res_df.groupby("method")["resolution"].mean().reset_index()
# Only the mark differs between chart types; the encoding is shared.
resolution_base = alt.Chart(agg_df)
if chart_type == "Bar":
    resolution_marked = resolution_base.mark_bar()
elif chart_type == "Area":
    resolution_marked = resolution_base.mark_area()
else:
    resolution_marked = resolution_base.mark_line(point=True)
chart = resolution_marked.encode(x="method:N", y="resolution:Q", color="method:N")
st.altair_chart(chart, use_container_width=True)
# ------------------ VISUALIZATION 5 ------------------ #
# Table of the 20 rows with the greatest release_date values.
st.header("🧾 Latest Structures Table")
table_columns = ["id", "release_date", "method", "resolution", "institution"]
latest_df = df.sort_values(by="release_date", ascending=False).head(20)
latest_table = latest_df[table_columns].reset_index(drop=True)
st.dataframe(latest_table)
# ------------------ VISUALIZATION 6 ------------------ #
# One line per experimental method showing entries per release year.
st.header("📈 Submissions Over Time by Method")
method_trend = df.groupby(["year", "method"]).size().reset_index(name="count")
trend_chart = (
    alt.Chart(method_trend)
    .mark_line(point=True)
    .encode(
        x="year:O",
        y="count:Q",
        color="method:N",
        tooltip=["year", "method", "count"],
    )
    .properties(height=300)
)
st.altair_chart(trend_chart, use_container_width=True)
# ------------------ VISUALIZATION 7 ------------------ #
# Pie chart of how many entries use each experimental method.
st.header("📦 Method Usage Distribution")
method_dist = df["method"].value_counts().reset_index()
# Name the columns explicitly rather than relying on reset_index defaults.
method_dist.columns = ["method", "count"]
pie_chart = (
    alt.Chart(method_dist)
    .mark_arc()
    .encode(theta="count:Q", color="method:N", tooltip=["method", "count"])
)
st.altair_chart(pie_chart, use_container_width=True)
# ------------------ VISUALIZATION 8 ------------------ #
# Box plot of resolution values grouped by method; rows without a
# resolution value are excluded.
st.header("🎯 Resolution Distribution by Method")
box_data = df[df["resolution"].notna()]
box_encoding = {"x": "method:N", "y": "resolution:Q", "color": "method:N"}
box = alt.Chart(box_data).mark_boxplot().encode(**box_encoding)
st.altair_chart(box, use_container_width=True)
# ------------------ VISUALIZATION 9 ------------------ #
# Side-by-side view of the table rows for two user-selected entries.
st.header("🔍 Compare Two Entries")
entry_ids = df["id"].tolist()
col1, col2 = st.columns(2)
if entry_ids:
    entry1 = col1.selectbox("Select Entry 1", entry_ids, index=0)
    # Default the second box to a different entry when one exists; a fixed
    # index of 1 is out of range when only a single entry is available.
    second_default = min(1, len(entry_ids) - 1)
    entry2 = col2.selectbox("Select Entry 2", entry_ids, index=second_default)
    info1 = df[df["id"] == entry1].iloc[0]
    info2 = df[df["id"] == entry2].iloc[0]
    col1.subheader(f"Entry: {entry1}")
    col1.write(info1)
    col2.subheader(f"Entry: {entry2}")
    col2.write(info2)
else:
    st.info("No entries available to compare.")
# ------------------ VISUALIZATION 10 ------------------ #
# Bar chart of entries per release month, limited to the last 12 months.
st.header("📅 Monthly Submissions (Last 12 Months)")
# release_date still holds strings, so it must be parsed before it can be
# compared with a timestamp. utc=True makes every value timezone-aware so
# it is comparable with the UTC cutoff below.
release_ts = pd.to_datetime(df["release_date"], utc=True)
df["month"] = release_ts.dt.strftime("%Y-%m")
cutoff = pd.Timestamp.now(tz="UTC") - pd.DateOffset(months=12)
last_year_df = df[release_ts >= cutoff]
monthly = last_year_df.groupby("month").size().reset_index(name="count")
month_chart = (
    alt.Chart(monthly)
    .mark_bar()
    .encode(x="month:O", y="count:Q", tooltip=["month", "count"])
    .properties(height=300)
)
st.altair_chart(month_chart, use_container_width=True)