Dashboard / src /streamlit_app.py
DataWizard9742's picture
Update src/streamlit_app.py
e171752 verified
# app.py
import os
import pandas as pd
import numpy as np
import streamlit as st
import plotly.express as px
from st_aggrid import AgGrid, GridOptionsBuilder
# --- 1) Data Loading & Cleaning ---
def load_data(uploaded_file):
# Read & skip top‑3 metadata rows; drop the extra header row
df_raw = pd.read_excel(uploaded_file, sheet_name=0, skiprows=3)
df = df_raw.iloc[1:].reset_index(drop=True)
# Rename columns
df.columns = [
'S_No', 'District', 'Institution',
'V_Minority_S', 'V_Minority_A',
'V_NonMinority_S', 'V_NonMinority_A',
'Course',
'Inter1_Minority_S', 'Inter1_Minority_A',
'Inter1_NonMinority_S', 'Inter1_NonMinority_A'
]
# Drop helper serial column
df = df.drop(columns=['S_No'])
# Force numeric columns
num_cols = [
'V_Minority_S','V_Minority_A',
'V_NonMinority_S','V_NonMinority_A',
'Inter1_Minority_S','Inter1_Minority_A',
'Inter1_NonMinority_S','Inter1_NonMinority_A'
]
df[num_cols] = df[num_cols].apply(pd.to_numeric, errors='coerce')
# Coerce all other columns to plain Python strings
for c in df.columns:
if c not in num_cols:
df[c] = df[c].fillna("").astype(str)
return df
# --- 2) Streamlit App ---
def main():
st.set_page_config(page_title="TMREIS Admissions Dashboard", layout="wide")
st.title("📊 TMREIS Admissions & Vacancy Dashboard")
st.markdown(
"Upload a monthly admissions report, filter by district/course, and explore "
"KPIs, interactive tables, and rich visualizations."
)
# Sidebar: upload
uploaded = pd.ExcelFile('src/Dataset2.xlsx')
if not uploaded:
st.sidebar.info("Awaiting your Excel file…")
return
# Load data
df = load_data(uploaded)
# --- Sidebar Filters ---
districts = sorted(df['District'].unique().tolist())
selected_districts = st.sidebar.multiselect("Filter: District(s)", districts, default=districts)
courses = sorted(df['Course'].unique().tolist())
selected_course = st.sidebar.selectbox("Filter: Course", ["All"] + courses)
level = st.sidebar.radio("Select Level", ["Class V", "Inter 1"])
metric = st.sidebar.radio("Metric", ["Admission", "Vacancies"])
breakdown = st.sidebar.multiselect("Breakdown by", ["Minority", "Non-Minority"], default=["Minority", "Non-Minority"])
# Apply filters
df_f = df[df['District'].isin(selected_districts)].copy()
if selected_course != "All":
df_f = df_f[df_f['Course'] == selected_course]
# Determine columns for admissions vs sanctioned
if level == "Class V":
adm_cols = {"Minority": "V_Minority_A", "Non-Minority": "V_NonMinority_A"}
sanc_cols = {"Minority": "V_Minority_S", "Non-Minority": "V_NonMinority_S"}
else:
adm_cols = {"Minority": "Inter1_Minority_A", "Non-Minority": "Inter1_NonMinority_A"}
sanc_cols = {"Minority": "Inter1_Minority_S", "Non-Minority": "Inter1_NonMinority_S"}
# --- KPIs ---
st.subheader("Key Performance Indicators")
kpi_cols = st.columns(len(breakdown))
for idx, grp in enumerate(breakdown):
total_san = int(df_f[sanc_cols[grp]].sum())
total_adm = int(df_f[adm_cols[grp]].sum())
vac = total_san - total_adm
kpi_cols[idx].metric(f"{grp} Sanctioned", total_san)
kpi_cols[idx].metric(f"{grp} Admitted", total_adm, f"{vac} Vacancies")
# --- Interactive Table via AgGrid ---
st.subheader("Detailed Institution‑Level Data")
gb = GridOptionsBuilder.from_dataframe(df_f)
gb.configure_default_column(filter=True, sortable=True, resizable=True)
gb.configure_pagination(paginationAutoPageSize=True)
AgGrid(df_f, gridOptions=gb.build(), enable_enterprise_modules=False)
# --- 1) Admissions / Vacancies by District ---
st.subheader(f"{metric} by District")
if metric == "Admission":
summary = df_f.groupby("District")[[adm_cols[g] for g in breakdown]].sum().reset_index()
fig1 = px.bar(
summary,
x="District",
y=[adm_cols[g] for g in breakdown],
barmode="group",
labels={"value":"Count","variable":"Category"},
title=f"{level} Admissions by District"
)
else:
sum_s = df_f.groupby("District")[[sanc_cols[g] for g in breakdown]].sum()
sum_a = df_f.groupby("District")[[adm_cols[g] for g in breakdown]].sum()
vac_df = (sum_s - sum_a).reset_index()
vac_df.columns = ["District"] + breakdown
fig1 = px.bar(
vac_df,
x="District",
y=breakdown,
barmode="group",
labels={"value":"Vacancies","variable":"Category"},
title=f"{level} Vacancies by District"
)
st.plotly_chart(fig1, use_container_width=True)
# --- 2) Vacancy Rate Heatmap ---
st.subheader("Vacancy Rate Heatmap")
sum_san = df_f.groupby("District")[[sanc_cols[g] for g in breakdown]].sum()
sum_adm = df_f.groupby("District")[[adm_cols[g] for g in breakdown]].sum()
vr_df = pd.DataFrame(index=sum_san.index)
for grp in breakdown:
vr_df[grp] = (sum_san[sanc_cols[grp]] - sum_adm[adm_cols[grp]]) / sum_san[sanc_cols[grp]].replace({0: np.nan})
fig_hm = px.imshow(
vr_df,
labels={"x":"Category","y":"District","color":"Vacancy Rate"},
text_auto=".0%",
aspect="auto",
color_continuous_scale="Reds",
title=f"{level} Vacancy Rate by District"
)
st.plotly_chart(fig_hm, use_container_width=True)
# --- 3) Overall Admitted vs Vacant Donut ---
st.subheader(f"{level} Seat Distribution")
total_san = df_f[[sanc_cols[grp] for grp in breakdown]].sum().sum()
total_adm = df_f[[adm_cols[grp] for grp in breakdown]].sum().sum()
pie_df = pd.DataFrame({
"Status": ["Admitted", "Vacant"],
"Count": [total_adm, total_san - total_adm]
})
fig_pie = px.pie(
pie_df,
names="Status",
values="Count",
hole=0.4,
title=f"{level}: Admitted vs Vacant"
)
st.plotly_chart(fig_pie, use_container_width=True)
# --- 4) Top 10 Institutions by Vacancies (H‑Bar) ---
st.subheader("Top 10 Institutions by Vacancies")
df_f["Vacancies"] = df_f[[sanc_cols[grp] for grp in breakdown]].sum(axis=1) \
- df_f[[adm_cols[grp] for grp in breakdown]].sum(axis=1)
top10 = df_f.nlargest(10, "Vacancies")[["Institution","Vacancies"]]
top10["Institution"] = top10["Institution"].astype(str)
fig_hbar = px.bar(
top10.sort_values("Vacancies"),
x="Vacancies",
y="Institution",
orientation="h",
labels={"Vacancies":"Vacant Seats","Institution":""},
title="Top 10 Institutions by Vacancies"
)
st.plotly_chart(fig_hbar, use_container_width=True)
# --- 5) Admission Efficiency Scatter (Bubble) ---
st.subheader("Sanctioned vs Admitted (Bubble = Vacancy Rate)")
# compute totals
df_f["Total_Sanctioned"] = df_f[[sanc_cols[grp] for grp in breakdown]].sum(axis=1)
df_f["Total_Admitted"] = df_f[[adm_cols[grp] for grp in breakdown]].sum(axis=1)
# vacancy rate, clipped at 0 so Plotly can use it as a marker size
df_f["Vacancy_Rate"] = (
(df_f["Total_Sanctioned"] - df_f["Total_Admitted"])
/ df_f["Total_Sanctioned"].replace({0: np.nan})
).clip(lower=0)
fig_sc = px.scatter(
df_f,
x="Total_Sanctioned",
y="Total_Admitted",
size="Vacancy_Rate", # now guaranteed ≥0
color="District",
hover_data=["Institution"],
labels={
"Total_Sanctioned": "Sanctioned Seats",
"Total_Admitted": "Admitted Seats",
"Vacancy_Rate": "Vacancy Rate"
},
title="Sanctioned vs Admitted (Bubble size = Vacancy Rate)"
)
st.plotly_chart(fig_sc, use_container_width=True)
# --- Optional LLM Q&A ---
if os.getenv("OPENAI_API_KEY"):
import openai
openai.api_key = os.getenv("OPENAI_API_KEY")
st.subheader("🤖 Ask the Dashboard (LLM Insight)")
q = st.text_input("Enter your question about this data:")
if q:
with st.spinner("Generating answer…"):
resp = openai.ChatCompletion.create(
model="gpt-3.5-turbo",
messages=[
{"role":"system","content":"You are a senior data analyst."},
{"role":"user",
"content":(
f"Data summary: {df_f.describe().to_dict()}\n"
f"Question: {q}"
)
}
],
max_tokens=200
)
st.write(resp.choices[0].message.content)
if __name__ == "__main__":
main()