Spaces:
Sleeping
Sleeping
| # app.py | |
| import os | |
| import pandas as pd | |
| import numpy as np | |
| import streamlit as st | |
| import plotly.express as px | |
| from st_aggrid import AgGrid, GridOptionsBuilder | |
| # --- 1) Data Loading & Cleaning --- | |
| def load_data(uploaded_file): | |
| # Read & skip top‑3 metadata rows; drop the extra header row | |
| df_raw = pd.ExcelFile(uploaded_file, sheet_name=0, skiprows=3) | |
| df = df_raw.iloc[1:].reset_index(drop=True) | |
| # Rename columns | |
| df.columns = [ | |
| 'S_No', 'District', 'Institution', | |
| 'V_Minority_S', 'V_Minority_A', | |
| 'V_NonMinority_S', 'V_NonMinority_A', | |
| 'Course', | |
| 'Inter1_Minority_S', 'Inter1_Minority_A', | |
| 'Inter1_NonMinority_S', 'Inter1_NonMinority_A' | |
| ] | |
| # Drop helper serial column | |
| df = df.drop(columns=['S_No']) | |
| # Force numeric columns | |
| num_cols = [ | |
| 'V_Minority_S','V_Minority_A', | |
| 'V_NonMinority_S','V_NonMinority_A', | |
| 'Inter1_Minority_S','Inter1_Minority_A', | |
| 'Inter1_NonMinority_S','Inter1_NonMinority_A' | |
| ] | |
| df[num_cols] = df[num_cols].apply(pd.to_numeric, errors='coerce') | |
| # Coerce all other columns to plain Python strings | |
| for c in df.columns: | |
| if c not in num_cols: | |
| df[c] = df[c].fillna("").astype(str) | |
| return df | |
| # --- 2) Streamlit App --- | |
| def main(): | |
| st.set_page_config(page_title="TMREIS Admissions Dashboard", layout="wide") | |
| st.title("📊 TMREIS Admissions & Vacancy Dashboard") | |
| st.markdown( | |
| "Upload a monthly admissions report, filter by district/course, and explore " | |
| "KPIs, interactive tables, and rich visualizations." | |
| ) | |
| # Sidebar: upload | |
| uploaded = pd.ExcelFile('Dataset.xlsx') | |
| if not uploaded: | |
| st.sidebar.info("Awaiting your Excel file…") | |
| return | |
| # Load data | |
| df = load_data(uploaded) | |
| # --- Sidebar Filters --- | |
| districts = sorted(df['District'].unique().tolist()) | |
| selected_districts = st.sidebar.multiselect("Filter: District(s)", districts, default=districts) | |
| courses = sorted(df['Course'].unique().tolist()) | |
| selected_course = st.sidebar.selectbox("Filter: Course", ["All"] + courses) | |
| level = st.sidebar.radio("Select Level", ["Class V", "Inter 1"]) | |
| metric = st.sidebar.radio("Metric", ["Admission", "Vacancies"]) | |
| breakdown = st.sidebar.multiselect("Breakdown by", ["Minority", "Non-Minority"], default=["Minority", "Non-Minority"]) | |
| # Apply filters | |
| df_f = df[df['District'].isin(selected_districts)].copy() | |
| if selected_course != "All": | |
| df_f = df_f[df_f['Course'] == selected_course] | |
| # Determine columns for admissions vs sanctioned | |
| if level == "Class V": | |
| adm_cols = {"Minority": "V_Minority_A", "Non-Minority": "V_NonMinority_A"} | |
| sanc_cols = {"Minority": "V_Minority_S", "Non-Minority": "V_NonMinority_S"} | |
| else: | |
| adm_cols = {"Minority": "Inter1_Minority_A", "Non-Minority": "Inter1_NonMinority_A"} | |
| sanc_cols = {"Minority": "Inter1_Minority_S", "Non-Minority": "Inter1_NonMinority_S"} | |
| # --- KPIs --- | |
| st.subheader("Key Performance Indicators") | |
| kpi_cols = st.columns(len(breakdown)) | |
| for idx, grp in enumerate(breakdown): | |
| total_san = int(df_f[sanc_cols[grp]].sum()) | |
| total_adm = int(df_f[adm_cols[grp]].sum()) | |
| vac = total_san - total_adm | |
| kpi_cols[idx].metric(f"{grp} Sanctioned", total_san) | |
| kpi_cols[idx].metric(f"{grp} Admitted", total_adm, f"{vac} Vacancies") | |
| # --- Interactive Table via AgGrid --- | |
| st.subheader("Detailed Institution‑Level Data") | |
| gb = GridOptionsBuilder.from_dataframe(df_f) | |
| gb.configure_default_column(filter=True, sortable=True, resizable=True) | |
| gb.configure_pagination(paginationAutoPageSize=True) | |
| AgGrid(df_f, gridOptions=gb.build(), enable_enterprise_modules=False) | |
| # --- 1) Admissions / Vacancies by District --- | |
| st.subheader(f"{metric} by District") | |
| if metric == "Admission": | |
| summary = df_f.groupby("District")[[adm_cols[g] for g in breakdown]].sum().reset_index() | |
| fig1 = px.bar( | |
| summary, | |
| x="District", | |
| y=[adm_cols[g] for g in breakdown], | |
| barmode="group", | |
| labels={"value":"Count","variable":"Category"}, | |
| title=f"{level} Admissions by District" | |
| ) | |
| else: | |
| sum_s = df_f.groupby("District")[[sanc_cols[g] for g in breakdown]].sum() | |
| sum_a = df_f.groupby("District")[[adm_cols[g] for g in breakdown]].sum() | |
| vac_df = (sum_s - sum_a).reset_index() | |
| vac_df.columns = ["District"] + breakdown | |
| fig1 = px.bar( | |
| vac_df, | |
| x="District", | |
| y=breakdown, | |
| barmode="group", | |
| labels={"value":"Vacancies","variable":"Category"}, | |
| title=f"{level} Vacancies by District" | |
| ) | |
| st.plotly_chart(fig1, use_container_width=True) | |
| # --- 2) Vacancy Rate Heatmap --- | |
| st.subheader("Vacancy Rate Heatmap") | |
| sum_san = df_f.groupby("District")[[sanc_cols[g] for g in breakdown]].sum() | |
| sum_adm = df_f.groupby("District")[[adm_cols[g] for g in breakdown]].sum() | |
| vr_df = pd.DataFrame(index=sum_san.index) | |
| for grp in breakdown: | |
| vr_df[grp] = (sum_san[sanc_cols[grp]] - sum_adm[adm_cols[grp]]) / sum_san[sanc_cols[grp]].replace({0: np.nan}) | |
| fig_hm = px.imshow( | |
| vr_df, | |
| labels={"x":"Category","y":"District","color":"Vacancy Rate"}, | |
| text_auto=".0%", | |
| aspect="auto", | |
| color_continuous_scale="Reds", | |
| title=f"{level} Vacancy Rate by District" | |
| ) | |
| st.plotly_chart(fig_hm, use_container_width=True) | |
| # --- 3) Overall Admitted vs Vacant Donut --- | |
| st.subheader(f"{level} Seat Distribution") | |
| total_san = df_f[[sanc_cols[grp] for grp in breakdown]].sum().sum() | |
| total_adm = df_f[[adm_cols[grp] for grp in breakdown]].sum().sum() | |
| pie_df = pd.DataFrame({ | |
| "Status": ["Admitted", "Vacant"], | |
| "Count": [total_adm, total_san - total_adm] | |
| }) | |
| fig_pie = px.pie( | |
| pie_df, | |
| names="Status", | |
| values="Count", | |
| hole=0.4, | |
| title=f"{level}: Admitted vs Vacant" | |
| ) | |
| st.plotly_chart(fig_pie, use_container_width=True) | |
| # --- 4) Top 10 Institutions by Vacancies (H‑Bar) --- | |
| st.subheader("Top 10 Institutions by Vacancies") | |
| df_f["Vacancies"] = df_f[[sanc_cols[grp] for grp in breakdown]].sum(axis=1) \ | |
| - df_f[[adm_cols[grp] for grp in breakdown]].sum(axis=1) | |
| top10 = df_f.nlargest(10, "Vacancies")[["Institution","Vacancies"]] | |
| top10["Institution"] = top10["Institution"].astype(str) | |
| fig_hbar = px.bar( | |
| top10.sort_values("Vacancies"), | |
| x="Vacancies", | |
| y="Institution", | |
| orientation="h", | |
| labels={"Vacancies":"Vacant Seats","Institution":""}, | |
| title="Top 10 Institutions by Vacancies" | |
| ) | |
| st.plotly_chart(fig_hbar, use_container_width=True) | |
| # --- 5) Admission Efficiency Scatter (Bubble) --- | |
| st.subheader("Sanctioned vs Admitted (Bubble = Vacancy Rate)") | |
| # compute totals | |
| df_f["Total_Sanctioned"] = df_f[[sanc_cols[grp] for grp in breakdown]].sum(axis=1) | |
| df_f["Total_Admitted"] = df_f[[adm_cols[grp] for grp in breakdown]].sum(axis=1) | |
| # vacancy rate, clipped at 0 so Plotly can use it as a marker size | |
| df_f["Vacancy_Rate"] = ( | |
| (df_f["Total_Sanctioned"] - df_f["Total_Admitted"]) | |
| / df_f["Total_Sanctioned"].replace({0: np.nan}) | |
| ).clip(lower=0) | |
| fig_sc = px.scatter( | |
| df_f, | |
| x="Total_Sanctioned", | |
| y="Total_Admitted", | |
| size="Vacancy_Rate", # now guaranteed ≥0 | |
| color="District", | |
| hover_data=["Institution"], | |
| labels={ | |
| "Total_Sanctioned": "Sanctioned Seats", | |
| "Total_Admitted": "Admitted Seats", | |
| "Vacancy_Rate": "Vacancy Rate" | |
| }, | |
| title="Sanctioned vs Admitted (Bubble size = Vacancy Rate)" | |
| ) | |
| st.plotly_chart(fig_sc, use_container_width=True) | |
| # --- Optional LLM Q&A --- | |
| if os.getenv("OPENAI_API_KEY"): | |
| import openai | |
| openai.api_key = os.getenv("OPENAI_API_KEY") | |
| st.subheader("🤖 Ask the Dashboard (LLM Insight)") | |
| q = st.text_input("Enter your question about this data:") | |
| if q: | |
| with st.spinner("Generating answer…"): | |
| resp = openai.ChatCompletion.create( | |
| model="gpt-3.5-turbo", | |
| messages=[ | |
| {"role":"system","content":"You are a senior data analyst."}, | |
| {"role":"user", | |
| "content":( | |
| f"Data summary: {df_f.describe().to_dict()}\n" | |
| f"Question: {q}" | |
| ) | |
| } | |
| ], | |
| max_tokens=200 | |
| ) | |
| st.write(resp.choices[0].message.content) | |
| if __name__ == "__main__": | |
| main() | |