# app.py import os import pandas as pd import numpy as np import streamlit as st import plotly.express as px from st_aggrid import AgGrid, GridOptionsBuilder # --- 1) Data Loading & Cleaning --- def load_data(uploaded_file): # Read & skip top‑3 metadata rows; drop the extra header row df_raw = pd.ExcelFile(uploaded_file, sheet_name=0, skiprows=3) df = df_raw.iloc[1:].reset_index(drop=True) # Rename columns df.columns = [ 'S_No', 'District', 'Institution', 'V_Minority_S', 'V_Minority_A', 'V_NonMinority_S', 'V_NonMinority_A', 'Course', 'Inter1_Minority_S', 'Inter1_Minority_A', 'Inter1_NonMinority_S', 'Inter1_NonMinority_A' ] # Drop helper serial column df = df.drop(columns=['S_No']) # Force numeric columns num_cols = [ 'V_Minority_S','V_Minority_A', 'V_NonMinority_S','V_NonMinority_A', 'Inter1_Minority_S','Inter1_Minority_A', 'Inter1_NonMinority_S','Inter1_NonMinority_A' ] df[num_cols] = df[num_cols].apply(pd.to_numeric, errors='coerce') # Coerce all other columns to plain Python strings for c in df.columns: if c not in num_cols: df[c] = df[c].fillna("").astype(str) return df # --- 2) Streamlit App --- def main(): st.set_page_config(page_title="TMREIS Admissions Dashboard", layout="wide") st.title("📊 TMREIS Admissions & Vacancy Dashboard") st.markdown( "Upload a monthly admissions report, filter by district/course, and explore " "KPIs, interactive tables, and rich visualizations." ) # Sidebar: upload uploaded = pd.ExcelFile('Dataset.xlsx') if not uploaded: st.sidebar.info("Awaiting your Excel file…") return # Load data df = load_data(uploaded) # --- Sidebar Filters --- districts = sorted(df['District'].unique().tolist()) selected_districts = st.sidebar.multiselect("Filter: District(s)", districts, default=districts) courses = sorted(df['Course'].unique().tolist()) selected_course = st.sidebar.selectbox("Filter: Course", ["All"] + courses) level = st.sidebar.radio("Select Level", ["Class V", "Inter 1"]) metric = st.sidebar.radio("Metric", ["Admission", "Vacancies"]) breakdown = st.sidebar.multiselect("Breakdown by", ["Minority", "Non-Minority"], default=["Minority", "Non-Minority"]) # Apply filters df_f = df[df['District'].isin(selected_districts)].copy() if selected_course != "All": df_f = df_f[df_f['Course'] == selected_course] # Determine columns for admissions vs sanctioned if level == "Class V": adm_cols = {"Minority": "V_Minority_A", "Non-Minority": "V_NonMinority_A"} sanc_cols = {"Minority": "V_Minority_S", "Non-Minority": "V_NonMinority_S"} else: adm_cols = {"Minority": "Inter1_Minority_A", "Non-Minority": "Inter1_NonMinority_A"} sanc_cols = {"Minority": "Inter1_Minority_S", "Non-Minority": "Inter1_NonMinority_S"} # --- KPIs --- st.subheader("Key Performance Indicators") kpi_cols = st.columns(len(breakdown)) for idx, grp in enumerate(breakdown): total_san = int(df_f[sanc_cols[grp]].sum()) total_adm = int(df_f[adm_cols[grp]].sum()) vac = total_san - total_adm kpi_cols[idx].metric(f"{grp} Sanctioned", total_san) kpi_cols[idx].metric(f"{grp} Admitted", total_adm, f"{vac} Vacancies") # --- Interactive Table via AgGrid --- st.subheader("Detailed Institution‑Level Data") gb = GridOptionsBuilder.from_dataframe(df_f) gb.configure_default_column(filter=True, sortable=True, resizable=True) gb.configure_pagination(paginationAutoPageSize=True) AgGrid(df_f, gridOptions=gb.build(), enable_enterprise_modules=False) # --- 1) Admissions / Vacancies by District --- st.subheader(f"{metric} by District") if metric == "Admission": summary = df_f.groupby("District")[[adm_cols[g] for g in breakdown]].sum().reset_index() fig1 = px.bar( summary, x="District", y=[adm_cols[g] for g in breakdown], barmode="group", labels={"value":"Count","variable":"Category"}, title=f"{level} Admissions by District" ) else: sum_s = df_f.groupby("District")[[sanc_cols[g] for g in breakdown]].sum() sum_a = df_f.groupby("District")[[adm_cols[g] for g in breakdown]].sum() vac_df = (sum_s - sum_a).reset_index() vac_df.columns = ["District"] + breakdown fig1 = px.bar( vac_df, x="District", y=breakdown, barmode="group", labels={"value":"Vacancies","variable":"Category"}, title=f"{level} Vacancies by District" ) st.plotly_chart(fig1, use_container_width=True) # --- 2) Vacancy Rate Heatmap --- st.subheader("Vacancy Rate Heatmap") sum_san = df_f.groupby("District")[[sanc_cols[g] for g in breakdown]].sum() sum_adm = df_f.groupby("District")[[adm_cols[g] for g in breakdown]].sum() vr_df = pd.DataFrame(index=sum_san.index) for grp in breakdown: vr_df[grp] = (sum_san[sanc_cols[grp]] - sum_adm[adm_cols[grp]]) / sum_san[sanc_cols[grp]].replace({0: np.nan}) fig_hm = px.imshow( vr_df, labels={"x":"Category","y":"District","color":"Vacancy Rate"}, text_auto=".0%", aspect="auto", color_continuous_scale="Reds", title=f"{level} Vacancy Rate by District" ) st.plotly_chart(fig_hm, use_container_width=True) # --- 3) Overall Admitted vs Vacant Donut --- st.subheader(f"{level} Seat Distribution") total_san = df_f[[sanc_cols[grp] for grp in breakdown]].sum().sum() total_adm = df_f[[adm_cols[grp] for grp in breakdown]].sum().sum() pie_df = pd.DataFrame({ "Status": ["Admitted", "Vacant"], "Count": [total_adm, total_san - total_adm] }) fig_pie = px.pie( pie_df, names="Status", values="Count", hole=0.4, title=f"{level}: Admitted vs Vacant" ) st.plotly_chart(fig_pie, use_container_width=True) # --- 4) Top 10 Institutions by Vacancies (H‑Bar) --- st.subheader("Top 10 Institutions by Vacancies") df_f["Vacancies"] = df_f[[sanc_cols[grp] for grp in breakdown]].sum(axis=1) \ - df_f[[adm_cols[grp] for grp in breakdown]].sum(axis=1) top10 = df_f.nlargest(10, "Vacancies")[["Institution","Vacancies"]] top10["Institution"] = top10["Institution"].astype(str) fig_hbar = px.bar( top10.sort_values("Vacancies"), x="Vacancies", y="Institution", orientation="h", labels={"Vacancies":"Vacant Seats","Institution":""}, title="Top 10 Institutions by Vacancies" ) st.plotly_chart(fig_hbar, use_container_width=True) # --- 5) Admission Efficiency Scatter (Bubble) --- st.subheader("Sanctioned vs Admitted (Bubble = Vacancy Rate)") # compute totals df_f["Total_Sanctioned"] = df_f[[sanc_cols[grp] for grp in breakdown]].sum(axis=1) df_f["Total_Admitted"] = df_f[[adm_cols[grp] for grp in breakdown]].sum(axis=1) # vacancy rate, clipped at 0 so Plotly can use it as a marker size df_f["Vacancy_Rate"] = ( (df_f["Total_Sanctioned"] - df_f["Total_Admitted"]) / df_f["Total_Sanctioned"].replace({0: np.nan}) ).clip(lower=0) fig_sc = px.scatter( df_f, x="Total_Sanctioned", y="Total_Admitted", size="Vacancy_Rate", # now guaranteed ≥0 color="District", hover_data=["Institution"], labels={ "Total_Sanctioned": "Sanctioned Seats", "Total_Admitted": "Admitted Seats", "Vacancy_Rate": "Vacancy Rate" }, title="Sanctioned vs Admitted (Bubble size = Vacancy Rate)" ) st.plotly_chart(fig_sc, use_container_width=True) # --- Optional LLM Q&A --- if os.getenv("OPENAI_API_KEY"): import openai openai.api_key = os.getenv("OPENAI_API_KEY") st.subheader("🤖 Ask the Dashboard (LLM Insight)") q = st.text_input("Enter your question about this data:") if q: with st.spinner("Generating answer…"): resp = openai.ChatCompletion.create( model="gpt-3.5-turbo", messages=[ {"role":"system","content":"You are a senior data analyst."}, {"role":"user", "content":( f"Data summary: {df_f.describe().to_dict()}\n" f"Question: {q}" ) } ], max_tokens=200 ) st.write(resp.choices[0].message.content) if __name__ == "__main__": main()