DataWizard9742 committed on
Commit
ffe8b32
·
verified ·
1 Parent(s): b72b7cb

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +234 -0
app.py ADDED
@@ -0,0 +1,234 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+
3
+ import os
4
+ import pandas as pd
5
+ import numpy as np
6
+ import streamlit as st
7
+ import plotly.express as px
8
+ from st_aggrid import AgGrid, GridOptionsBuilder
9
+
10
+ # --- 1) Data Loading & Cleaning ---
11
def load_data(uploaded_file):
    """Read the first sheet of an admissions workbook into a cleaned DataFrame.

    The first three rows of the sheet are skipped, the next row becomes the
    (discarded) pandas header, and the first remaining row is dropped as an
    extra header row.  Columns are then renamed positionally, the serial
    number column is removed, seat-count columns are coerced to numbers
    (unparseable cells become NaN) and every other column is converted to
    plain strings with missing values replaced by "".

    Args:
        uploaded_file: Anything ``pandas.read_excel`` accepts as its first
            argument.

    Returns:
        The cleaned ``pandas.DataFrame``.

    NOTE(review): the positional rename assumes the sheet has exactly twelve
    columns in the order listed below -- confirm against the report layout.
    """
    seat_fields = [
        'V_Minority_S', 'V_Minority_A',
        'V_NonMinority_S', 'V_NonMinority_A',
        'Inter1_Minority_S', 'Inter1_Minority_A',
        'Inter1_NonMinority_S', 'Inter1_NonMinority_A',
    ]
    header = [
        'S_No', 'District', 'Institution',
        'V_Minority_S', 'V_Minority_A',
        'V_NonMinority_S', 'V_NonMinority_A',
        'Course',
        'Inter1_Minority_S', 'Inter1_Minority_A',
        'Inter1_NonMinority_S', 'Inter1_NonMinority_A',
    ]

    # Skip the three leading metadata rows, then discard the duplicate header row.
    sheet = pd.read_excel(uploaded_file, sheet_name=0, skiprows=3)
    table = sheet.iloc[1:].reset_index(drop=True)

    table.columns = header
    table = table.drop(columns=['S_No'])

    # Seat counts become numeric; everything else becomes a plain string.
    for name in table.columns:
        if name in seat_fields:
            table[name] = pd.to_numeric(table[name], errors='coerce')
        else:
            table[name] = table[name].fillna("").astype(str)

    return table
44
+
45
+ # --- 2) Streamlit App ---
46
def _level_columns(level):
    """Return the (admitted, sanctioned) column-name maps for the chosen level."""
    prefix = "V" if level == "Class V" else "Inter1"
    adm_cols = {
        "Minority": f"{prefix}_Minority_A",
        "Non-Minority": f"{prefix}_NonMinority_A",
    }
    sanc_cols = {
        "Minority": f"{prefix}_Minority_S",
        "Non-Minority": f"{prefix}_NonMinority_S",
    }
    return adm_cols, sanc_cols


def _district_totals(frame, cols, breakdown):
    """Sum the selected groups' columns per district, labelled by group name."""
    totals = frame.groupby("District")[[cols[g] for g in breakdown]].sum()
    # Relabel to the group names so that sanctioned and admitted totals share
    # column labels and DataFrame arithmetic lines up column by column.
    totals.columns = list(breakdown)
    return totals


def _ask_llm(question, frame):
    """Send the question plus a describe() summary of frame to the chat model."""
    import openai

    api_key = os.getenv("OPENAI_API_KEY")
    request = {
        "model": "gpt-3.5-turbo",
        "messages": [
            {"role": "system", "content": "You are a senior data analyst."},
            {
                "role": "user",
                "content": (
                    f"Data summary: {frame.describe().to_dict()}\n"
                    f"Question: {question}"
                ),
            },
        ],
        "max_tokens": 200,
    }
    if hasattr(openai, "OpenAI"):
        # openai>=1.0 removed the module-level ChatCompletion interface.
        resp = openai.OpenAI(api_key=api_key).chat.completions.create(**request)
    else:
        openai.api_key = api_key
        resp = openai.ChatCompletion.create(**request)
    return resp.choices[0].message.content


def main():
    """Render the TMREIS admissions dashboard.

    Obtains an Excel report from the sidebar uploader (falling back to the
    bundled workbook path when it exists on disk), applies the sidebar
    filters, and draws KPIs, an interactive table and five charts.  When the
    OPENAI_API_KEY environment variable is set, an optional question box
    forwards a data summary to a chat model.
    """
    st.set_page_config(page_title="TMREIS Admissions Dashboard", layout="wide")
    st.title("📊 TMREIS Admissions & Vacancy Dashboard")
    st.markdown(
        "Upload a monthly admissions report, filter by district/course, and explore "
        "KPIs, interactive tables, and rich visualizations."
    )

    # Sidebar: upload.  The source must be a path or file-like object, not an
    # already-parsed DataFrame, because load_data() does the parsing itself.
    default_path = '/.Dataset.xlsx'
    source = st.sidebar.file_uploader(
        "Upload admissions report (Excel)", type=["xlsx", "xls"]
    )
    if source is None:
        if os.path.exists(default_path):
            source = default_path
        else:
            st.sidebar.info("Awaiting your Excel file…")
            return

    # Load data
    df = load_data(source)

    # --- Sidebar Filters ---
    districts = sorted(df['District'].unique().tolist())
    selected_districts = st.sidebar.multiselect(
        "Filter: District(s)", districts, default=districts
    )

    courses = sorted(df['Course'].unique().tolist())
    selected_course = st.sidebar.selectbox("Filter: Course", ["All"] + courses)

    level = st.sidebar.radio("Select Level", ["Class V", "Inter 1"])
    metric = st.sidebar.radio("Metric", ["Admission", "Vacancies"])
    breakdown = st.sidebar.multiselect(
        "Breakdown by",
        ["Minority", "Non-Minority"],
        default=["Minority", "Non-Minority"],
    )

    # st.columns() and the charts below need at least one group.
    if not breakdown:
        st.warning("Select at least one breakdown group to display the dashboard.")
        return

    # Apply filters
    df_f = df[df['District'].isin(selected_districts)].copy()
    if selected_course != "All":
        df_f = df_f[df_f['Course'] == selected_course]

    if df_f.empty:
        st.warning("No rows match the current filters.")
        return

    adm_cols, sanc_cols = _level_columns(level)
    adm_selected = [adm_cols[g] for g in breakdown]
    sanc_selected = [sanc_cols[g] for g in breakdown]

    # --- KPIs ---
    st.subheader("Key Performance Indicators")
    kpi_cols = st.columns(len(breakdown))
    for slot, grp in zip(kpi_cols, breakdown):
        total_san = int(df_f[sanc_cols[grp]].sum())
        total_adm = int(df_f[adm_cols[grp]].sum())
        slot.metric(f"{grp} Sanctioned", total_san)
        slot.metric(f"{grp} Admitted", total_adm, f"{total_san - total_adm} Vacancies")

    # --- Interactive Table via AgGrid ---
    # Rendered before the derived columns are added so the table shows only
    # the columns produced by load_data().
    st.subheader("Detailed Institution‑Level Data")
    gb = GridOptionsBuilder.from_dataframe(df_f)
    gb.configure_default_column(filter=True, sortable=True, resizable=True)
    gb.configure_pagination(paginationAutoPageSize=True)
    AgGrid(df_f, gridOptions=gb.build(), enable_enterprise_modules=False)

    # Per-district totals, both labelled by group name so they align.
    sum_san = _district_totals(df_f, sanc_cols, breakdown)
    sum_adm = _district_totals(df_f, adm_cols, breakdown)
    vacancies = sum_san - sum_adm

    # --- 1) Admissions / Vacancies by District ---
    st.subheader(f"{metric} by District")
    if metric == "Admission":
        fig1 = px.bar(
            sum_adm.reset_index(),
            x="District",
            y=list(breakdown),
            barmode="group",
            labels={"value": "Count", "variable": "Category"},
            title=f"{level} Admissions by District",
        )
    else:
        fig1 = px.bar(
            vacancies.reset_index(),
            x="District",
            y=list(breakdown),
            barmode="group",
            labels={"value": "Vacancies", "variable": "Category"},
            title=f"{level} Vacancies by District",
        )
    st.plotly_chart(fig1, use_container_width=True)

    # --- 2) Vacancy Rate Heatmap ---
    st.subheader("Vacancy Rate Heatmap")
    # Zero sanctioned seats would divide by zero; show those cells as missing.
    vr_df = vacancies / sum_san.replace({0: np.nan})
    fig_hm = px.imshow(
        vr_df,
        labels={"x": "Category", "y": "District", "color": "Vacancy Rate"},
        text_auto=".0%",
        aspect="auto",
        color_continuous_scale="Reds",
        title=f"{level} Vacancy Rate by District",
    )
    st.plotly_chart(fig_hm, use_container_width=True)

    # --- 3) Overall Admitted vs Vacant Donut ---
    st.subheader(f"{level} Seat Distribution")
    total_san = df_f[sanc_selected].sum().sum()
    total_adm = df_f[adm_selected].sum().sum()
    pie_df = pd.DataFrame({
        "Status": ["Admitted", "Vacant"],
        # A pie slice cannot be negative; over-admission shows as zero vacant.
        "Count": [total_adm, max(total_san - total_adm, 0)],
    })
    fig_pie = px.pie(
        pie_df,
        names="Status",
        values="Count",
        hole=0.4,
        title=f"{level}: Admitted vs Vacant",
    )
    st.plotly_chart(fig_pie, use_container_width=True)

    # Institution-level totals shared by the last two charts.
    df_f["Total_Sanctioned"] = df_f[sanc_selected].sum(axis=1)
    df_f["Total_Admitted"] = df_f[adm_selected].sum(axis=1)
    df_f["Vacancies"] = df_f["Total_Sanctioned"] - df_f["Total_Admitted"]

    # --- 4) Top 10 Institutions by Vacancies (H‑Bar) ---
    st.subheader("Top 10 Institutions by Vacancies")
    top10 = df_f.nlargest(10, "Vacancies")[["Institution", "Vacancies"]]
    top10["Institution"] = top10["Institution"].astype(str)
    fig_hbar = px.bar(
        top10.sort_values("Vacancies"),
        x="Vacancies",
        y="Institution",
        orientation="h",
        labels={"Vacancies": "Vacant Seats", "Institution": ""},
        title="Top 10 Institutions by Vacancies",
    )
    st.plotly_chart(fig_hbar, use_container_width=True)

    # --- 5) Admission Efficiency Scatter (Bubble) ---
    st.subheader("Sanctioned vs Admitted (Bubble = Vacancy Rate)")

    # Marker sizes must be finite and non-negative: clip over-admission to 0
    # and treat rows with no sanctioned seats (NaN rate) as 0 as well.
    df_f["Vacancy_Rate"] = (
        (df_f["Vacancies"] / df_f["Total_Sanctioned"].replace({0: np.nan}))
        .clip(lower=0)
        .fillna(0)
    )

    fig_sc = px.scatter(
        df_f,
        x="Total_Sanctioned",
        y="Total_Admitted",
        size="Vacancy_Rate",
        color="District",
        hover_data=["Institution"],
        labels={
            "Total_Sanctioned": "Sanctioned Seats",
            "Total_Admitted": "Admitted Seats",
            "Vacancy_Rate": "Vacancy Rate",
        },
        title="Sanctioned vs Admitted (Bubble size = Vacancy Rate)",
    )
    st.plotly_chart(fig_sc, use_container_width=True)

    # --- Optional LLM Q&A ---
    if os.getenv("OPENAI_API_KEY"):
        st.subheader("🤖 Ask the Dashboard (LLM Insight)")
        q = st.text_input("Enter your question about this data:")
        if q:
            with st.spinner("Generating answer…"):
                try:
                    answer = _ask_llm(q, df_f)
                except Exception as err:
                    # UI boundary: report the failure instead of crashing the app.
                    st.error(f"LLM request failed: {err}")
                else:
                    st.write(answer)


# Launch the dashboard only when this file is executed as a script.
if __name__ == "__main__":
    main()