DataWizard9742 committed on
Commit
b37e3fd
·
verified ·
1 Parent(s): 52a9fef

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +186 -0
app.py ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

import openai
import pandas as pd
import plotly.express as px
import streamlit as st
5
+
6
# Read the OpenAI credential from the environment rather than embedding it in
# source control. The value is None when OPENAI_API_KEY is unset, in which case
# the API call made later in this script fails and reports the error in the UI.
# NOTE(review): the key that was previously committed on this line is exposed
# in the repository history and must be revoked and rotated.
openai.api_key = os.environ.get("OPENAI_API_KEY")
7
+
8
+
9
# Configure the page first: wide layout, chart icon, dashboard title.
st.set_page_config(
    layout="wide",
    page_icon=":bar_chart:",
    page_title="District Admissions Dashboard",
)
# Inject a CSS rule that hides the Streamlit footer for a cleaner UI.
_hide_footer_css = "<style> footer {visibility: hidden;} </style>"
st.markdown(_hide_footer_css, unsafe_allow_html=True)
12
+
13
@st.cache_data
def load_data():
    """Load every sheet of ``data_clean.xlsx`` into one normalized DataFrame.

    Each sheet is first read with ``header=1``. If the first resulting column
    label starts with "Erstwhile", the sheet is re-read with ``header=2``
    because its header row sits one line lower. The stripped sheet name is
    stored on every row and ends up in the ``Region`` column after renaming.

    Returns:
        pandas.DataFrame: all sheets concatenated (index reset), with the
        source column labels renamed to short identifiers and the
        ``District`` / ``Institution`` columns cast to stripped strings.
    """
    frames = []
    # The context manager closes the workbook handle even if a sheet fails to
    # parse; the original left the ExcelFile open.
    with pd.ExcelFile("data_clean.xlsx") as xls:
        for sheet in xls.sheet_names:
            # Read each sheet, skipping the irrelevant top rows.
            df = pd.read_excel(xls, sheet_name=sheet, header=1)
            if str(df.columns[0]).startswith("Erstwhile"):
                df = pd.read_excel(xls, sheet_name=sheet, header=2)
            df['ErstwhileDistrict'] = sheet.strip()  # tag the source region
            frames.append(df)
    df_all = pd.concat(frames, ignore_index=True)

    # Rename columns for convenience ('District' already has its final name).
    df_all = df_all.rename(columns={
        'S.No as per source': 'Serial',
        'Name of the TMR Institution': 'Institution',
        'V-Class\nSanction': 'V_Sanction',
        'V-Class\nAdmitted': 'V_Admitted',
        'V-Class\nVacant': 'V_Vacant',
        'V-Class\nPercentage Vacant': 'V_VacancyPercent',
        'Course - \nI year': 'I_Course',
        'I year\nSanction': 'I_Sanction',
        'I year\nAdmitted': 'I_Admitted',
        'I year\nVacant': 'I_Vacant',
        'I year\nPercentage': 'I_VacancyPercent',
        'ErstwhileDistrict': 'Region',
    })

    # Strip whitespace from the label columns.
    # NOTE(review): astype(str) turns missing cells into the literal "nan";
    # confirm the workbook has no blank District/Institution cells.
    for label_col in ('District', 'Institution'):
        df_all[label_col] = df_all[label_col].astype(str).str.strip()
    return df_all
46
+
47
# Fetch the combined dataset (served from Streamlit's cache after the first run)
df_all = load_data()

# Sidebar: let the user pick which erstwhile district to inspect
st.sidebar.header("Select District")
regions = sorted(df_all['Region'].unique())
selected_region = st.sidebar.selectbox("Erstwhile District", options=regions)

# Keep only the rows belonging to the chosen region
_in_selected_region = df_all['Region'].eq(selected_region)
df_region = df_all.loc[_in_selected_region]
57
+
58
# Compute KPI metrics for selected region
def _seat_rates(admitted, sanctioned):
    """Return ``(fill_rate, vacancy_rate)`` as fractions of sanctioned seats.

    Both rates are 0 when no seats are sanctioned, so a region without seats
    is not reported as 100% vacant.
    """
    if not sanctioned:
        return 0, 0
    fill_rate = admitted / sanctioned
    return fill_rate, 1 - fill_rate


num_institutions = len(df_region)  # one row per institution

# Seat totals for Class V and Intermediate I Year, truncated to int for display.
V_san, V_adm, V_vac = (
    int(df_region[col].sum()) for col in ('V_Sanction', 'V_Admitted', 'V_Vacant')
)
I_san, I_adm, I_vac = (
    int(df_region[col].sum()) for col in ('I_Sanction', 'I_Admitted', 'I_Vacant')
)

V_fill_rate, V_vac_rate = _seat_rates(V_adm, V_san)
I_fill_rate, I_vac_rate = _seat_rates(I_adm, I_san)
70
+
71
# Title and overview for the selected region
st.title("Minority Institutions Admissions Dashboard")
st.subheader(f"{selected_region} – Summary")
st.markdown(f"**Total Institutions:** {num_institutions} ")
# The emoji below were mis-encoded (mojibake) in the original strings.
st.markdown(f"**Class V:** {V_adm} students admitted out of {V_san} seats (🔴 *{V_vac} vacant*, 📈 {V_fill_rate:.0%} fill rate) ")
st.markdown(f"**Intermediate I Year:** {I_adm} students admitted out of {I_san} seats (🔴 *{I_vac} vacant*, 📈 {I_fill_rate:.0%} fill rate) ")

# KPI metric cards: five equal-width columns, one headline number each
kpi1, kpi2, kpi3, kpi4, kpi5 = st.columns(5)
kpi1.metric("Institutions", num_institutions)
kpi2.metric("Class V Admitted", V_adm)
kpi3.metric("Class V Vacant %", f"{V_vac_rate*100:.1f}%")
kpi4.metric("I Year Admitted", I_adm)
kpi5.metric("I Year Vacant %", f"{I_vac_rate*100:.1f}%")
85
+
86
# Pie charts for overall fill vs vacant
_SEAT_COLORS = {"Filled": "#2ca02c", "Vacant": "#d62728"}


def _seat_donut(filled, vacant, title):
    """Build a donut chart of filled vs vacant seats.

    ``color`` is passed alongside ``names`` because Plotly Express only
    applies ``color_discrete_map`` to the values of the ``color`` argument;
    without it the green/red mapping is ignored and default colors are used.
    """
    labels = ["Filled", "Vacant"]
    return px.pie(
        values=[filled, vacant],
        names=labels,
        color=labels,
        title=title,
        hole=0.4,
        color_discrete_map=_SEAT_COLORS,
    )


fig_v = _seat_donut(V_adm, V_vac, "Class V Seats Filled vs Vacant")
fig_i = _seat_donut(I_adm, I_vac, "Intermediate I-Year Filled vs Vacant")

# Show the two donuts side by side
col1, col2 = st.columns(2)
col1.plotly_chart(fig_v, use_container_width=True)
col2.plotly_chart(fig_i, use_container_width=True)
95
+
96
# Per-sub-district totals feeding the two stacked bar charts
_stack_colors = ["#2ca02c", "#d62728"]
_summed_cols = ["V_Admitted", "V_Vacant", "I_Admitted", "I_Vacant"]
sub_df = df_region.groupby('District').agg(dict.fromkeys(_summed_cols, "sum")).reset_index()

# Class V: admitted stacked on vacant, one bar per district
sub_v = sub_df.rename(columns={"V_Admitted": "Admitted", "V_Vacant": "Vacant"})
fig_bar_v = px.bar(
    sub_v,
    x="District",
    y=["Admitted", "Vacant"],
    title="Class V – Admitted vs Vacant by District",
    barmode="stack",
    color_discrete_sequence=_stack_colors,
)

# Intermediate I-Year: same layout built from the I_* columns
sub_i = sub_df.rename(columns={"I_Admitted": "Admitted", "I_Vacant": "Vacant"})
fig_bar_i = px.bar(
    sub_i,
    x="District",
    y=["Admitted", "Vacant"],
    title="Intermediate I-Year – Admitted vs Vacant by District",
    barmode="stack",
    color_discrete_sequence=_stack_colors,
)

# Render both charts at full container width, Class V first
for _stacked_fig in (fig_bar_v, fig_bar_i):
    st.plotly_chart(_stacked_fig, use_container_width=True)
109
+
110
# ----------------------------
# Institution-level Vacancy Bars
# ----------------------------

def _vacancy_chart(column, title):
    """Return ``(rows, figure)`` for institutions with vacancies in ``column``.

    ``rows`` holds the Institution and ``column`` values of every row in the
    selected region whose ``column`` is greater than zero, sorted ascending;
    ``figure`` is the horizontal red bar chart drawn from those rows.
    """
    has_vacancy = df_region[column] > 0
    rows = df_region.loc[has_vacancy, ['Institution', column]].sort_values(column, ascending=True)
    figure = px.bar(
        rows,
        x=column,
        y='Institution',
        orientation='h',
        title=title,
        labels={column: 'Vacant Seats', 'Institution': 'TMR Institution'},
        color_discrete_sequence=["#d62728"],
    )
    return rows, figure


st.subheader("Vacant Seats by Institution")

# Institutions with any Class V vacancies
df_vac_v, fig_inst_v = _vacancy_chart('V_Vacant', "Class V – Vacant Seats by Institution")
st.plotly_chart(fig_inst_v, use_container_width=True)

# Institutions with any I-Year vacancies
df_vac_i, fig_inst_i = _vacancy_chart('I_Vacant', "Intermediate I-Year – Vacant Seats by Institution")
st.plotly_chart(fig_inst_i, use_container_width=True)
137
+
138
# ----------------------------
# Ask AI Assistant
# ----------------------------
st.markdown("---")
st.subheader("🧠 Ask AI About This Data")

# The opening quotation marks in this help text were mis-encoded in the original.
st.markdown("Ask in plain English – for example: *“Which institutions in this district have the most vacancies?”* or *“What’s the fill rate in Class V?”*")

# Suggested questions; the leading empty option means "nothing selected"
suggested = [
    "Which institution has the highest vacant seats in this district?",
    "How many students are admitted in Class V?",
    "What is the fill rate for Intermediate I-Year?",
    "Which sub-district is performing the best?",
]
selected_q = st.selectbox("Suggested Questions", options=[""] + suggested)

# Pre-fill the free-text box with the chosen suggestion (empty when none is chosen)
user_question = st.text_input("Ask your question:", value=selected_q)

if st.button("Get Answer"):
    if not user_question.strip():
        st.warning("Please enter a question.")
    else:
        with st.spinner("Thinking..."):
            # Serialize the currently selected region's rows as CSV for the LLM context
            context = df_region.to_csv(index=False)

            prompt = (
                "You are a helpful data analyst. Based on this district-level dataset below, "
                "answer the following question clearly and concisely.\n"
                "Dataset (CSV):\n"
                f"{context}\n"
                "\n"
                f"User question: {user_question}\n"
                "Answer:"
            )

            request = dict(
                model="gpt-4",
                messages=[
                    {"role": "system", "content": "You are a helpful and analytical assistant."},
                    {"role": "user", "content": prompt},
                ],
                temperature=0.4,
                max_tokens=500,
            )

            try:
                if hasattr(openai, "OpenAI"):
                    # openai>=1.0 removed openai.ChatCompletion; use the
                    # module-level client and attribute access on the response.
                    response = openai.chat.completions.create(**request)
                    ai_answer = response.choices[0].message.content
                else:
                    # Legacy openai<1.0 interface.
                    response = openai.ChatCompletion.create(**request)
                    ai_answer = response['choices'][0]['message']['content']
            except Exception as e:
                # UI boundary: surface any API/configuration failure to the user.
                st.error(f"Error: {e}")
            else:
                st.success("AI Answer:")
                st.markdown(ai_answer)