Spaces:
Sleeping
Sleeping
File size: 8,252 Bytes
b37e3fd | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 | import streamlit as st
import pandas as pd
import plotly.express as px
import openai
openai.api_key = "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"
# Page configuration for a wide dashboard layout
st.set_page_config(page_title="District Admissions Dashboard", page_icon=":bar_chart:", layout="wide")
st.markdown("<style> footer {visibility: hidden;} </style>", unsafe_allow_html=True) # Hide Streamlit footer for cleaner UI
@st.cache_data
def load_data():
# Load all sheets from the Excel into one DataFrame
xls = pd.ExcelFile("data_clean.xlsx")
all_data = []
for sheet in xls.sheet_names:
# Read each sheet, skip irrelevant top rows (different for Adilabad sheet)
df = pd.read_excel(xls, sheet_name=sheet, header=1)
if str(df.columns[0]).startswith("Erstwhile"):
df = pd.read_excel(xls, sheet_name=sheet, header=2)
df['ErstwhileDistrict'] = sheet.strip() # tag the source region
all_data.append(df)
df_all = pd.concat(all_data, ignore_index=True)
# Rename columns for convenience
df_all = df_all.rename(columns={
'S.No as per source': 'Serial',
'District': 'District',
'Name of the TMR Institution': 'Institution',
'V-Class\nSanction': 'V_Sanction',
'V-Class\nAdmitted': 'V_Admitted',
'V-Class\nVacant': 'V_Vacant',
'V-Class\nPercentage Vacant': 'V_VacancyPercent',
'Course - \nI year': 'I_Course',
'I year\nSanction': 'I_Sanction',
'I year\nAdmitted': 'I_Admitted',
'I year\nVacant': 'I_Vacant',
'I year\nPercentage': 'I_VacancyPercent',
'ErstwhileDistrict': 'Region'
})
# Strip whitespace from string columns
df_all['District'] = df_all['District'].astype(str).str.strip()
df_all['Institution'] = df_all['Institution'].astype(str).str.strip()
return df_all
# Load data (cached for efficiency)
df_all = load_data()
# Sidebar filter - select district region
st.sidebar.header("Select District")
regions = sorted(df_all['Region'].unique())
selected_region = st.sidebar.selectbox("Erstwhile District", options=regions)
# Filter the data for the selected region
df_region = df_all[df_all['Region'] == selected_region]
# Compute KPI metrics for selected region
num_institutions = len(df_region)
V_san = int(df_region['V_Sanction'].sum())
V_adm = int(df_region['V_Admitted'].sum())
V_vac = int(df_region['V_Vacant'].sum())
I_san = int(df_region['I_Sanction'].sum())
I_adm = int(df_region['I_Admitted'].sum())
I_vac = int(df_region['I_Vacant'].sum())
V_fill_rate = (V_adm / V_san) if V_san else 0
I_fill_rate = (I_adm / I_san) if I_san else 0
V_vac_rate = 1 - V_fill_rate
I_vac_rate = 1 - I_fill_rate
# Title and overview
st.title("Minority Institutions Admissions Dashboard")
st.subheader(f"{selected_region} β Summary")
st.markdown(f"**Total Institutions:** {num_institutions} ")
st.markdown(f"**Class V:** {V_adm} students admitted out of {V_san} seats (π΄ *{V_vac} vacant*, π {V_fill_rate:.0%} fill rate) ")
st.markdown(f"**Intermediate I Year:** {I_adm} students admitted out of {I_san} seats (π΄ *{I_vac} vacant*, π {I_fill_rate:.0%} fill rate) ")
# KPI metric cards
kpi1, kpi2, kpi3, kpi4, kpi5 = st.columns(5)
kpi1.metric("Institutions", num_institutions)
kpi2.metric("Class V Admitted", V_adm)
kpi3.metric("Class V Vacant %", f"{V_vac_rate*100:.1f}%")
kpi4.metric("I Year Admitted", I_adm)
kpi5.metric("I Year Vacant %", f"{I_vac_rate*100:.1f}%")
# Pie charts for overall fill vs vacant
fig_v = px.pie(values=[V_adm, V_vac], names=["Filled", "Vacant"], title="Class V Seats Filled vs Vacant",
hole=0.4, color_discrete_map={"Filled": "#2ca02c", "Vacant": "#d62728"})
fig_i = px.pie(values=[I_adm, I_vac], names=["Filled", "Vacant"], title="Intermediate I-Year Filled vs Vacant",
hole=0.4, color_discrete_map={"Filled": "#2ca02c", "Vacant": "#d62728"})
col1, col2 = st.columns(2)
col1.plotly_chart(fig_v, use_container_width=True)
col2.plotly_chart(fig_i, use_container_width=True)
# Bar charts for per-sub-district breakdown
sub_df = df_region.groupby('District').agg({"V_Admitted":"sum", "V_Vacant":"sum", "I_Admitted":"sum", "I_Vacant":"sum"}).reset_index()
# Class V bar
sub_v = sub_df.rename(columns={"V_Admitted": "Admitted", "V_Vacant": "Vacant"})
fig_bar_v = px.bar(sub_v, x="District", y=["Admitted","Vacant"], title="Class V β Admitted vs Vacant by District",
barmode="stack", color_discrete_sequence=["#2ca02c", "#d62728"])
# I-Year bar
sub_i = sub_df.rename(columns={"I_Admitted": "Admitted", "I_Vacant": "Vacant"})
fig_bar_i = px.bar(sub_i, x="District", y=["Admitted","Vacant"], title="Intermediate I-Year β Admitted vs Vacant by District",
barmode="stack", color_discrete_sequence=["#2ca02c", "#d62728"])
st.plotly_chart(fig_bar_v, use_container_width=True)
st.plotly_chart(fig_bar_i, use_container_width=True)
# ----------------------------
# Institution-level Vacancy Bars
# ----------------------------
st.subheader("Vacant Seats by Institution")
# Filter institutions with any Class V vacancies
df_vac_v = df_region[df_region['V_Vacant'] > 0][['Institution', 'V_Vacant']].sort_values('V_Vacant', ascending=True)
fig_inst_v = px.bar(df_vac_v,
x='V_Vacant',
y='Institution',
orientation='h',
title="Class V β Vacant Seats by Institution",
labels={'V_Vacant': 'Vacant Seats', 'Institution': 'TMR Institution'},
color_discrete_sequence=["#d62728"])
st.plotly_chart(fig_inst_v, use_container_width=True)
# Filter institutions with any I-Year vacancies
df_vac_i = df_region[df_region['I_Vacant'] > 0][['Institution', 'I_Vacant']].sort_values('I_Vacant', ascending=True)
fig_inst_i = px.bar(df_vac_i,
x='I_Vacant',
y='Institution',
orientation='h',
title="Intermediate I-Year β Vacant Seats by Institution",
labels={'I_Vacant': 'Vacant Seats', 'Institution': 'TMR Institution'},
color_discrete_sequence=["#d62728"])
st.plotly_chart(fig_inst_i, use_container_width=True)
# ----------------------------
# Ask AI Assistant
# ----------------------------
st.markdown("---")
st.subheader("π§ Ask AI About This Data")
st.markdown("Ask in plain English β for example: *βWhich institutions in this district have the most vacancies?β* or *βWhatβs the fill rate in Class V?β*")
# Suggested questions
suggested = [
"Which institution has the highest vacant seats in this district?",
"How many students are admitted in Class V?",
"What is the fill rate for Intermediate I-Year?",
"Which sub-district is performing the best?",
]
selected_q = st.selectbox("Suggested Questions", options=[""] + suggested)
user_question = st.text_input("Ask your question:", value=selected_q if selected_q else "")
if st.button("Get Answer"):
if not user_question.strip():
st.warning("Please enter a question.")
else:
with st.spinner("Thinking..."):
# Convert current df_region to string for LLM context
context = df_region.to_csv(index=False)
prompt = f"""You are a helpful data analyst. Based on this district-level dataset below, answer the following question clearly and concisely.
Dataset (CSV):
{context}
User question: {user_question}
Answer:"""
try:
response = openai.ChatCompletion.create(
model="gpt-4",
messages=[
{"role": "system", "content": "You are a helpful and analytical assistant."},
{"role": "user", "content": prompt}
],
temperature=0.4,
max_tokens=500
)
ai_answer = response['choices'][0]['message']['content']
st.success("AI Answer:")
st.markdown(ai_answer)
except Exception as e:
st.error(f"Error: {e}")
|