Spaces:

DataWizard9742
/

MinorityData

Sleeping

App Files Files Community

DataWizard9742 commited on Jul 14, 2025

Commit

b37e3fd

verified ·

1 Parent(s): 52a9fef

Create app.py

Browse files

Files changed (1) hide show

app.py +186 -0

app.py ADDED Viewed

	@@ -0,0 +1,186 @@

+import streamlit as st
+import pandas as pd
+import plotly.express as px
+import openai
+openai.api_key = "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"
+# Page configuration for a wide dashboard layout
+st.set_page_config(page_title="District Admissions Dashboard", page_icon=":bar_chart:", layout="wide")
+st.markdown("<style> footer {visibility: hidden;} </style>", unsafe_allow_html=True)  # Hide Streamlit footer for cleaner UI
+@st.cache_data
+def load_data():
+    # Load all sheets from the Excel into one DataFrame
+    xls = pd.ExcelFile("data_clean.xlsx")
+    all_data = []
+    for sheet in xls.sheet_names:
+        # Read each sheet, skip irrelevant top rows (different for Adilabad sheet)
+        df = pd.read_excel(xls, sheet_name=sheet, header=1)
+        if str(df.columns[0]).startswith("Erstwhile"):
+            df = pd.read_excel(xls, sheet_name=sheet, header=2)
+        df['ErstwhileDistrict'] = sheet.strip()  # tag the source region
+        all_data.append(df)
+    df_all = pd.concat(all_data, ignore_index=True)
+    # Rename columns for convenience
+    df_all = df_all.rename(columns={
+        'S.No as per source': 'Serial',
+        'District': 'District',
+        'Name of the TMR Institution': 'Institution',
+        'V-Class\nSanction': 'V_Sanction',
+        'V-Class\nAdmitted': 'V_Admitted',
+        'V-Class\nVacant': 'V_Vacant',
+        'V-Class\nPercentage Vacant': 'V_VacancyPercent',
+        'Course - \nI year': 'I_Course',
+        'I year\nSanction': 'I_Sanction',
+        'I year\nAdmitted': 'I_Admitted',
+        'I year\nVacant': 'I_Vacant',
+        'I year\nPercentage': 'I_VacancyPercent',
+        'ErstwhileDistrict': 'Region'
+    })
+    # Strip whitespace from string columns
+    df_all['District'] = df_all['District'].astype(str).str.strip()
+    df_all['Institution'] = df_all['Institution'].astype(str).str.strip()
+    return df_all
+# Load data (cached for efficiency)
+df_all = load_data()
+# Sidebar filter - select district region
+st.sidebar.header("Select District")
+regions = sorted(df_all['Region'].unique())
+selected_region = st.sidebar.selectbox("Erstwhile District", options=regions)
+# Filter the data for the selected region
+df_region = df_all[df_all['Region'] == selected_region]
+# Compute KPI metrics for selected region
+num_institutions = len(df_region)
+V_san = int(df_region['V_Sanction'].sum())
+V_adm = int(df_region['V_Admitted'].sum())
+V_vac = int(df_region['V_Vacant'].sum())
+I_san = int(df_region['I_Sanction'].sum())
+I_adm = int(df_region['I_Admitted'].sum())
+I_vac = int(df_region['I_Vacant'].sum())
+V_fill_rate = (V_adm / V_san) if V_san else 0
+I_fill_rate = (I_adm / I_san) if I_san else 0
+V_vac_rate = 1 - V_fill_rate
+I_vac_rate = 1 - I_fill_rate
+# Title and overview
+st.title("Minority Institutions Admissions Dashboard")
+st.subheader(f"{selected_region} – Summary")
+st.markdown(f"**Total Institutions:** {num_institutions}  ")
+st.markdown(f"**Class V:** {V_adm} students admitted out of {V_san} seats (🔴 *{V_vac} vacant*, 📈 {V_fill_rate:.0%} fill rate)  ")
+st.markdown(f"**Intermediate I Year:** {I_adm} students admitted out of {I_san} seats (🔴 *{I_vac} vacant*, 📈 {I_fill_rate:.0%} fill rate)  ")
+# KPI metric cards
+kpi1, kpi2, kpi3, kpi4, kpi5 = st.columns(5)
+kpi1.metric("Institutions", num_institutions)
+kpi2.metric("Class V Admitted", V_adm)
+kpi3.metric("Class V Vacant %", f"{V_vac_rate*100:.1f}%")
+kpi4.metric("I Year Admitted", I_adm)
+kpi5.metric("I Year Vacant %", f"{I_vac_rate*100:.1f}%")
+# Pie charts for overall fill vs vacant
+fig_v = px.pie(values=[V_adm, V_vac], names=["Filled", "Vacant"], title="Class V Seats Filled vs Vacant",
+               hole=0.4, color_discrete_map={"Filled": "#2ca02c", "Vacant": "#d62728"})
+fig_i = px.pie(values=[I_adm, I_vac], names=["Filled", "Vacant"], title="Intermediate I-Year Filled vs Vacant",
+               hole=0.4, color_discrete_map={"Filled": "#2ca02c", "Vacant": "#d62728"})
+col1, col2 = st.columns(2)
+col1.plotly_chart(fig_v, use_container_width=True)
+col2.plotly_chart(fig_i, use_container_width=True)
+# Bar charts for per-sub-district breakdown
+sub_df = df_region.groupby('District').agg({"V_Admitted":"sum", "V_Vacant":"sum", "I_Admitted":"sum", "I_Vacant":"sum"}).reset_index()
+# Class V bar
+sub_v = sub_df.rename(columns={"V_Admitted": "Admitted", "V_Vacant": "Vacant"})
+fig_bar_v = px.bar(sub_v, x="District", y=["Admitted","Vacant"], title="Class V – Admitted vs Vacant by District",
+                   barmode="stack", color_discrete_sequence=["#2ca02c", "#d62728"])
+# I-Year bar
+sub_i = sub_df.rename(columns={"I_Admitted": "Admitted", "I_Vacant": "Vacant"})
+fig_bar_i = px.bar(sub_i, x="District", y=["Admitted","Vacant"], title="Intermediate I-Year – Admitted vs Vacant by District",
+                   barmode="stack", color_discrete_sequence=["#2ca02c", "#d62728"])
+st.plotly_chart(fig_bar_v, use_container_width=True)
+st.plotly_chart(fig_bar_i, use_container_width=True)
+# ----------------------------
+# Institution-level Vacancy Bars
+# ----------------------------
+st.subheader("Vacant Seats by Institution")
+# Filter institutions with any Class V vacancies
+df_vac_v = df_region[df_region['V_Vacant'] > 0][['Institution', 'V_Vacant']].sort_values('V_Vacant', ascending=True)
+fig_inst_v = px.bar(df_vac_v,
+                    x='V_Vacant',
+                    y='Institution',
+                    orientation='h',
+                    title="Class V – Vacant Seats by Institution",
+                    labels={'V_Vacant': 'Vacant Seats', 'Institution': 'TMR Institution'},
+                    color_discrete_sequence=["#d62728"])
+st.plotly_chart(fig_inst_v, use_container_width=True)
+# Filter institutions with any I-Year vacancies
+df_vac_i = df_region[df_region['I_Vacant'] > 0][['Institution', 'I_Vacant']].sort_values('I_Vacant', ascending=True)
+fig_inst_i = px.bar(df_vac_i,
+                    x='I_Vacant',
+                    y='Institution',
+                    orientation='h',
+                    title="Intermediate I-Year – Vacant Seats by Institution",
+                    labels={'I_Vacant': 'Vacant Seats', 'Institution': 'TMR Institution'},
+                    color_discrete_sequence=["#d62728"])
+st.plotly_chart(fig_inst_i, use_container_width=True)
+# ----------------------------
+# Ask AI Assistant
+# ----------------------------
+st.markdown("---")
+st.subheader("🧠 Ask AI About This Data")
+st.markdown("Ask in plain English – for example: *“Which institutions in this district have the most vacancies?”* or *“What’s the fill rate in Class V?”*")
+# Suggested questions
+suggested = [
+    "Which institution has the highest vacant seats in this district?",
+    "How many students are admitted in Class V?",
+    "What is the fill rate for Intermediate I-Year?",
+    "Which sub-district is performing the best?",
+]
+selected_q = st.selectbox("Suggested Questions", options=[""] + suggested)
+user_question = st.text_input("Ask your question:", value=selected_q if selected_q else "")
+if st.button("Get Answer"):
+    if not user_question.strip():
+        st.warning("Please enter a question.")
+    else:
+        with st.spinner("Thinking..."):
+            # Convert current df_region to string for LLM context
+            context = df_region.to_csv(index=False)
+            prompt = f"""You are a helpful data analyst. Based on this district-level dataset below, answer the following question clearly and concisely.
+Dataset (CSV):
+{context}
+User question: {user_question}
+Answer:"""
+            try:
+                response = openai.ChatCompletion.create(
+                    model="gpt-4",
+                    messages=[
+                        {"role": "system", "content": "You are a helpful and analytical assistant."},
+                        {"role": "user", "content": prompt}
+                    ],
+                    temperature=0.4,
+                    max_tokens=500
+                )
+                ai_answer = response['choices'][0]['message']['content']
+                st.success("AI Answer:")
+                st.markdown(ai_answer)
+            except Exception as e:
+                st.error(f"Error: {e}")