File size: 8,252 Bytes
b37e3fd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
import streamlit as st
import pandas as pd
import plotly.express as px
import openai

openai.api_key = "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"


# Page configuration for a wide dashboard layout
st.set_page_config(page_title="District Admissions Dashboard", page_icon=":bar_chart:", layout="wide")
st.markdown("<style> footer {visibility: hidden;} </style>", unsafe_allow_html=True)  # Hide Streamlit footer for cleaner UI

@st.cache_data
def load_data():
    # Load all sheets from the Excel into one DataFrame
    xls = pd.ExcelFile("data_clean.xlsx")
    all_data = []
    for sheet in xls.sheet_names:
        # Read each sheet, skip irrelevant top rows (different for Adilabad sheet)
        df = pd.read_excel(xls, sheet_name=sheet, header=1)
        if str(df.columns[0]).startswith("Erstwhile"):
            df = pd.read_excel(xls, sheet_name=sheet, header=2)
        df['ErstwhileDistrict'] = sheet.strip()  # tag the source region
        all_data.append(df)
    df_all = pd.concat(all_data, ignore_index=True)
    # Rename columns for convenience
    df_all = df_all.rename(columns={
        'S.No as per source': 'Serial',
        'District': 'District',
        'Name of the TMR Institution': 'Institution',
        'V-Class\nSanction': 'V_Sanction',
        'V-Class\nAdmitted': 'V_Admitted',
        'V-Class\nVacant': 'V_Vacant',
        'V-Class\nPercentage Vacant': 'V_VacancyPercent',
        'Course - \nI year': 'I_Course',
        'I year\nSanction': 'I_Sanction',
        'I year\nAdmitted': 'I_Admitted',
        'I year\nVacant': 'I_Vacant',
        'I year\nPercentage': 'I_VacancyPercent',
        'ErstwhileDistrict': 'Region'
    })
    # Strip whitespace from string columns
    df_all['District'] = df_all['District'].astype(str).str.strip()
    df_all['Institution'] = df_all['Institution'].astype(str).str.strip()
    return df_all

# Load data (cached for efficiency)
df_all = load_data()

# Sidebar filter - select district region
st.sidebar.header("Select District")
regions = sorted(df_all['Region'].unique())
selected_region = st.sidebar.selectbox("Erstwhile District", options=regions)

# Filter the data for the selected region
df_region = df_all[df_all['Region'] == selected_region]

# Compute KPI metrics for selected region
num_institutions = len(df_region)
V_san = int(df_region['V_Sanction'].sum())
V_adm = int(df_region['V_Admitted'].sum())
V_vac = int(df_region['V_Vacant'].sum())
I_san = int(df_region['I_Sanction'].sum())
I_adm = int(df_region['I_Admitted'].sum())
I_vac = int(df_region['I_Vacant'].sum())
V_fill_rate = (V_adm / V_san) if V_san else 0
I_fill_rate = (I_adm / I_san) if I_san else 0
V_vac_rate = 1 - V_fill_rate
I_vac_rate = 1 - I_fill_rate

# Title and overview
st.title("Minority Institutions Admissions Dashboard")
st.subheader(f"{selected_region} – Summary")
st.markdown(f"**Total Institutions:** {num_institutions}  ")
st.markdown(f"**Class V:** {V_adm} students admitted out of {V_san} seats (πŸ”΄ *{V_vac} vacant*, πŸ“ˆ {V_fill_rate:.0%} fill rate)  ")
st.markdown(f"**Intermediate I Year:** {I_adm} students admitted out of {I_san} seats (πŸ”΄ *{I_vac} vacant*, πŸ“ˆ {I_fill_rate:.0%} fill rate)  ")

# KPI metric cards
kpi1, kpi2, kpi3, kpi4, kpi5 = st.columns(5)
kpi1.metric("Institutions", num_institutions)
kpi2.metric("Class V Admitted", V_adm)
kpi3.metric("Class V Vacant %", f"{V_vac_rate*100:.1f}%")
kpi4.metric("I Year Admitted", I_adm)
kpi5.metric("I Year Vacant %", f"{I_vac_rate*100:.1f}%")

# Pie charts for overall fill vs vacant
fig_v = px.pie(values=[V_adm, V_vac], names=["Filled", "Vacant"], title="Class V Seats Filled vs Vacant",
               hole=0.4, color_discrete_map={"Filled": "#2ca02c", "Vacant": "#d62728"})
fig_i = px.pie(values=[I_adm, I_vac], names=["Filled", "Vacant"], title="Intermediate I-Year Filled vs Vacant",
               hole=0.4, color_discrete_map={"Filled": "#2ca02c", "Vacant": "#d62728"})

col1, col2 = st.columns(2)
col1.plotly_chart(fig_v, use_container_width=True)
col2.plotly_chart(fig_i, use_container_width=True)

# Bar charts for per-sub-district breakdown
sub_df = df_region.groupby('District').agg({"V_Admitted":"sum", "V_Vacant":"sum", "I_Admitted":"sum", "I_Vacant":"sum"}).reset_index()
# Class V bar
sub_v = sub_df.rename(columns={"V_Admitted": "Admitted", "V_Vacant": "Vacant"})
fig_bar_v = px.bar(sub_v, x="District", y=["Admitted","Vacant"], title="Class V – Admitted vs Vacant by District",
                   barmode="stack", color_discrete_sequence=["#2ca02c", "#d62728"])
# I-Year bar
sub_i = sub_df.rename(columns={"I_Admitted": "Admitted", "I_Vacant": "Vacant"})
fig_bar_i = px.bar(sub_i, x="District", y=["Admitted","Vacant"], title="Intermediate I-Year – Admitted vs Vacant by District",
                   barmode="stack", color_discrete_sequence=["#2ca02c", "#d62728"])

st.plotly_chart(fig_bar_v, use_container_width=True)
st.plotly_chart(fig_bar_i, use_container_width=True)

# ----------------------------
# Institution-level Vacancy Bars
# ----------------------------

st.subheader("Vacant Seats by Institution")

# Filter institutions with any Class V vacancies
df_vac_v = df_region[df_region['V_Vacant'] > 0][['Institution', 'V_Vacant']].sort_values('V_Vacant', ascending=True)
fig_inst_v = px.bar(df_vac_v,
                    x='V_Vacant',
                    y='Institution',
                    orientation='h',
                    title="Class V – Vacant Seats by Institution",
                    labels={'V_Vacant': 'Vacant Seats', 'Institution': 'TMR Institution'},
                    color_discrete_sequence=["#d62728"])
st.plotly_chart(fig_inst_v, use_container_width=True)

# Filter institutions with any I-Year vacancies
df_vac_i = df_region[df_region['I_Vacant'] > 0][['Institution', 'I_Vacant']].sort_values('I_Vacant', ascending=True)
fig_inst_i = px.bar(df_vac_i,
                    x='I_Vacant',
                    y='Institution',
                    orientation='h',
                    title="Intermediate I-Year – Vacant Seats by Institution",
                    labels={'I_Vacant': 'Vacant Seats', 'Institution': 'TMR Institution'},
                    color_discrete_sequence=["#d62728"])
st.plotly_chart(fig_inst_i, use_container_width=True)

# ----------------------------
# Ask AI Assistant
# ----------------------------
st.markdown("---")
st.subheader("🧠 Ask AI About This Data")

st.markdown("Ask in plain English – for example: *β€œWhich institutions in this district have the most vacancies?”* or *β€œWhat’s the fill rate in Class V?”*")

# Suggested questions
suggested = [
    "Which institution has the highest vacant seats in this district?",
    "How many students are admitted in Class V?",
    "What is the fill rate for Intermediate I-Year?",
    "Which sub-district is performing the best?",
]
selected_q = st.selectbox("Suggested Questions", options=[""] + suggested)

user_question = st.text_input("Ask your question:", value=selected_q if selected_q else "")

if st.button("Get Answer"):
    if not user_question.strip():
        st.warning("Please enter a question.")
    else:
        with st.spinner("Thinking..."):
            # Convert current df_region to string for LLM context
            context = df_region.to_csv(index=False)

            prompt = f"""You are a helpful data analyst. Based on this district-level dataset below, answer the following question clearly and concisely.
Dataset (CSV):
{context}

User question: {user_question}
Answer:"""

            try:
                response = openai.ChatCompletion.create(
                    model="gpt-4",
                    messages=[
                        {"role": "system", "content": "You are a helpful and analytical assistant."},
                        {"role": "user", "content": prompt}
                    ],
                    temperature=0.4,
                    max_tokens=500
                )
                ai_answer = response['choices'][0]['message']['content']
                st.success("AI Answer:")
                st.markdown(ai_answer)
            except Exception as e:
                st.error(f"Error: {e}")