Deevyankar committed on
Commit
bb85f6c
·
verified ·
1 Parent(s): 9655019

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +139 -105
app.py CHANGED
@@ -1,6 +1,4 @@
1
  import io
2
- from datetime import datetime
3
-
4
  import numpy as np
5
  import pandas as pd
6
  import gradio as gr
@@ -23,11 +21,7 @@ def _read_file_bytes(file_obj):
23
  return b
24
 
25
 
26
- def _safe_numeric(series):
27
- return pd.to_numeric(series, errors="coerce")
28
-
29
-
30
- def _drop_useless_cols(df: pd.DataFrame) -> pd.DataFrame:
31
  df = df.dropna(axis=1, how="all").dropna(axis=0, how="all")
32
  unnamed = [c for c in df.columns if str(c).strip().lower().startswith("unnamed")]
33
  if unnamed:
@@ -35,26 +29,31 @@ def _drop_useless_cols(df: pd.DataFrame) -> pd.DataFrame:
35
  return df
36
 
37
 
38
- def _guess_cols(df: pd.DataFrame):
39
- cols = list(df.columns)
40
- lower = {c: str(c).strip().lower() for c in cols}
41
 
42
- # marks guess = column with most numeric values
43
- best_marks, best_score = cols[0], -1
 
44
  for c in cols:
45
- s = _safe_numeric(df[c])
46
- score = s.notna().mean()
47
  if score > best_score:
48
  best_score = score
49
- best_marks = c
 
 
50
 
51
- grade_guess = next((c for c in cols if "grade" in lower[c] or "grde" in lower[c]), cols[0])
 
 
52
 
53
- # optional columns
54
- course_guess = next((c for c in cols if any(k in lower[c] for k in ["course", "module", "subject"])), None)
55
- section_guess = next((c for c in cols if any(k in lower[c] for k in ["section", "group", "batch", "class"])), None)
56
 
57
- return best_marks, grade_guess, course_guess, section_guess
 
 
 
 
58
 
59
 
60
  def apply_filters(df, course_col, section_col, course_filter, section_filter):
@@ -67,9 +66,9 @@ def apply_filters(df, course_col, section_col, course_filter, section_filter):
67
 
68
 
69
  # =============================
70
- # Analytics (NO student tables)
71
  # =============================
72
- def compute_insights(df, marks_col, grade_col, pass_mark, course_col, section_col, course_filter, section_filter):
73
  if df is None or df.empty:
74
  raise gr.Error("Sheet is empty.")
75
 
@@ -83,30 +82,47 @@ def compute_insights(df, marks_col, grade_col, pass_mark, course_col, section_co
83
  n = int(len(valid))
84
  missing = int(d["_marks"].isna().sum())
85
 
 
86
  mean = float(valid["_marks"].mean()) if n else 0.0
87
  std = float(valid["_marks"].std(ddof=0)) if n else 0.0
88
  minv = float(valid["_marks"].min()) if n else 0.0
89
  maxv = float(valid["_marks"].max()) if n else 0.0
90
 
91
- pass_mark = int(pass_mark)
92
  pass_count = int((valid["_marks"] >= pass_mark).sum()) if n else 0
93
  pass_rate = (pass_count / n * 100.0) if n else 0.0
94
 
 
 
 
 
 
95
  skew = float(valid["_marks"].skew()) if n else 0.0
96
  kurt = float(valid["_marks"].kurt()) if n else 0.0
97
 
98
- # Percentiles
99
- pct_rows = []
 
100
  if n:
101
- for p in [10, 25, 50, 75, 90]:
102
- pct_rows.append((f"P{p}", round(float(np.percentile(valid["_marks"], p)), 2)))
103
- percentiles_df = pd.DataFrame(pct_rows, columns=["Percentile", "Marks"]) if pct_rows else pd.DataFrame()
 
 
 
 
 
 
 
 
 
 
104
 
105
  # Grade distribution
106
  grade_dist = d["_grade"].value_counts(dropna=False).rename("count").to_frame().reset_index()
107
  grade_dist.columns = [grade_col, "count"]
 
108
 
109
- # Grade to marks mapping
110
  grade_stats = (
111
  valid.groupby(d["_grade"])["_marks"]
112
  .agg(["count", "mean", "std", "min", "median", "max"])
@@ -116,25 +132,14 @@ def compute_insights(df, marks_col, grade_col, pass_mark, course_col, section_co
116
  )
117
 
118
  # Mark heaping (repeated marks)
119
- heaping_df = (
120
  valid["_marks"].round(0).astype(int)
121
  .value_counts().head(12)
122
  .rename("count").reset_index()
123
  .rename(columns={"index": "Mark"})
124
  )
125
 
126
- # Outlier count (IQR)
127
- outlier_count = 0
128
- low_thr = high_thr = 0.0
129
- if n:
130
- q1 = float(np.percentile(valid["_marks"], 25))
131
- q3 = float(np.percentile(valid["_marks"], 75))
132
- iqr = q3 - q1
133
- low_thr = q1 - 1.5 * iqr
134
- high_thr = q3 + 1.5 * iqr
135
- outlier_count = int(((valid["_marks"] < low_thr) | (valid["_marks"] > high_thr)).sum())
136
-
137
- # Status
138
  if pass_rate >= 80:
139
  status = "GREEN"
140
  elif pass_rate >= 60:
@@ -142,26 +147,34 @@ def compute_insights(df, marks_col, grade_col, pass_mark, course_col, section_co
142
  else:
143
  status = "RED"
144
 
145
- # Teacher flags
146
  flags = []
147
  if missing > 0:
148
- flags.append(f"{missing} missing mark(s) β†’ verify.")
 
 
 
 
149
  if abs(skew) > 0.7:
150
- flags.append("Skewed distribution β†’ performance not balanced.")
151
- if len(heaping_df) and heaping_df["count"].iloc[0] >= max(10, 0.06 * n):
152
- flags.append("Heaping β†’ many students share same mark (rounding/marking pattern).")
153
  if outlier_count > 0:
154
- flags.append(f"{outlier_count} outlier(s) by IQR β†’ check special cases.")
 
 
 
155
  flags_text = " | ".join(flags) if flags else "No major warning patterns detected."
156
 
157
- insight_text = (
158
- f"Status: {status} | Pass rate: {pass_rate:.1f}% (Pass mark {pass_mark}) | "
159
- f"Avg: {mean:.1f} (Std: {std:.1f}) | Min/Max: {minv:.1f}/{maxv:.1f} | "
160
- f"Skew: {skew:.2f} | Kurtosis: {kurt:.2f} | Outliers: {outlier_count} | Missing: {missing}\n"
161
- f"Flags: {flags_text}"
 
 
162
  )
163
 
164
- kpi_df = pd.DataFrame(
 
165
  [
166
  ("Total rows (filtered)", total),
167
  ("Students with numeric marks", n),
@@ -169,6 +182,8 @@ def compute_insights(df, marks_col, grade_col, pass_mark, course_col, section_co
169
  ("Pass mark", pass_mark),
170
  ("Pass count", pass_count),
171
  ("Pass rate (%)", round(pass_rate, 2)),
 
 
172
  ("Average", round(mean, 2)),
173
  ("Std deviation", round(std, 2)),
174
  ("Minimum", round(minv, 2)),
@@ -178,29 +193,32 @@ def compute_insights(df, marks_col, grade_col, pass_mark, course_col, section_co
178
  ("Outlier low threshold (IQR)", round(low_thr, 2)),
179
  ("Outlier high threshold (IQR)", round(high_thr, 2)),
180
  ("Outlier count (IQR)", outlier_count),
 
181
  ],
182
  columns=["Metric", "Value"],
183
  )
184
 
185
  # Charts
186
- # 1) Histogram
187
  fig1 = plt.figure()
188
  plt.hist(valid["_marks"].dropna(), bins=12)
 
189
  plt.title("Marks distribution (Histogram)")
190
  plt.xlabel("Marks")
191
  plt.ylabel("Students")
192
 
193
- # 2) CDF
194
  fig2 = plt.figure()
195
  xs = np.sort(valid["_marks"].dropna().values) if n else np.array([])
196
  ys = np.arange(1, len(xs) + 1) / len(xs) if len(xs) else np.array([])
197
  if len(xs):
198
  plt.plot(xs, ys)
 
199
  plt.title("CDF (Proportion of students ≀ mark)")
200
  plt.xlabel("Marks")
201
  plt.ylabel("Proportion")
202
 
203
- # 3) Grade bar
204
  fig3 = plt.figure()
205
  gd = grade_dist.set_index(grade_col)["count"]
206
  plt.bar(gd.index.astype(str), gd.values)
@@ -209,12 +227,12 @@ def compute_insights(df, marks_col, grade_col, pass_mark, course_col, section_co
209
  plt.ylabel("Count")
210
  plt.xticks(rotation=45, ha="right")
211
 
212
- # 4) Boxplot by grade
213
  fig4 = plt.figure()
214
  if not grade_stats.empty:
215
- order = grade_stats[grade_stats.columns[0]].tolist()
216
  data = [valid.loc[d["_grade"] == g, "_marks"].dropna().values for g in order]
217
- plt.boxplot(data, tick_labels=[str(g) for g in order], vert=True) # βœ… tick_labels
218
  plt.title("Marks spread by Grade (Boxplot)")
219
  plt.xlabel("Grade")
220
  plt.ylabel("Marks")
@@ -222,17 +240,35 @@ def compute_insights(df, marks_col, grade_col, pass_mark, course_col, section_co
222
  else:
223
  plt.title("Marks spread by Grade (Boxplot)")
224
 
225
- return kpi_df, percentiles_df, grade_dist, grade_stats, heaping_df, insight_text, fig1, fig2, fig3, fig4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
226
 
227
 
228
  # =============================
229
  # UI
230
  # =============================
231
  with gr.Blocks(title="HoD Result Dashboard") as demo:
232
- gr.Markdown("## πŸ“Š HoD Result Dashboard β€” Teacher Insights Dashboard (No PDF, No Student Tables)")
233
 
234
- file_state = gr.State(None) # bytes
235
- sheet_state = gr.State(None) # sheet name string
236
 
237
  with gr.Row():
238
  upload = gr.File(label="Upload Excel (.xlsx)", file_types=[".xlsx"])
@@ -251,18 +287,19 @@ with gr.Blocks(title="HoD Result Dashboard") as demo:
251
  course_filter = gr.Dropdown(label="Course filter", choices=["(all)"], value="(all)", interactive=False, visible=False)
252
  section_filter = gr.Dropdown(label="Section filter", choices=["(all)"], value="(all)", interactive=False, visible=False)
253
 
254
- analyze_btn = gr.Button("πŸ” Refresh Dashboard")
255
 
256
  insight_md = gr.Markdown("")
257
 
258
  with gr.Tab("Tables"):
259
  with gr.Row():
260
  kpi_table = gr.Dataframe(label="KPI Summary", interactive=False, wrap=True)
261
- percentiles_table = gr.Dataframe(label="Percentiles", interactive=False, wrap=True)
262
  with gr.Row():
263
- grade_dist_table = gr.Dataframe(label="Grade distribution", interactive=False, wrap=True)
264
- heaping_table = gr.Dataframe(label="Mark heaping (top repeated marks)", interactive=False, wrap=True)
265
- grade_stats_table = gr.Dataframe(label="Grade β†’ Marks mapping (min/max/mean/median)", interactive=False, wrap=True)
 
266
 
267
  with gr.Tab("Charts"):
268
  with gr.Row():
@@ -271,61 +308,62 @@ with gr.Blocks(title="HoD Result Dashboard") as demo:
271
  with gr.Row():
272
  grade_plot = gr.Plot(label="Grade distribution")
273
  grade_box = gr.Plot(label="Boxplot by grade")
 
274
 
275
- # ---- callbacks
276
  def on_upload(file_obj):
277
- file_bytes = _read_file_bytes(file_obj)
278
- xls = pd.ExcelFile(io.BytesIO(file_bytes), engine="openpyxl")
279
  sheets = xls.sheet_names or []
280
  if not sheets:
281
- raise gr.Error("No sheets found.")
282
 
283
  sheet0 = sheets[0]
284
- df0 = pd.read_excel(io.BytesIO(file_bytes), sheet_name=sheet0, engine="openpyxl")
285
- df0 = _drop_useless_cols(df0)
286
 
287
- m_guess, g_guess, c_guess, s_guess = _guess_cols(df0)
288
  cols = list(df0.columns)
 
 
 
289
 
290
- # optional filter dropdown setup
291
- course_col_update = gr.update(choices=cols, value=(c_guess or cols[0]), visible=bool(c_guess), interactive=bool(c_guess))
292
- section_col_update = gr.update(choices=cols, value=(s_guess or cols[0]), visible=bool(s_guess), interactive=bool(s_guess))
293
 
294
- course_filter_update = gr.update(choices=["(all)"], value="(all)", visible=False, interactive=False)
295
- section_filter_update = gr.update(choices=["(all)"], value="(all)", visible=False, interactive=False)
296
 
297
  if c_guess and c_guess in df0.columns:
298
  vals = ["(all)"] + sorted(df0[c_guess].astype(str).fillna("NA").unique().tolist())
299
- course_filter_update = gr.update(choices=vals, value="(all)", visible=True, interactive=True)
300
 
301
  if s_guess and s_guess in df0.columns:
302
  vals = ["(all)"] + sorted(df0[s_guess].astype(str).fillna("NA").unique().tolist())
303
- section_filter_update = gr.update(choices=vals, value="(all)", visible=True, interactive=True)
304
 
305
  return (
306
  gr.update(choices=sheets, value=sheet0, interactive=True), # sheet_dd
307
  gr.update(choices=cols, value=m_guess, interactive=True), # marks_col
308
  gr.update(choices=cols, value=g_guess, interactive=True), # grade_col
309
- course_col_update,
310
- section_col_update,
311
- course_filter_update,
312
- section_filter_update,
313
- file_bytes, # file_state BYTES
314
- sheet0, # sheet_state STRING
315
  )
316
 
317
  upload.change(
318
  fn=on_upload,
319
  inputs=[upload],
320
- outputs=[sheet_dd, marks_col, grade_col, course_col, section_col, course_filter, section_filter, file_state, sheet_state],
321
  )
322
 
323
  def on_sheet_change(sheet_name, file_bytes, course_col_val, section_col_val):
324
  if not file_bytes:
325
  raise gr.Error("Upload Excel first.")
326
 
327
- df = pd.read_excel(io.BytesIO(file_bytes), sheet_name=sheet_name, engine="openpyxl")
328
- df = _drop_useless_cols(df)
329
 
330
  cf = gr.update(choices=["(all)"], value="(all)", visible=False, interactive=False)
331
  sf = gr.update(choices=["(all)"], value="(all)", visible=False, interactive=False)
@@ -342,7 +380,7 @@ with gr.Blocks(title="HoD Result Dashboard") as demo:
342
 
343
  sheet_dd.change(
344
  fn=on_sheet_change,
345
- inputs=[sheet_dd, file_state, course_col, section_col],
346
  outputs=[course_filter, section_filter, sheet_state],
347
  )
348
 
@@ -352,27 +390,23 @@ with gr.Blocks(title="HoD Result Dashboard") as demo:
352
  if not sheet_name:
353
  raise gr.Error("Select a sheet.")
354
 
355
- df = pd.read_excel(io.BytesIO(file_bytes), sheet_name=sheet_name, engine="openpyxl")
356
- df = _drop_useless_cols(df)
357
 
358
- kpi_df, pct_df, gdist_df, gstats_df, heap_df, insight_text, f1, f2, f3, f4 = compute_insights(
359
  df, m_col, g_col, int(pmark), c_col, s_col, c_filter, s_filter
360
  )
361
 
362
  return (
363
- f"### Teacher Insight\n\n{insight_text}",
364
- kpi_df,
365
- pct_df,
366
- gdist_df,
367
- heap_df,
368
- gstats_df,
369
- f1, f2, f3, f4
370
  )
371
 
372
  analyze_btn.click(
373
  fn=on_refresh,
374
- inputs=[file_state, sheet_state, marks_col, grade_col, pass_mark, course_col, section_col, course_filter, section_filter],
375
- outputs=[insight_md, kpi_table, percentiles_table, grade_dist_table, heaping_table, grade_stats_table, hist_plot, cdf_plot, grade_plot, grade_box],
 
376
  )
377
 
378
  demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)
 
1
  import io
 
 
2
  import numpy as np
3
  import pandas as pd
4
  import gradio as gr
 
21
  return b
22
 
23
 
24
+ def _drop_useless(df: pd.DataFrame) -> pd.DataFrame:
 
 
 
 
25
  df = df.dropna(axis=1, how="all").dropna(axis=0, how="all")
26
  unnamed = [c for c in df.columns if str(c).strip().lower().startswith("unnamed")]
27
  if unnamed:
 
29
  return df
30
 
31
 
32
def _safe_numeric(s):
    """Convert *s* to numeric values without raising on bad entries.

    Delegates to ``pd.to_numeric`` with ``errors="coerce"``, so anything that
    cannot be parsed as a number comes back as NaN.
    """
    coerced = pd.to_numeric(s, errors="coerce")
    return coerced
34
+
35
 
36
def _guess_marks_col(df: pd.DataFrame):
    """Guess which column of *df* holds the marks.

    Every column is coerced with ``pd.to_numeric(..., errors="coerce")`` and
    scored by the fraction of its cells that survive as numbers. The
    highest-scoring column wins; ties go to the left-most column.

    Args:
        df: Sheet data to inspect.

    Returns:
        The label of the best-scoring column; the first column's label when
        no column produces a usable score (for example when there are no
        rows); or ``None`` when *df* has no columns at all.
    """
    cols = list(df.columns)
    if not cols:
        # An empty frame has no first column to fall back on.
        return None

    best, best_score = cols[0], -1
    # Walk the columns by position so that duplicated labels still yield a
    # 1-D Series (df[label] would return a DataFrame, which pd.to_numeric
    # rejects).
    for pos, label in enumerate(cols):
        score = pd.to_numeric(df.iloc[:, pos], errors="coerce").notna().mean()
        if score > best_score:
            best_score = score
            best = label
    return best
45
+
46
 
47
def _guess_grade_col(cols):
    """Pick the column label most likely to hold grades.

    Args:
        cols: Column labels of the sheet. Any iterable is accepted and the
            labels do not have to be strings.

    Returns:
        The first label whose stripped, lower-cased text contains "grade" or
        "grde"; otherwise the first label; or ``None`` when *cols* is empty.
    """
    cols = list(cols)
    if not cols:
        # The fallback below needs a first element; avoid IndexError.
        return None
    for label in cols:
        text = str(label).strip().lower()
        if "grade" in text or "grde" in text:
            return label
    return cols[0]
50
 
 
 
 
51
 
52
def _guess_optional(cols):
    """Look for the optional course and section columns by name.

    Returns a ``(course, section)`` pair. Each item is the first label in
    *cols* whose stripped, lower-cased text contains one of that role's
    keywords, or ``None`` when no label matches.
    """
    normalized = {label: str(label).strip().lower() for label in cols}

    def first_hit(keywords):
        for label in cols:
            text = normalized[label]
            if any(word in text for word in keywords):
                return label
        return None

    course_match = first_hit(["course", "module", "subject"])
    section_match = first_hit(["section", "group", "batch", "class"])
    return course_match, section_match
57
 
58
 
59
  def apply_filters(df, course_col, section_col, course_filter, section_filter):
 
66
 
67
 
68
  # =============================
69
+ # Core HoD Insights (no student tables)
70
  # =============================
71
+ def compute_hod_insights(df, marks_col, grade_col, pass_mark, course_col, section_col, course_filter, section_filter):
72
  if df is None or df.empty:
73
  raise gr.Error("Sheet is empty.")
74
 
 
82
  n = int(len(valid))
83
  missing = int(d["_marks"].isna().sum())
84
 
85
+ pass_mark = int(pass_mark)
86
  mean = float(valid["_marks"].mean()) if n else 0.0
87
  std = float(valid["_marks"].std(ddof=0)) if n else 0.0
88
  minv = float(valid["_marks"].min()) if n else 0.0
89
  maxv = float(valid["_marks"].max()) if n else 0.0
90
 
 
91
  pass_count = int((valid["_marks"] >= pass_mark).sum()) if n else 0
92
  pass_rate = (pass_count / n * 100.0) if n else 0.0
93
 
94
+ # Borderline (pass to pass+5) and just-below (pass-5 to pass-1)
95
+ borderline_pass = int(((valid["_marks"] >= pass_mark) & (valid["_marks"] < pass_mark + 5)).sum()) if n else 0
96
+ borderline_fail = int(((valid["_marks"] < pass_mark) & (valid["_marks"] >= pass_mark - 5)).sum()) if n else 0
97
+
98
+ # Distribution shape
99
  skew = float(valid["_marks"].skew()) if n else 0.0
100
  kurt = float(valid["_marks"].kurt()) if n else 0.0
101
 
102
+ # Outliers by IQR
103
+ outlier_count = 0
104
+ low_thr = high_thr = 0.0
105
  if n:
106
+ q1 = float(np.percentile(valid["_marks"], 25))
107
+ q3 = float(np.percentile(valid["_marks"], 75))
108
+ iqr = q3 - q1
109
+ low_thr = q1 - 1.5 * iqr
110
+ high_thr = q3 + 1.5 * iqr
111
+ outlier_count = int(((valid["_marks"] < low_thr) | (valid["_marks"] > high_thr)).sum())
112
+
113
+ # Percentiles
114
+ pct_df = pd.DataFrame(
115
+ [(f"P{p}", round(float(np.percentile(valid["_marks"], p)), 2)) for p in [10, 25, 50, 75, 90]]
116
+ if n else [],
117
+ columns=["Percentile", "Marks"]
118
+ )
119
 
120
  # Grade distribution
121
  grade_dist = d["_grade"].value_counts(dropna=False).rename("count").to_frame().reset_index()
122
  grade_dist.columns = [grade_col, "count"]
123
+ grade_dist["%"] = (grade_dist["count"] / grade_dist["count"].sum() * 100).round(2) if len(grade_dist) else 0
124
 
125
+ # Grade ↔ marks mapping (moderation evidence)
126
  grade_stats = (
127
  valid.groupby(d["_grade"])["_marks"]
128
  .agg(["count", "mean", "std", "min", "median", "max"])
 
132
  )
133
 
134
  # Mark heaping (repeated marks)
135
+ heaping = (
136
  valid["_marks"].round(0).astype(int)
137
  .value_counts().head(12)
138
  .rename("count").reset_index()
139
  .rename(columns={"index": "Mark"})
140
  )
141
 
142
+ # Course status
 
 
 
 
 
 
 
 
 
 
 
143
  if pass_rate >= 80:
144
  status = "GREEN"
145
  elif pass_rate >= 60:
 
147
  else:
148
  status = "RED"
149
 
150
+ # Flags
151
  flags = []
152
  if missing > 0:
153
+ flags.append(f"{missing} missing mark(s) β€” verify completeness.")
154
+ if borderline_fail > max(5, 0.03 * n):
155
+ flags.append("Many students just below pass β€” consider targeted support / moderation review.")
156
+ if borderline_pass > max(5, 0.03 * n):
157
+ flags.append("Many students just above pass β€” borderline attainment cluster.")
158
  if abs(skew) > 0.7:
159
+ flags.append("Skewed distribution β€” check assessment balance and marking consistency.")
 
 
160
  if outlier_count > 0:
161
+ flags.append(f"{outlier_count} outlier(s) by IQR β€” spot-check extremes.")
162
+ if len(heaping) and heaping["count"].iloc[0] >= max(10, 0.06 * n):
163
+ flags.append("Heaping detected β€” many students share identical marks (rounding/marking pattern).")
164
+
165
  flags_text = " | ".join(flags) if flags else "No major warning patterns detected."
166
 
167
+ insight = (
168
+ f"**Status:** {status} \n"
169
+ f"**Pass rate:** {pass_rate:.1f}% (Pass mark = {pass_mark}) \n"
170
+ f"**Avg:** {mean:.1f} | **Std:** {std:.1f} | **Min/Max:** {minv:.1f}/{maxv:.1f} \n"
171
+ f"**Borderline (just below pass):** {borderline_fail} | **Borderline (just above pass):** {borderline_pass} \n"
172
+ f"**Skew:** {skew:.2f} | **Kurtosis:** {kurt:.2f} | **Outliers:** {outlier_count} | **Missing:** {missing} \n"
173
+ f"**Flags:** {flags_text}"
174
  )
175
 
176
+ # KPI table
177
+ kpi = pd.DataFrame(
178
  [
179
  ("Total rows (filtered)", total),
180
  ("Students with numeric marks", n),
 
182
  ("Pass mark", pass_mark),
183
  ("Pass count", pass_count),
184
  ("Pass rate (%)", round(pass_rate, 2)),
185
+ ("Borderline just below pass", borderline_fail),
186
+ ("Borderline just above pass", borderline_pass),
187
  ("Average", round(mean, 2)),
188
  ("Std deviation", round(std, 2)),
189
  ("Minimum", round(minv, 2)),
 
193
  ("Outlier low threshold (IQR)", round(low_thr, 2)),
194
  ("Outlier high threshold (IQR)", round(high_thr, 2)),
195
  ("Outlier count (IQR)", outlier_count),
196
+ ("Status", status),
197
  ],
198
  columns=["Metric", "Value"],
199
  )
200
 
201
  # Charts
202
+ # 1 Histogram
203
  fig1 = plt.figure()
204
  plt.hist(valid["_marks"].dropna(), bins=12)
205
+ plt.axvline(pass_mark, linestyle="--")
206
  plt.title("Marks distribution (Histogram)")
207
  plt.xlabel("Marks")
208
  plt.ylabel("Students")
209
 
210
+ # 2 CDF
211
  fig2 = plt.figure()
212
  xs = np.sort(valid["_marks"].dropna().values) if n else np.array([])
213
  ys = np.arange(1, len(xs) + 1) / len(xs) if len(xs) else np.array([])
214
  if len(xs):
215
  plt.plot(xs, ys)
216
+ plt.axvline(pass_mark, linestyle="--")
217
  plt.title("CDF (Proportion of students ≀ mark)")
218
  plt.xlabel("Marks")
219
  plt.ylabel("Proportion")
220
 
221
+ # 3 Grade distribution
222
  fig3 = plt.figure()
223
  gd = grade_dist.set_index(grade_col)["count"]
224
  plt.bar(gd.index.astype(str), gd.values)
 
227
  plt.ylabel("Count")
228
  plt.xticks(rotation=45, ha="right")
229
 
230
+ # 4 Boxplot by grade (moderation)
231
  fig4 = plt.figure()
232
  if not grade_stats.empty:
233
+ order = grade_stats["Grade"].tolist()
234
  data = [valid.loc[d["_grade"] == g, "_marks"].dropna().values for g in order]
235
+ plt.boxplot(data, tick_labels=[str(g) for g in order], vert=True)
236
  plt.title("Marks spread by Grade (Boxplot)")
237
  plt.xlabel("Grade")
238
  plt.ylabel("Marks")
 
240
  else:
241
  plt.title("Marks spread by Grade (Boxplot)")
242
 
243
+ # 5 Section comparison (optional)
244
+ fig5 = plt.figure()
245
+ section_table = pd.DataFrame()
246
+ if section_col and section_col in d.columns and n:
247
+ sec = valid.groupby(d[section_col].astype(str).fillna("NA"))["_marks"].agg(["count", "mean"]).reset_index()
248
+ sec["pass_rate_%"] = (valid.groupby(d[section_col].astype(str).fillna("NA"))["_marks"].apply(lambda x: (x >= pass_mark).mean() * 100)).values
249
+ sec = sec.rename(columns={section_col: "Section"})
250
+ section_table = sec.sort_values("pass_rate_%", ascending=False)
251
+
252
+ plt.bar(section_table["Section"].astype(str), section_table["pass_rate_%"].values)
253
+ plt.title("Section-wise Pass Rate (%)")
254
+ plt.xlabel("Section")
255
+ plt.ylabel("Pass rate (%)")
256
+ plt.xticks(rotation=45, ha="right")
257
+ else:
258
+ plt.title("Section-wise Pass Rate (%) β€” not available (no section column)")
259
+ plt.axis("off")
260
+
261
+ return kpi, pct_df, grade_dist, grade_stats, heaping, section_table, insight, fig1, fig2, fig3, fig4, fig5
262
 
263
 
264
  # =============================
265
  # UI
266
  # =============================
267
  with gr.Blocks(title="HoD Result Dashboard") as demo:
268
+ gr.Markdown("## πŸ“Š HoD Result Dashboard β€” Insights & Patterns (No Student Tables, No PDF)")
269
 
270
+ file_bytes_state = gr.State(None) # bytes
271
+ sheet_state = gr.State(None) # sheet name string
272
 
273
  with gr.Row():
274
  upload = gr.File(label="Upload Excel (.xlsx)", file_types=[".xlsx"])
 
287
  course_filter = gr.Dropdown(label="Course filter", choices=["(all)"], value="(all)", interactive=False, visible=False)
288
  section_filter = gr.Dropdown(label="Section filter", choices=["(all)"], value="(all)", interactive=False, visible=False)
289
 
290
+ analyze_btn = gr.Button("πŸ” Refresh HoD Dashboard")
291
 
292
  insight_md = gr.Markdown("")
293
 
294
  with gr.Tab("Tables"):
295
  with gr.Row():
296
  kpi_table = gr.Dataframe(label="KPI Summary", interactive=False, wrap=True)
297
+ pct_table = gr.Dataframe(label="Percentiles", interactive=False, wrap=True)
298
  with gr.Row():
299
+ grade_dist_table = gr.Dataframe(label="Grade Distribution", interactive=False, wrap=True)
300
+ heaping_table = gr.Dataframe(label="Mark Heaping (Top repeated marks)", interactive=False, wrap=True)
301
+ grade_stats_table = gr.Dataframe(label="Grade ↔ Marks (Moderation evidence)", interactive=False, wrap=True)
302
+ section_table = gr.Dataframe(label="Section Comparison (if available)", interactive=False, wrap=True)
303
 
304
  with gr.Tab("Charts"):
305
  with gr.Row():
 
308
  with gr.Row():
309
  grade_plot = gr.Plot(label="Grade distribution")
310
  grade_box = gr.Plot(label="Boxplot by grade")
311
+ section_plot = gr.Plot(label="Section-wise pass rate")
312
 
313
+ # -------- callbacks
314
  def on_upload(file_obj):
315
+ b = _read_file_bytes(file_obj)
316
+ xls = pd.ExcelFile(io.BytesIO(b), engine="openpyxl")
317
  sheets = xls.sheet_names or []
318
  if not sheets:
319
+ raise gr.Error("No sheets found in workbook.")
320
 
321
  sheet0 = sheets[0]
322
+ df0 = _drop_useless(pd.read_excel(io.BytesIO(b), sheet_name=sheet0, engine="openpyxl"))
 
323
 
 
324
  cols = list(df0.columns)
325
+ m_guess = _guess_marks_col(df0)
326
+ g_guess = _guess_grade_col(cols)
327
+ c_guess, s_guess = _guess_optional(cols)
328
 
329
+ # Optional filters
330
+ course_col_upd = gr.update(choices=cols, value=(c_guess or cols[0]), visible=bool(c_guess), interactive=bool(c_guess))
331
+ section_col_upd = gr.update(choices=cols, value=(s_guess or cols[0]), visible=bool(s_guess), interactive=bool(s_guess))
332
 
333
+ course_filter_upd = gr.update(choices=["(all)"], value="(all)", visible=False, interactive=False)
334
+ section_filter_upd = gr.update(choices=["(all)"], value="(all)", visible=False, interactive=False)
335
 
336
  if c_guess and c_guess in df0.columns:
337
  vals = ["(all)"] + sorted(df0[c_guess].astype(str).fillna("NA").unique().tolist())
338
+ course_filter_upd = gr.update(choices=vals, value="(all)", visible=True, interactive=True)
339
 
340
  if s_guess and s_guess in df0.columns:
341
  vals = ["(all)"] + sorted(df0[s_guess].astype(str).fillna("NA").unique().tolist())
342
+ section_filter_upd = gr.update(choices=vals, value="(all)", visible=True, interactive=True)
343
 
344
  return (
345
  gr.update(choices=sheets, value=sheet0, interactive=True), # sheet_dd
346
  gr.update(choices=cols, value=m_guess, interactive=True), # marks_col
347
  gr.update(choices=cols, value=g_guess, interactive=True), # grade_col
348
+ course_col_upd,
349
+ section_col_upd,
350
+ course_filter_upd,
351
+ section_filter_upd,
352
+ b, # file_bytes_state
353
+ sheet0, # sheet_state
354
  )
355
 
356
  upload.change(
357
  fn=on_upload,
358
  inputs=[upload],
359
+ outputs=[sheet_dd, marks_col, grade_col, course_col, section_col, course_filter, section_filter, file_bytes_state, sheet_state],
360
  )
361
 
362
  def on_sheet_change(sheet_name, file_bytes, course_col_val, section_col_val):
363
  if not file_bytes:
364
  raise gr.Error("Upload Excel first.")
365
 
366
+ df = _drop_useless(pd.read_excel(io.BytesIO(file_bytes), sheet_name=sheet_name, engine="openpyxl"))
 
367
 
368
  cf = gr.update(choices=["(all)"], value="(all)", visible=False, interactive=False)
369
  sf = gr.update(choices=["(all)"], value="(all)", visible=False, interactive=False)
 
380
 
381
  sheet_dd.change(
382
  fn=on_sheet_change,
383
+ inputs=[sheet_dd, file_bytes_state, course_col, section_col],
384
  outputs=[course_filter, section_filter, sheet_state],
385
  )
386
 
 
390
  if not sheet_name:
391
  raise gr.Error("Select a sheet.")
392
 
393
+ df = _drop_useless(pd.read_excel(io.BytesIO(file_bytes), sheet_name=sheet_name, engine="openpyxl"))
 
394
 
395
+ kpi, pct, gdist, gstats, heap, sec_tbl, insight, f1, f2, f3, f4, f5 = compute_hod_insights(
396
  df, m_col, g_col, int(pmark), c_col, s_col, c_filter, s_filter
397
  )
398
 
399
  return (
400
+ "### HoD Insight\n\n" + insight,
401
+ kpi, pct, gdist, heap, gstats, sec_tbl,
402
+ f1, f2, f3, f4, f5
 
 
 
 
403
  )
404
 
405
  analyze_btn.click(
406
  fn=on_refresh,
407
+ inputs=[file_bytes_state, sheet_state, marks_col, grade_col, pass_mark, course_col, section_col, course_filter, section_filter],
408
+ outputs=[insight_md, kpi_table, pct_table, grade_dist_table, heaping_table, grade_stats_table, section_table,
409
+ hist_plot, cdf_plot, grade_plot, grade_box, section_plot],
410
  )
411
 
412
  # Start the app listening on all network interfaces (0.0.0.0), port 7860,
  # with Gradio's ssr_mode switched off.
  demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)