Deevyankar committed on
Commit
df4a47d
·
verified ·
1 Parent(s): 239975a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +286 -124
app.py CHANGED
@@ -1,12 +1,15 @@
1
  import io
2
  from datetime import datetime
3
 
 
4
  import pandas as pd
5
  import gradio as gr
6
  import matplotlib.pyplot as plt
 
7
  from reportlab.lib.pagesizes import A4
8
  from reportlab.pdfgen import canvas
9
  from reportlab.lib.units import cm
 
10
 
11
 
12
  # =============================
@@ -29,21 +32,58 @@ def _safe_numeric(series):
29
  return pd.to_numeric(series, errors="coerce")
30
 
31
 
32
- def _guess_cols(cols):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  lower = {c: str(c).strip().lower() for c in cols}
34
 
35
- student_guess = next((c for c in cols if any(k in lower[c] for k in ["student", "name", "id"])), cols[0])
36
- marks_guess = next((c for c in cols if lower[c] in ["marks", "mark", "score", "total", "final"]), cols[0])
37
- grade_guess = next((c for c in cols if "grade" in lower[c]), cols[0])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
  course_guess = next((c for c in cols if any(k in lower[c] for k in ["course", "module", "subject"])), None)
40
  section_guess = next((c for c in cols if any(k in lower[c] for k in ["section", "group", "batch", "class"])), None)
41
 
42
- return student_guess, marks_guess, grade_guess, course_guess, section_guess
 
 
 
 
 
 
 
 
43
 
44
 
45
  # =============================
46
- # Load Excel and sheet
47
  # =============================
48
  def load_excel(file_obj):
49
  try:
@@ -55,23 +95,22 @@ def load_excel(file_obj):
55
 
56
  sheet0 = sheets[0]
57
  df = pd.read_excel(io.BytesIO(file_bytes), sheet_name=sheet0, engine="openpyxl")
58
- df = df.dropna(axis=1, how="all").dropna(axis=0, how="all")
59
 
 
60
  cols = list(df.columns)
61
- s_guess, m_guess, g_guess, c_guess, sec_guess = _guess_cols(cols)
62
 
63
- # Filter dropdowns (dynamic if columns exist)
64
- course_dd = gr.Dropdown(choices=["(all)"], value="(all)", interactive=bool(c_guess), visible=bool(c_guess), label="Course filter")
65
- section_dd = gr.Dropdown(choices=["(all)"], value="(all)", interactive=bool(sec_guess), visible=bool(sec_guess), label="Section/Group filter")
66
 
67
- # Pre-fill filter choices if present
68
- if c_guess:
69
  course_vals = ["(all)"] + sorted(df[c_guess].astype(str).fillna("NA").unique().tolist())
70
  course_dd = gr.Dropdown(choices=course_vals, value="(all)", interactive=True, visible=True, label="Course filter")
71
 
72
- if sec_guess:
73
  sec_vals = ["(all)"] + sorted(df[sec_guess].astype(str).fillna("NA").unique().tolist())
74
- section_dd = gr.Dropdown(choices=sec_vals, value="(all)", interactive=True, visible=True, label="Section/Group filter")
75
 
76
  return (
77
  gr.Dropdown(choices=sheets, value=sheet0, interactive=True),
@@ -80,11 +119,11 @@ def load_excel(file_obj):
80
  gr.Dropdown(choices=cols, value=m_guess, interactive=True),
81
  gr.Dropdown(choices=cols, value=g_guess, interactive=True),
82
  gr.Dropdown(choices=cols, value=(c_guess or cols[0]), interactive=bool(c_guess), visible=bool(c_guess), label="Course column"),
83
- gr.Dropdown(choices=cols, value=(sec_guess or cols[0]), interactive=bool(sec_guess), visible=bool(sec_guess), label="Section/Group column"),
84
  course_dd,
85
  section_dd,
86
  file_bytes,
87
- sheet0,
88
  )
89
  except Exception:
90
  return (
@@ -106,12 +145,11 @@ def read_sheet(sheet_name, file_bytes, course_col, section_col):
106
  if not file_bytes:
107
  raise ValueError("Upload Excel first.")
108
  df = pd.read_excel(io.BytesIO(file_bytes), sheet_name=sheet_name, engine="openpyxl")
109
- df = df.dropna(axis=1, how="all").dropna(axis=0, how="all")
110
- cols = list(df.columns)
111
 
112
- # Update filter choices based on current sheet + selected columns
113
  course_dd = gr.Dropdown(choices=["(all)"], value="(all)", interactive=False, visible=False, label="Course filter")
114
- section_dd = gr.Dropdown(choices=["(all)"], value="(all)", interactive=False, visible=False, label="Section/Group filter")
115
 
116
  if course_col and course_col in df.columns:
117
  course_vals = ["(all)"] + sorted(df[course_col].astype(str).fillna("NA").unique().tolist())
@@ -119,9 +157,9 @@ def read_sheet(sheet_name, file_bytes, course_col, section_col):
119
 
120
  if section_col and section_col in df.columns:
121
  sec_vals = ["(all)"] + sorted(df[section_col].astype(str).fillna("NA").unique().tolist())
122
- section_dd = gr.Dropdown(choices=sec_vals, value="(all)", interactive=True, visible=True, label="Section/Group filter")
123
 
124
- return df, gr.Dropdown(choices=cols, interactive=True), gr.Dropdown(choices=cols, interactive=True)
125
 
126
 
127
  # =============================
@@ -143,34 +181,77 @@ def compute_dashboard(df, student_col, marks_col, grade_col, pass_mark, course_c
143
  if df is None or df.empty:
144
  raise gr.Error("Sheet is empty.")
145
 
146
- d = apply_filters(df, course_col, section_col, course_filter, section_filter)
 
 
 
 
147
 
148
- # metrics
149
  total = int(len(d))
150
- d[marks_col] = _safe_numeric(d[marks_col])
151
- valid = d[d[marks_col].notna()].copy()
152
  n = int(len(valid))
153
- missing_marks = int(d[marks_col].isna().sum())
154
 
155
- mean = float(valid[marks_col].mean()) if n else 0.0
156
- std = float(valid[marks_col].std(ddof=0)) if n else 0.0
157
- minv = float(valid[marks_col].min()) if n else 0.0
158
- maxv = float(valid[marks_col].max()) if n else 0.0
159
 
160
- pass_count = int((valid[marks_col] >= pass_mark).sum()) if n else 0
161
  pass_rate = (pass_count / n * 100.0) if n else 0.0
162
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
  # risk / borderline
164
- risk_df = valid[valid[marks_col] < pass_mark][[student_col, marks_col, grade_col]].sort_values(by=marks_col).head(20)
165
- borderline_df = valid[(valid[marks_col] >= pass_mark) & (valid[marks_col] < pass_mark + 5)][[student_col, marks_col, grade_col]].sort_values(by=marks_col).head(20)
166
 
167
- top_df = valid[[student_col, marks_col, grade_col]].sort_values(by=marks_col, ascending=False).head(10)
168
- bottom_df = valid[[student_col, marks_col, grade_col]].sort_values(by=marks_col, ascending=True).head(10)
169
 
170
- grade_dist = d[grade_col].astype(str).fillna("NA").value_counts(dropna=False).rename("count").to_frame().reset_index()
171
  grade_dist.columns = [grade_col, "count"]
172
 
173
- # Status
 
 
 
 
 
 
 
 
 
 
174
  if pass_rate >= 80:
175
  status = "GREEN"
176
  elif pass_rate >= 60:
@@ -178,36 +259,27 @@ def compute_dashboard(df, student_col, marks_col, grade_col, pass_mark, course_c
178
  else:
179
  status = "RED"
180
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
181
  insight = (
182
  f"Status: {status}. Pass rate {pass_rate:.1f}% (Pass mark {pass_mark}). "
183
  f"Average {mean:.1f}, Min {minv:.1f}, Max {maxv:.1f}, Std {std:.1f}. "
184
- f"Missing marks: {missing_marks}."
 
185
  )
186
 
187
- # --- Charts (matplotlib)
188
- # 1) Histogram
189
- fig1 = plt.figure()
190
- plt.hist(valid[marks_col].dropna(), bins=10)
191
- plt.title("Marks distribution")
192
- plt.xlabel("Marks")
193
- plt.ylabel("Students")
194
-
195
- # 2) Box plot
196
- fig2 = plt.figure()
197
- plt.boxplot(valid[marks_col].dropna(), vert=True)
198
- plt.title("Marks box plot")
199
- plt.ylabel("Marks")
200
-
201
- # 3) Grade bar
202
- fig3 = plt.figure()
203
- gd = grade_dist.set_index(grade_col)["count"]
204
- plt.bar(gd.index.astype(str), gd.values)
205
- plt.title("Grade distribution")
206
- plt.xlabel("Grade")
207
- plt.ylabel("Count")
208
- plt.xticks(rotation=45, ha="right")
209
-
210
- # KPI table
211
  kpi_df = pd.DataFrame(
212
  [
213
  ("Total rows (filtered)", total),
@@ -220,19 +292,81 @@ def compute_dashboard(df, student_col, marks_col, grade_col, pass_mark, course_c
220
  ("Std deviation", round(std, 2)),
221
  ("Minimum", round(minv, 2)),
222
  ("Maximum", round(maxv, 2)),
 
 
 
 
223
  ("Status", status),
224
  ("Insight", insight),
225
  ],
226
  columns=["Metric", "Value"],
227
  )
228
 
229
- return kpi_df, grade_dist, risk_df, borderline_df, top_df, bottom_df, fig1, fig2, fig3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
 
231
 
232
  # =============================
233
- # PDF (uses kpi + tables)
234
  # =============================
235
- def make_pdf(kpi_df, grade_dist, risk_df, borderline_df, top_df, bottom_df, title="Marks Dashboard Report"):
 
 
 
236
  buf = io.BytesIO()
237
  c = canvas.Canvas(buf, pagesize=A4)
238
  width, height = A4
@@ -257,6 +391,10 @@ def make_pdf(kpi_df, grade_dist, risk_df, borderline_df, top_df, bottom_df, titl
257
  c.setFont("Helvetica", 9.5)
258
  max_chars = 95
259
  for i in range(0, len(text), max_chars):
 
 
 
 
260
  c.drawString(x, y, text[i:i + max_chars])
261
  y -= 0.5 * cm
262
 
@@ -272,47 +410,55 @@ def make_pdf(kpi_df, grade_dist, risk_df, borderline_df, top_df, bottom_df, titl
272
  for _, r in df2.iterrows():
273
  line(" | ".join(r.values.tolist()))
274
 
275
- h(title)
276
- line(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
277
- sh("1) KPI Summary")
278
- table(kpi_df, max_rows=50)
 
 
279
 
280
- if y < 6 * cm:
281
- c.showPage()
282
- y = height - 2 * cm
283
 
284
- sh("2) Grade distribution")
285
- table(grade_dist, max_rows=30)
 
 
 
286
 
287
- if y < 6 * cm:
288
- c.showPage()
289
- y = height - 2 * cm
290
 
291
- sh("3) At-risk students (below pass mark)")
292
- table(risk_df, max_rows=20)
293
 
294
- if y < 6 * cm:
295
- c.showPage()
296
- y = height - 2 * cm
297
 
298
- sh("4) Borderline students (pass mark to pass mark + 5)")
299
- table(borderline_df, max_rows=20)
 
300
 
301
- if y < 6 * cm:
302
- c.showPage()
303
- y = height - 2 * cm
 
304
 
305
- sh("5) Top 10")
306
  table(top_df, max_rows=10)
307
-
308
- if y < 6 * cm:
309
- c.showPage()
310
- y = height - 2 * cm
311
-
312
- sh("6) Bottom 10")
313
  table(bottom_df, max_rows=10)
314
 
 
315
  c.showPage()
 
 
 
 
 
 
 
316
  c.save()
317
  buf.seek(0)
318
  return buf
@@ -322,14 +468,20 @@ def generate_pdf_report(file_bytes, sheet_name, student_col, marks_col, grade_co
322
  if not file_bytes:
323
  raise gr.Error("Upload Excel first.")
324
  df = pd.read_excel(io.BytesIO(file_bytes), sheet_name=sheet_name, engine="openpyxl")
325
- df = df.dropna(axis=1, how="all").dropna(axis=0, how="all")
326
-
327
- kpi_df, grade_dist, risk_df, borderline_df, top_df, bottom_df, _, _, _ = compute_dashboard(
328
- df, student_col, marks_col, grade_col, int(pass_mark),
329
- course_col, section_col, course_filter, section_filter
 
 
 
 
 
 
 
 
330
  )
331
-
332
- pdf_buf = make_pdf(kpi_df, grade_dist, risk_df, borderline_df, top_df, bottom_df, title="HoD Result Dashboard Report")
333
  fname = f"dashboard_report__{sheet_name}__{datetime.now().strftime('%Y%m%d_%H%M%S')}.pdf"
334
  return (fname, pdf_buf.getvalue())
335
 
@@ -338,7 +490,7 @@ def generate_pdf_report(file_bytes, sheet_name, student_col, marks_col, grade_co
338
  # UI
339
  # =============================
340
  with gr.Blocks(title="HoD Result Dashboard") as demo:
341
- gr.Markdown("## πŸ“Š HoD Result Dashboard (Excel Upload) β€” Insights + Charts + PDF")
342
 
343
  file_state = gr.State(None)
344
  sheet_state = gr.State(None)
@@ -355,35 +507,44 @@ with gr.Blocks(title="HoD Result Dashboard") as demo:
355
 
356
  with gr.Row():
357
  course_col = gr.Dropdown(label="Course column (optional)", choices=[], interactive=False, visible=False)
358
- section_col = gr.Dropdown(label="Section/Group column (optional)", choices=[], interactive=False, visible=False)
359
 
360
  with gr.Row():
361
  course_filter = gr.Dropdown(label="Course filter", choices=["(all)"], value="(all)", interactive=False, visible=False)
362
- section_filter = gr.Dropdown(label="Section/Group filter", choices=["(all)"], value="(all)", interactive=False, visible=False)
363
 
364
  df_preview = gr.Dataframe(label="Preview", interactive=False, wrap=True)
365
 
366
  analyze_btn = gr.Button("πŸ” Refresh Dashboard")
367
 
368
- with gr.Row():
369
- kpi_table = gr.Dataframe(label="KPI Summary", interactive=False, wrap=True)
370
  grade_table = gr.Dataframe(label="Grade distribution", interactive=False, wrap=True)
371
 
372
- with gr.Row():
373
- risk_table = gr.Dataframe(label="At-risk (below pass) - Top 20", interactive=False, wrap=True)
374
- borderline_table = gr.Dataframe(label="Borderline (pass to pass+5) - Top 20", interactive=False, wrap=True)
 
375
 
376
- with gr.Row():
 
 
 
 
 
377
  top_table = gr.Dataframe(label="Top 10", interactive=False, wrap=True)
378
  bottom_table = gr.Dataframe(label="Bottom 10", interactive=False, wrap=True)
379
 
380
- with gr.Row():
381
- hist_plot = gr.Plot(label="Marks histogram")
382
- box_plot = gr.Plot(label="Marks box plot")
383
- grade_plot = gr.Plot(label="Grade chart")
 
 
 
384
 
385
  with gr.Row():
386
- pdf_btn = gr.Button("πŸ“„ Generate PDF Report")
387
  pdf_out = gr.File(label="Download PDF")
388
 
389
  # Events
@@ -402,7 +563,7 @@ with gr.Blocks(title="HoD Result Dashboard") as demo:
402
  sheet_dd.change(
403
  fn=read_sheet,
404
  inputs=[sheet_dd, file_state, course_col, section_col],
405
- outputs=[df_preview, course_filter, section_filter],
406
  )
407
 
408
  def on_refresh(file_bytes, sheet_name, s_col, m_col, g_col, pmark, c_col, sec_col, c_filter, sec_filter):
@@ -411,17 +572,18 @@ with gr.Blocks(title="HoD Result Dashboard") as demo:
411
  if not sheet_name:
412
  raise gr.Error("Select a sheet.")
413
  df = pd.read_excel(io.BytesIO(file_bytes), sheet_name=sheet_name, engine="openpyxl")
414
- df = df.dropna(axis=1, how="all").dropna(axis=0, how="all")
415
 
416
- return compute_dashboard(
417
- df, s_col, m_col, g_col, int(pmark),
418
- c_col, sec_col, c_filter, sec_filter
419
- )
420
 
421
  analyze_btn.click(
422
  fn=on_refresh,
423
  inputs=[file_state, sheet_state, student_col, marks_col, grade_col, pass_mark, course_col, section_col, course_filter, section_filter],
424
- outputs=[kpi_table, grade_table, risk_table, borderline_table, top_table, bottom_table, hist_plot, box_plot, grade_plot],
 
 
 
 
425
  )
426
 
427
  pdf_btn.click(
 
1
  import io
2
  from datetime import datetime
3
 
4
+ import numpy as np
5
  import pandas as pd
6
  import gradio as gr
7
  import matplotlib.pyplot as plt
8
+
9
  from reportlab.lib.pagesizes import A4
10
  from reportlab.pdfgen import canvas
11
  from reportlab.lib.units import cm
12
+ from reportlab.lib.utils import ImageReader
13
 
14
 
15
  # =============================
 
32
  return pd.to_numeric(series, errors="coerce")
33
 
34
 
35
+ def _drop_useless_cols(df: pd.DataFrame) -> pd.DataFrame:
36
+ # drop fully empty columns + "Unnamed" columns
37
+ df = df.dropna(axis=1, how="all").dropna(axis=0, how="all")
38
+ unnamed = [c for c in df.columns if str(c).strip().lower().startswith("unnamed")]
39
+ if unnamed:
40
+ df = df.drop(columns=unnamed, errors="ignore")
41
+ return df
42
+
43
+
44
def _guess_cols(df: pd.DataFrame):
    """
    Guess which columns of *df* hold the student, marks, grade, course and section.

    Header keywords drive the student / grade / course / section guesses
    (first matching column wins). When no header matches, student and grade
    fall back to the first column, while course and section fall back to None.

    The marks guess is based on how numeric each column is, because the marks
    header can itself be a number (e.g., 100). Candidates are tried in order:

    1. columns not already claimed by a keyword match above, at least half
       numeric, and whose header is a plain number or contains
       "mark" / "score" / "total";
    2. any unclaimed column with at least one numeric value;
    3. any column at all (the most numeric one).

    Within a tier the most numeric column wins; ties go to the leftmost one.

    Returns: student_guess, marks_guess, grade_guess, course_guess, section_guess
    (all None when the frame has no columns; marks_guess is None when the
    frame has no rows).
    """
    cols = list(df.columns)
    if not cols:
        # Nothing to guess from; indexing cols[0] below would raise IndexError.
        return None, None, None, None, None

    lower = {c: str(c).strip().lower() for c in cols}

    def _first_with(keywords):
        # First column whose normalised header contains any keyword, else None.
        return next((c for c in cols if any(k in lower[c] for k in keywords)), None)

    student_hit = _first_with(("student", "name", "id", "roll", "reg", "sno"))
    grade_hit = _first_with(("grade", "grde"))
    course_guess = _first_with(("course", "module", "subject"))
    section_guess = _first_with(("section", "group", "batch", "class"))

    # `is None` rather than truthiness: a column label may legitimately be 0.
    student_guess = cols[0] if student_hit is None else student_hit
    grade_guess = cols[0] if grade_hit is None else grade_hit

    # Proportion of cells in each column that parse as a number.
    numeric_share = {c: _safe_numeric(df[c]).notna().mean() for c in cols}

    def _most_numeric(candidates):
        # Leftmost candidate with the highest numeric share. A NaN share
        # (frame without rows) never compares greater, so it is skipped.
        best, best_score = None, -1
        for c in candidates:
            score = numeric_share[c]
            if score > best_score:
                best, best_score = c, score
        return best

    # Only columns claimed through a real keyword match are excluded; the
    # cols[0] fallbacks are not, since that column may well be the marks.
    claimed = {student_hit, grade_hit, course_guess, section_guess}
    free = [c for c in cols if c not in claimed]

    def _looks_like_marks_header(c):
        header = lower[c]
        is_number = header.replace(".", "", 1).isdigit()  # e.g. "100" or "100.0"
        return is_number or any(k in header for k in ("mark", "score", "total"))

    # The 0.5 floor keeps text columns such as "Remarks" out of the hinted tier.
    hinted = [c for c in free if numeric_share[c] >= 0.5 and _looks_like_marks_header(c)]

    marks_guess = _most_numeric(hinted)
    if marks_guess is None:
        marks_guess = _most_numeric([c for c in free if numeric_share[c] > 0])
    if marks_guess is None:
        # Last resort: most numeric column overall, claimed or not.
        marks_guess = _most_numeric(cols)

    return student_guess, marks_guess, grade_guess, course_guess, section_guess
75
+
76
+
77
def _fig_to_png_bytes(fig):
    """Render *fig* to PNG and return it as an in-memory buffer.

    The figure is saved at 180 dpi with a tight bounding box and is always
    closed afterwards, including when saving raises, so that it does not stay
    registered with pyplot.

    Returns an ``io.BytesIO`` positioned at offset 0.
    """
    buf = io.BytesIO()
    try:
        fig.savefig(buf, format="png", dpi=180, bbox_inches="tight")
    finally:
        # Close even on failure; the caller has no further use for the figure.
        plt.close(fig)
    buf.seek(0)
    return buf
83
 
84
 
85
  # =============================
86
+ # Load Excel
87
  # =============================
88
  def load_excel(file_obj):
89
  try:
 
95
 
96
  sheet0 = sheets[0]
97
  df = pd.read_excel(io.BytesIO(file_bytes), sheet_name=sheet0, engine="openpyxl")
98
+ df = _drop_useless_cols(df)
99
 
100
+ s_guess, m_guess, g_guess, c_guess, sec_guess = _guess_cols(df)
101
  cols = list(df.columns)
 
102
 
103
+ # Filters (optional)
104
+ course_dd = gr.Dropdown(choices=["(all)"], value="(all)", interactive=False, visible=False, label="Course filter")
105
+ section_dd = gr.Dropdown(choices=["(all)"], value="(all)", interactive=False, visible=False, label="Section filter")
106
 
107
+ if c_guess and c_guess in df.columns:
 
108
  course_vals = ["(all)"] + sorted(df[c_guess].astype(str).fillna("NA").unique().tolist())
109
  course_dd = gr.Dropdown(choices=course_vals, value="(all)", interactive=True, visible=True, label="Course filter")
110
 
111
+ if sec_guess and sec_guess in df.columns:
112
  sec_vals = ["(all)"] + sorted(df[sec_guess].astype(str).fillna("NA").unique().tolist())
113
+ section_dd = gr.Dropdown(choices=sec_vals, value="(all)", interactive=True, visible=True, label="Section filter")
114
 
115
  return (
116
  gr.Dropdown(choices=sheets, value=sheet0, interactive=True),
 
119
  gr.Dropdown(choices=cols, value=m_guess, interactive=True),
120
  gr.Dropdown(choices=cols, value=g_guess, interactive=True),
121
  gr.Dropdown(choices=cols, value=(c_guess or cols[0]), interactive=bool(c_guess), visible=bool(c_guess), label="Course column"),
122
+ gr.Dropdown(choices=cols, value=(sec_guess or cols[0]), interactive=bool(sec_guess), visible=bool(sec_guess), label="Section column"),
123
  course_dd,
124
  section_dd,
125
  file_bytes,
126
+ sheet0, # sheet_state
127
  )
128
  except Exception:
129
  return (
 
145
  if not file_bytes:
146
  raise ValueError("Upload Excel first.")
147
  df = pd.read_excel(io.BytesIO(file_bytes), sheet_name=sheet_name, engine="openpyxl")
148
+ df = _drop_useless_cols(df)
 
149
 
150
+ # Update filter choices based on selected columns
151
  course_dd = gr.Dropdown(choices=["(all)"], value="(all)", interactive=False, visible=False, label="Course filter")
152
+ section_dd = gr.Dropdown(choices=["(all)"], value="(all)", interactive=False, visible=False, label="Section filter")
153
 
154
  if course_col and course_col in df.columns:
155
  course_vals = ["(all)"] + sorted(df[course_col].astype(str).fillna("NA").unique().tolist())
 
157
 
158
  if section_col and section_col in df.columns:
159
  sec_vals = ["(all)"] + sorted(df[section_col].astype(str).fillna("NA").unique().tolist())
160
+ section_dd = gr.Dropdown(choices=sec_vals, value="(all)", interactive=True, visible=True, label="Section filter")
161
 
162
+ return df, course_dd, section_dd, sheet_name # IMPORTANT: update sheet_state
163
 
164
 
165
  # =============================
 
181
  if df is None or df.empty:
182
  raise gr.Error("Sheet is empty.")
183
 
184
+ d = apply_filters(df, course_col, section_col, course_filter, section_filter).copy()
185
+
186
+ # numeric marks
187
+ d["_marks"] = _safe_numeric(d[marks_col]) if marks_col in d.columns else np.nan
188
+ d["_grade"] = d[grade_col].astype(str).str.strip().replace({"nan": "NA"}) if grade_col in d.columns else "NA"
189
 
 
190
  total = int(len(d))
191
+ valid = d[d["_marks"].notna()].copy()
 
192
  n = int(len(valid))
193
+ missing_marks = int(d["_marks"].isna().sum())
194
 
195
+ mean = float(valid["_marks"].mean()) if n else 0.0
196
+ std = float(valid["_marks"].std(ddof=0)) if n else 0.0
197
+ minv = float(valid["_marks"].min()) if n else 0.0
198
+ maxv = float(valid["_marks"].max()) if n else 0.0
199
 
200
+ pass_count = int((valid["_marks"] >= pass_mark).sum()) if n else 0
201
  pass_rate = (pass_count / n * 100.0) if n else 0.0
202
 
203
+ # distribution shape (simple but useful)
204
+ skew = float(valid["_marks"].skew()) if n else 0.0
205
+ kurt = float(valid["_marks"].kurt()) if n else 0.0
206
+
207
+ # percentiles
208
+ pct = {}
209
+ if n:
210
+ for p in [10, 25, 50, 75, 90]:
211
+ pct[f"P{p}"] = float(np.percentile(valid["_marks"], p))
212
+ percentiles_df = pd.DataFrame(list(pct.items()), columns=["Percentile", "Marks"]) if pct else pd.DataFrame()
213
+
214
+ # heaping: most repeated marks (teacher-friendly)
215
+ heaping_df = (
216
+ valid["_marks"].round(0).astype(int).value_counts().head(12).rename("count").reset_index()
217
+ .rename(columns={"index": "Mark"})
218
+ )
219
+
220
+ # IQR outliers
221
+ if n:
222
+ q1 = float(np.percentile(valid["_marks"], 25))
223
+ q3 = float(np.percentile(valid["_marks"], 75))
224
+ iqr = q3 - q1
225
+ low_thr = q1 - 1.5 * iqr
226
+ high_thr = q3 + 1.5 * iqr
227
+ outliers = valid[(valid["_marks"] < low_thr) | (valid["_marks"] > high_thr)].copy()
228
+ else:
229
+ outliers = valid.head(0).copy()
230
+ low_thr = high_thr = 0.0
231
+
232
+ outliers_df = outliers[[student_col, marks_col, grade_col]].head(30) if not outliers.empty else pd.DataFrame()
233
+
234
  # risk / borderline
235
+ risk_df = valid[valid["_marks"] < pass_mark][[student_col, marks_col, grade_col]].sort_values(by="_marks").head(25)
236
+ borderline_df = valid[(valid["_marks"] >= pass_mark) & (valid["_marks"] < pass_mark + 5)][[student_col, marks_col, grade_col]].sort_values(by="_marks").head(25)
237
 
238
+ top_df = valid[[student_col, marks_col, grade_col]].sort_values(by="_marks", ascending=False).head(10)
239
+ bottom_df = valid[[student_col, marks_col, grade_col]].sort_values(by="_marks", ascending=True).head(10)
240
 
241
+ grade_dist = d["_grade"].value_counts(dropna=False).rename("count").to_frame().reset_index()
242
  grade_dist.columns = [grade_col, "count"]
243
 
244
+ # Grade -> marks mapping (VERY useful for teachers)
245
+ grade_stats = (
246
+ valid.assign(_g=d["_grade"])
247
+ .groupby(d["_grade"])["_marks"]
248
+ .agg(["count", "mean", "std", "min", "median", "max"])
249
+ .reset_index()
250
+ .rename(columns={"_grade": "Grade"})
251
+ .sort_values("mean", ascending=False)
252
+ )
253
+
254
+ # status
255
  if pass_rate >= 80:
256
  status = "GREEN"
257
  elif pass_rate >= 60:
 
259
  else:
260
  status = "RED"
261
 
262
+ # simple pattern flags
263
+ flags = []
264
+ if missing_marks > 0:
265
+ flags.append(f"{missing_marks} missing mark(s) need verification.")
266
+ if abs(skew) > 0.7:
267
+ flags.append("Marks are noticeably skewed (not symmetric).")
268
+ if kurt > 1.0:
269
+ flags.append("Marks have heavy tails (more extremes than normal).")
270
+ if len(heaping_df) and heaping_df["count"].iloc[0] >= max(10, 0.06 * n):
271
+ flags.append("Many students share the same mark(s) (mark heaping / clustering).")
272
+ if len(outliers_df) > 0:
273
+ flags.append("Outliers detected using IQR rule (check special cases).")
274
+ flags_text = " | ".join(flags) if flags else "No strong warnings detected."
275
+
276
  insight = (
277
  f"Status: {status}. Pass rate {pass_rate:.1f}% (Pass mark {pass_mark}). "
278
  f"Average {mean:.1f}, Min {minv:.1f}, Max {maxv:.1f}, Std {std:.1f}. "
279
+ f"Skew {skew:.2f}, Kurtosis {kurt:.2f}. Missing marks: {missing_marks}. "
280
+ f"Flags: {flags_text}"
281
  )
282
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
283
  kpi_df = pd.DataFrame(
284
  [
285
  ("Total rows (filtered)", total),
 
292
  ("Std deviation", round(std, 2)),
293
  ("Minimum", round(minv, 2)),
294
  ("Maximum", round(maxv, 2)),
295
+ ("Skewness", round(skew, 3)),
296
+ ("Kurtosis", round(kurt, 3)),
297
+ ("Outlier low threshold (IQR)", round(low_thr, 2)),
298
+ ("Outlier high threshold (IQR)", round(high_thr, 2)),
299
  ("Status", status),
300
  ("Insight", insight),
301
  ],
302
  columns=["Metric", "Value"],
303
  )
304
 
305
+ # -------- Charts (matplotlib)
306
+ # 1) Histogram
307
+ fig1 = plt.figure()
308
+ plt.hist(valid["_marks"].dropna(), bins=12)
309
+ plt.title("Marks distribution")
310
+ plt.xlabel("Marks")
311
+ plt.ylabel("Students")
312
+
313
+ # 2) CDF curve (excellent for interpretation)
314
+ fig2 = plt.figure()
315
+ xs = np.sort(valid["_marks"].dropna().values) if n else np.array([])
316
+ ys = np.arange(1, len(xs) + 1) / len(xs) if len(xs) else np.array([])
317
+ plt.plot(xs, ys)
318
+ plt.title("Cumulative distribution (CDF)")
319
+ plt.xlabel("Marks")
320
+ plt.ylabel("Proportion ≀ mark")
321
+
322
+ # 3) Grade bar
323
+ fig3 = plt.figure()
324
+ gd = grade_dist.set_index(grade_col)["count"]
325
+ plt.bar(gd.index.astype(str), gd.values)
326
+ plt.title("Grade distribution")
327
+ plt.xlabel("Grade")
328
+ plt.ylabel("Count")
329
+ plt.xticks(rotation=45, ha="right")
330
+
331
+ # 4) Boxplot by grade (pattern across grades)
332
+ fig4 = plt.figure()
333
+ # Keep grades ordered by mean
334
+ order = grade_stats[grade_stats.columns[0]].tolist() if not grade_stats.empty else []
335
+ data = [valid.loc[d["_grade"] == g, "_marks"].dropna().values for g in order] if order else []
336
+ if data:
337
+ plt.boxplot(data, labels=[str(g) for g in order], vert=True)
338
+ plt.title("Marks spread by Grade")
339
+ plt.xlabel("Grade")
340
+ plt.ylabel("Marks")
341
+ plt.xticks(rotation=45, ha="right")
342
+ else:
343
+ plt.title("Marks spread by Grade (no data)")
344
+
345
+ return (
346
+ kpi_df,
347
+ grade_dist,
348
+ grade_stats,
349
+ percentiles_df,
350
+ heaping_df,
351
+ outliers_df,
352
+ risk_df,
353
+ borderline_df,
354
+ top_df,
355
+ bottom_df,
356
+ fig1,
357
+ fig2,
358
+ fig3,
359
+ fig4,
360
+ )
361
 
362
 
363
  # =============================
364
+ # PDF (with charts embedded)
365
  # =============================
366
+ def make_pdf(kpi_df, grade_dist, grade_stats, percentiles_df, heaping_df, outliers_df,
367
+ risk_df, borderline_df, top_df, bottom_df,
368
+ fig1, fig2, fig3, fig4,
369
+ title="Marks Dashboard Report"):
370
  buf = io.BytesIO()
371
  c = canvas.Canvas(buf, pagesize=A4)
372
  width, height = A4
 
391
  c.setFont("Helvetica", 9.5)
392
  max_chars = 95
393
  for i in range(0, len(text), max_chars):
394
+ if y < 2.2 * cm:
395
+ c.showPage()
396
+ y = height - 2 * cm
397
+ c.setFont("Helvetica", 9.5)
398
  c.drawString(x, y, text[i:i + max_chars])
399
  y -= 0.5 * cm
400
 
 
410
  for _, r in df2.iterrows():
411
  line(" | ".join(r.values.tolist()))
412
 
413
+ def add_chart(fig, caption):
414
+ nonlocal y
415
+ png = _fig_to_png_bytes(fig)
416
+ img = ImageReader(png)
417
+ img_w = width - 4 * cm
418
+ img_h = 7.0 * cm # fixed height to keep layout stable
419
 
420
+ if y < (img_h + 3.0 * cm):
421
+ c.showPage()
422
+ y = height - 2 * cm
423
 
424
+ c.setFont("Helvetica-Bold", 10.5)
425
+ c.drawString(x, y, caption)
426
+ y -= 0.5 * cm
427
+ c.drawImage(img, x, y - img_h, width=img_w, height=img_h, preserveAspectRatio=True, anchor='nw')
428
+ y -= (img_h + 0.7 * cm)
429
 
430
+ h(title)
431
+ line(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
 
432
 
433
+ sh("1) KPI Summary")
434
+ table(kpi_df, max_rows=60)
435
 
436
+ sh("2) Key Patterns (Percentiles + Heaping)")
437
+ table(percentiles_df, max_rows=10)
438
+ table(heaping_df, max_rows=12)
439
 
440
+ sh("3) Grade Distribution + Grade-to-Marks Mapping")
441
+ table(grade_dist, max_rows=40)
442
+ table(grade_stats, max_rows=40)
443
 
444
+ sh("4) At-risk / Borderline / Outliers")
445
+ table(risk_df, max_rows=25)
446
+ table(borderline_df, max_rows=25)
447
+ table(outliers_df, max_rows=30)
448
 
449
+ sh("5) Top & Bottom")
450
  table(top_df, max_rows=10)
 
 
 
 
 
 
451
  table(bottom_df, max_rows=10)
452
 
453
+ # charts pages
454
  c.showPage()
455
+ y = height - 2 * cm
456
+ h("Charts")
457
+ add_chart(fig1, "Chart 1: Marks Distribution (Histogram)")
458
+ add_chart(fig2, "Chart 2: CDF (Proportion of students at/below a mark)")
459
+ add_chart(fig3, "Chart 3: Grade Distribution (Bar)")
460
+ add_chart(fig4, "Chart 4: Marks Spread by Grade (Boxplot)")
461
+
462
  c.save()
463
  buf.seek(0)
464
  return buf
 
468
  if not file_bytes:
469
  raise gr.Error("Upload Excel first.")
470
  df = pd.read_excel(io.BytesIO(file_bytes), sheet_name=sheet_name, engine="openpyxl")
471
+ df = _drop_useless_cols(df)
472
+
473
+ (
474
+ kpi_df, grade_dist, grade_stats, percentiles_df, heaping_df, outliers_df,
475
+ risk_df, borderline_df, top_df, bottom_df,
476
+ fig1, fig2, fig3, fig4
477
+ ) = compute_dashboard(df, student_col, marks_col, grade_col, int(pass_mark), course_col, section_col, course_filter, section_filter)
478
+
479
+ pdf_buf = make_pdf(
480
+ kpi_df, grade_dist, grade_stats, percentiles_df, heaping_df, outliers_df,
481
+ risk_df, borderline_df, top_df, bottom_df,
482
+ fig1, fig2, fig3, fig4,
483
+ title="HoD Result Dashboard Report"
484
  )
 
 
485
  fname = f"dashboard_report__{sheet_name}__{datetime.now().strftime('%Y%m%d_%H%M%S')}.pdf"
486
  return (fname, pdf_buf.getvalue())
487
 
 
490
  # UI
491
  # =============================
492
  with gr.Blocks(title="HoD Result Dashboard") as demo:
493
+ gr.Markdown("## πŸ“Š HoD Result Dashboard β€” Teacher Insights (Patterns + Stats + Charts + PDF)")
494
 
495
  file_state = gr.State(None)
496
  sheet_state = gr.State(None)
 
507
 
508
  with gr.Row():
509
  course_col = gr.Dropdown(label="Course column (optional)", choices=[], interactive=False, visible=False)
510
+ section_col = gr.Dropdown(label="Section column (optional)", choices=[], interactive=False, visible=False)
511
 
512
  with gr.Row():
513
  course_filter = gr.Dropdown(label="Course filter", choices=["(all)"], value="(all)", interactive=False, visible=False)
514
+ section_filter = gr.Dropdown(label="Section filter", choices=["(all)"], value="(all)", interactive=False, visible=False)
515
 
516
  df_preview = gr.Dataframe(label="Preview", interactive=False, wrap=True)
517
 
518
  analyze_btn = gr.Button("πŸ” Refresh Dashboard")
519
 
520
+ with gr.Tab("Overview"):
521
+ kpi_table = gr.Dataframe(label="KPI Summary (includes insight + flags)", interactive=False, wrap=True)
522
  grade_table = gr.Dataframe(label="Grade distribution", interactive=False, wrap=True)
523
 
524
+ with gr.Tab("Patterns"):
525
+ percentiles_table = gr.Dataframe(label="Percentiles (P10/P25/P50/P75/P90)", interactive=False, wrap=True)
526
+ heaping_table = gr.Dataframe(label="Most repeated marks (heaping / clustering)", interactive=False, wrap=True)
527
+ outliers_table = gr.Dataframe(label="Outliers (IQR rule) - first 30", interactive=False, wrap=True)
528
 
529
+ with gr.Tab("By Grade"):
530
+ grade_stats_table = gr.Dataframe(label="Grade β†’ Marks mapping (min/max/mean/median)", interactive=False, wrap=True)
531
+
532
+ with gr.Tab("At-risk / Ranking"):
533
+ risk_table = gr.Dataframe(label="At-risk (below pass) - Top 25", interactive=False, wrap=True)
534
+ borderline_table = gr.Dataframe(label="Borderline (pass to pass+5) - Top 25", interactive=False, wrap=True)
535
  top_table = gr.Dataframe(label="Top 10", interactive=False, wrap=True)
536
  bottom_table = gr.Dataframe(label="Bottom 10", interactive=False, wrap=True)
537
 
538
+ with gr.Tab("Charts"):
539
+ with gr.Row():
540
+ hist_plot = gr.Plot(label="Histogram")
541
+ cdf_plot = gr.Plot(label="CDF")
542
+ with gr.Row():
543
+ grade_plot = gr.Plot(label="Grade distribution")
544
+ grade_box = gr.Plot(label="Boxplot by grade")
545
 
546
  with gr.Row():
547
+ pdf_btn = gr.Button("πŸ“„ Generate PDF Report (with charts)")
548
  pdf_out = gr.File(label="Download PDF")
549
 
550
  # Events
 
563
  sheet_dd.change(
564
  fn=read_sheet,
565
  inputs=[sheet_dd, file_state, course_col, section_col],
566
+ outputs=[df_preview, course_filter, section_filter, sheet_state], # IMPORTANT
567
  )
568
 
569
  def on_refresh(file_bytes, sheet_name, s_col, m_col, g_col, pmark, c_col, sec_col, c_filter, sec_filter):
 
572
  if not sheet_name:
573
  raise gr.Error("Select a sheet.")
574
  df = pd.read_excel(io.BytesIO(file_bytes), sheet_name=sheet_name, engine="openpyxl")
575
+ df = _drop_useless_cols(df)
576
 
577
+ return compute_dashboard(df, s_col, m_col, g_col, int(pmark), c_col, sec_col, c_filter, sec_filter)
 
 
 
578
 
579
  analyze_btn.click(
580
  fn=on_refresh,
581
  inputs=[file_state, sheet_state, student_col, marks_col, grade_col, pass_mark, course_col, section_col, course_filter, section_filter],
582
+ outputs=[
583
+ kpi_table, grade_table, grade_stats_table, percentiles_table, heaping_table, outliers_table,
584
+ risk_table, borderline_table, top_table, bottom_table,
585
+ hist_plot, cdf_plot, grade_plot, grade_box
586
+ ],
587
  )
588
 
589
  pdf_btn.click(