import io import numpy as np import pandas as pd import gradio as gr import matplotlib.pyplot as plt # ============================= # Helpers # ============================= def _read_file_bytes(file_obj): if file_obj is None: raise ValueError("No file uploaded.") file_path = getattr(file_obj, "name", None) if not file_path: raise ValueError("Invalid uploaded file object.") with open(file_path, "rb") as f: b = f.read() if not b: raise ValueError("Uploaded file is empty.") return b def _drop_useless(df: pd.DataFrame) -> pd.DataFrame: df = df.dropna(axis=1, how="all").dropna(axis=0, how="all") unnamed = [c for c in df.columns if str(c).strip().lower().startswith("unnamed")] if unnamed: df = df.drop(columns=unnamed, errors="ignore") return df def _safe_numeric(s): return pd.to_numeric(s, errors="coerce") def _guess_marks_col(df: pd.DataFrame): cols = list(df.columns) best, best_score = cols[0], -1 for c in cols: score = _safe_numeric(df[c]).notna().mean() if score > best_score: best_score = score best = c return best def _guess_grade_col(cols): low = {c: str(c).strip().lower() for c in cols} return next((c for c in cols if "grade" in low[c] or "grde" in low[c]), cols[0]) def _guess_optional(cols): low = {c: str(c).strip().lower() for c in cols} course = next((c for c in cols if any(k in low[c] for k in ["course", "module", "subject"])), None) section = next((c for c in cols if any(k in low[c] for k in ["section", "group", "batch", "class"])), None) return course, section def apply_filters(df, course_col, section_col, course_filter, section_filter): d = df.copy() if course_col and course_col in d.columns and course_filter and course_filter != "(all)": d = d[d[course_col].astype(str).fillna("NA") == course_filter] if section_col and section_col in d.columns and section_filter and section_filter != "(all)": d = d[d[section_col].astype(str).fillna("NA") == section_filter] return d # ============================= # Core HoD Insights (no student tables) # ============================= def compute_hod_insights(df, marks_col, grade_col, pass_mark, course_col, section_col, course_filter, section_filter): if df is None or df.empty: raise gr.Error("Sheet is empty.") d = apply_filters(df, course_col, section_col, course_filter, section_filter).copy() d["_marks"] = _safe_numeric(d[marks_col]) if marks_col in d.columns else np.nan d["_grade"] = d[grade_col].astype(str).str.strip().replace({"nan": "NA"}) if grade_col in d.columns else "NA" total = int(len(d)) valid = d[d["_marks"].notna()].copy() n = int(len(valid)) missing = int(d["_marks"].isna().sum()) pass_mark = int(pass_mark) mean = float(valid["_marks"].mean()) if n else 0.0 std = float(valid["_marks"].std(ddof=0)) if n else 0.0 minv = float(valid["_marks"].min()) if n else 0.0 maxv = float(valid["_marks"].max()) if n else 0.0 pass_count = int((valid["_marks"] >= pass_mark).sum()) if n else 0 pass_rate = (pass_count / n * 100.0) if n else 0.0 # Borderline (pass to pass+5) and just-below (pass-5 to pass-1) borderline_pass = int(((valid["_marks"] >= pass_mark) & (valid["_marks"] < pass_mark + 5)).sum()) if n else 0 borderline_fail = int(((valid["_marks"] < pass_mark) & (valid["_marks"] >= pass_mark - 5)).sum()) if n else 0 # Distribution shape skew = float(valid["_marks"].skew()) if n else 0.0 kurt = float(valid["_marks"].kurt()) if n else 0.0 # Outliers by IQR outlier_count = 0 low_thr = high_thr = 0.0 if n: q1 = float(np.percentile(valid["_marks"], 25)) q3 = float(np.percentile(valid["_marks"], 75)) iqr = q3 - q1 low_thr = q1 - 1.5 * iqr high_thr = q3 + 1.5 * iqr outlier_count = int(((valid["_marks"] < low_thr) | (valid["_marks"] > high_thr)).sum()) # Percentiles pct_df = pd.DataFrame( [(f"P{p}", round(float(np.percentile(valid["_marks"], p)), 2)) for p in [10, 25, 50, 75, 90]] if n else [], columns=["Percentile", "Marks"] ) # Grade distribution grade_dist = d["_grade"].value_counts(dropna=False).rename("count").to_frame().reset_index() grade_dist.columns = [grade_col, "count"] grade_dist["%"] = (grade_dist["count"] / grade_dist["count"].sum() * 100).round(2) if len(grade_dist) else 0 # Grade ↔ marks mapping (moderation evidence) grade_stats = ( valid.groupby(d["_grade"])["_marks"] .agg(["count", "mean", "std", "min", "median", "max"]) .reset_index() .rename(columns={"_grade": "Grade"}) .sort_values("mean", ascending=False) ) # Mark heaping (repeated marks) heaping = ( valid["_marks"].round(0).astype(int) .value_counts().head(12) .rename("count").reset_index() .rename(columns={"index": "Mark"}) ) # Course status if pass_rate >= 80: status = "GREEN" elif pass_rate >= 60: status = "AMBER" else: status = "RED" # Flags flags = [] if missing > 0: flags.append(f"{missing} missing mark(s) — verify completeness.") if borderline_fail > max(5, 0.03 * n): flags.append("Many students just below pass — consider targeted support / moderation review.") if borderline_pass > max(5, 0.03 * n): flags.append("Many students just above pass — borderline attainment cluster.") if abs(skew) > 0.7: flags.append("Skewed distribution — check assessment balance and marking consistency.") if outlier_count > 0: flags.append(f"{outlier_count} outlier(s) by IQR — spot-check extremes.") if len(heaping) and heaping["count"].iloc[0] >= max(10, 0.06 * n): flags.append("Heaping detected — many students share identical marks (rounding/marking pattern).") flags_text = " | ".join(flags) if flags else "No major warning patterns detected." insight = ( f"**Status:** {status} \n" f"**Pass rate:** {pass_rate:.1f}% (Pass mark = {pass_mark}) \n" f"**Avg:** {mean:.1f} | **Std:** {std:.1f} | **Min/Max:** {minv:.1f}/{maxv:.1f} \n" f"**Borderline (just below pass):** {borderline_fail} | **Borderline (just above pass):** {borderline_pass} \n" f"**Skew:** {skew:.2f} | **Kurtosis:** {kurt:.2f} | **Outliers:** {outlier_count} | **Missing:** {missing} \n" f"**Flags:** {flags_text}" ) # KPI table kpi = pd.DataFrame( [ ("Total rows (filtered)", total), ("Students with numeric marks", n), ("Missing marks", missing), ("Pass mark", pass_mark), ("Pass count", pass_count), ("Pass rate (%)", round(pass_rate, 2)), ("Borderline just below pass", borderline_fail), ("Borderline just above pass", borderline_pass), ("Average", round(mean, 2)), ("Std deviation", round(std, 2)), ("Minimum", round(minv, 2)), ("Maximum", round(maxv, 2)), ("Skewness", round(skew, 3)), ("Kurtosis", round(kurt, 3)), ("Outlier low threshold (IQR)", round(low_thr, 2)), ("Outlier high threshold (IQR)", round(high_thr, 2)), ("Outlier count (IQR)", outlier_count), ("Status", status), ], columns=["Metric", "Value"], ) # Charts # 1 Histogram fig1 = plt.figure() plt.hist(valid["_marks"].dropna(), bins=12) plt.axvline(pass_mark, linestyle="--") plt.title("Marks distribution (Histogram)") plt.xlabel("Marks") plt.ylabel("Students") # 2 CDF fig2 = plt.figure() xs = np.sort(valid["_marks"].dropna().values) if n else np.array([]) ys = np.arange(1, len(xs) + 1) / len(xs) if len(xs) else np.array([]) if len(xs): plt.plot(xs, ys) plt.axvline(pass_mark, linestyle="--") plt.title("CDF (Proportion of students ≤ mark)") plt.xlabel("Marks") plt.ylabel("Proportion") # 3 Grade distribution fig3 = plt.figure() gd = grade_dist.set_index(grade_col)["count"] plt.bar(gd.index.astype(str), gd.values) plt.title("Grade distribution") plt.xlabel("Grade") plt.ylabel("Count") plt.xticks(rotation=45, ha="right") # 4 Boxplot by grade (moderation) fig4 = plt.figure() if not grade_stats.empty: order = grade_stats["Grade"].tolist() data = [valid.loc[d["_grade"] == g, "_marks"].dropna().values for g in order] plt.boxplot(data, tick_labels=[str(g) for g in order], vert=True) plt.title("Marks spread by Grade (Boxplot)") plt.xlabel("Grade") plt.ylabel("Marks") plt.xticks(rotation=45, ha="right") else: plt.title("Marks spread by Grade (Boxplot)") # 5 Section comparison (optional) fig5 = plt.figure() section_table = pd.DataFrame() if section_col and section_col in d.columns and n: sec = valid.groupby(d[section_col].astype(str).fillna("NA"))["_marks"].agg(["count", "mean"]).reset_index() sec["pass_rate_%"] = (valid.groupby(d[section_col].astype(str).fillna("NA"))["_marks"].apply(lambda x: (x >= pass_mark).mean() * 100)).values sec = sec.rename(columns={section_col: "Section"}) section_table = sec.sort_values("pass_rate_%", ascending=False) plt.bar(section_table["Section"].astype(str), section_table["pass_rate_%"].values) plt.title("Section-wise Pass Rate (%)") plt.xlabel("Section") plt.ylabel("Pass rate (%)") plt.xticks(rotation=45, ha="right") else: plt.title("Section-wise Pass Rate (%) — not available (no section column)") plt.axis("off") return kpi, pct_df, grade_dist, grade_stats, heaping, section_table, insight, fig1, fig2, fig3, fig4, fig5 # ============================= # UI # ============================= with gr.Blocks(title="HoD Result Dashboard") as demo: gr.Markdown("## 📊 HoD Result Dashboard — Insights & Patterns (No Student Tables, No PDF)") file_bytes_state = gr.State(None) # bytes sheet_state = gr.State(None) # sheet name string with gr.Row(): upload = gr.File(label="Upload Excel (.xlsx)", file_types=[".xlsx"]) sheet_dd = gr.Dropdown(label="Sheet", choices=[], interactive=False) with gr.Row(): marks_col = gr.Dropdown(label="Marks column", choices=[], interactive=False) grade_col = gr.Dropdown(label="Grade column", choices=[], interactive=False) pass_mark = gr.Number(label="Pass mark", value=50, precision=0) with gr.Row(): course_col = gr.Dropdown(label="Course column (optional)", choices=[], interactive=False, visible=False) section_col = gr.Dropdown(label="Section column (optional)", choices=[], interactive=False, visible=False) with gr.Row(): course_filter = gr.Dropdown(label="Course filter", choices=["(all)"], value="(all)", interactive=False, visible=False) section_filter = gr.Dropdown(label="Section filter", choices=["(all)"], value="(all)", interactive=False, visible=False) analyze_btn = gr.Button("🔍 Refresh HoD Dashboard") insight_md = gr.Markdown("") with gr.Tab("Tables"): with gr.Row(): kpi_table = gr.Dataframe(label="KPI Summary", interactive=False, wrap=True) pct_table = gr.Dataframe(label="Percentiles", interactive=False, wrap=True) with gr.Row(): grade_dist_table = gr.Dataframe(label="Grade Distribution", interactive=False, wrap=True) heaping_table = gr.Dataframe(label="Mark Heaping (Top repeated marks)", interactive=False, wrap=True) grade_stats_table = gr.Dataframe(label="Grade ↔ Marks (Moderation evidence)", interactive=False, wrap=True) section_table = gr.Dataframe(label="Section Comparison (if available)", interactive=False, wrap=True) with gr.Tab("Charts"): with gr.Row(): hist_plot = gr.Plot(label="Histogram") cdf_plot = gr.Plot(label="CDF") with gr.Row(): grade_plot = gr.Plot(label="Grade distribution") grade_box = gr.Plot(label="Boxplot by grade") section_plot = gr.Plot(label="Section-wise pass rate") # -------- callbacks def on_upload(file_obj): b = _read_file_bytes(file_obj) xls = pd.ExcelFile(io.BytesIO(b), engine="openpyxl") sheets = xls.sheet_names or [] if not sheets: raise gr.Error("No sheets found in workbook.") sheet0 = sheets[0] df0 = _drop_useless(pd.read_excel(io.BytesIO(b), sheet_name=sheet0, engine="openpyxl")) cols = list(df0.columns) m_guess = _guess_marks_col(df0) g_guess = _guess_grade_col(cols) c_guess, s_guess = _guess_optional(cols) # Optional filters course_col_upd = gr.update(choices=cols, value=(c_guess or cols[0]), visible=bool(c_guess), interactive=bool(c_guess)) section_col_upd = gr.update(choices=cols, value=(s_guess or cols[0]), visible=bool(s_guess), interactive=bool(s_guess)) course_filter_upd = gr.update(choices=["(all)"], value="(all)", visible=False, interactive=False) section_filter_upd = gr.update(choices=["(all)"], value="(all)", visible=False, interactive=False) if c_guess and c_guess in df0.columns: vals = ["(all)"] + sorted(df0[c_guess].astype(str).fillna("NA").unique().tolist()) course_filter_upd = gr.update(choices=vals, value="(all)", visible=True, interactive=True) if s_guess and s_guess in df0.columns: vals = ["(all)"] + sorted(df0[s_guess].astype(str).fillna("NA").unique().tolist()) section_filter_upd = gr.update(choices=vals, value="(all)", visible=True, interactive=True) return ( gr.update(choices=sheets, value=sheet0, interactive=True), # sheet_dd gr.update(choices=cols, value=m_guess, interactive=True), # marks_col gr.update(choices=cols, value=g_guess, interactive=True), # grade_col course_col_upd, section_col_upd, course_filter_upd, section_filter_upd, b, # file_bytes_state sheet0, # sheet_state ) upload.change( fn=on_upload, inputs=[upload], outputs=[sheet_dd, marks_col, grade_col, course_col, section_col, course_filter, section_filter, file_bytes_state, sheet_state], ) def on_sheet_change(sheet_name, file_bytes, course_col_val, section_col_val): if not file_bytes: raise gr.Error("Upload Excel first.") df = _drop_useless(pd.read_excel(io.BytesIO(file_bytes), sheet_name=sheet_name, engine="openpyxl")) cf = gr.update(choices=["(all)"], value="(all)", visible=False, interactive=False) sf = gr.update(choices=["(all)"], value="(all)", visible=False, interactive=False) if course_col_val and course_col_val in df.columns: vals = ["(all)"] + sorted(df[course_col_val].astype(str).fillna("NA").unique().tolist()) cf = gr.update(choices=vals, value="(all)", visible=True, interactive=True) if section_col_val and section_col_val in df.columns: vals = ["(all)"] + sorted(df[section_col_val].astype(str).fillna("NA").unique().tolist()) sf = gr.update(choices=vals, value="(all)", visible=True, interactive=True) return cf, sf, sheet_name sheet_dd.change( fn=on_sheet_change, inputs=[sheet_dd, file_bytes_state, course_col, section_col], outputs=[course_filter, section_filter, sheet_state], ) def on_refresh(file_bytes, sheet_name, m_col, g_col, pmark, c_col, s_col, c_filter, s_filter): if not file_bytes: raise gr.Error("Upload Excel first.") if not sheet_name: raise gr.Error("Select a sheet.") df = _drop_useless(pd.read_excel(io.BytesIO(file_bytes), sheet_name=sheet_name, engine="openpyxl")) kpi, pct, gdist, gstats, heap, sec_tbl, insight, f1, f2, f3, f4, f5 = compute_hod_insights( df, m_col, g_col, int(pmark), c_col, s_col, c_filter, s_filter ) return ( "### HoD Insight\n\n" + insight, kpi, pct, gdist, heap, gstats, sec_tbl, f1, f2, f3, f4, f5 ) analyze_btn.click( fn=on_refresh, inputs=[file_bytes_state, sheet_state, marks_col, grade_col, pass_mark, course_col, section_col, course_filter, section_filter], outputs=[insight_md, kpi_table, pct_table, grade_dist_table, heaping_table, grade_stats_table, section_table, hist_plot, cdf_plot, grade_plot, grade_box, section_plot], ) demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)