Spaces:
Sleeping
Sleeping
| import io | |
| import numpy as np | |
| import pandas as pd | |
| import gradio as gr | |
| import matplotlib.pyplot as plt | |
| # ============================= | |
| # Helpers | |
| # ============================= | |
| def _read_file_bytes(file_obj): | |
| if file_obj is None: | |
| raise ValueError("No file uploaded.") | |
| file_path = getattr(file_obj, "name", None) | |
| if not file_path: | |
| raise ValueError("Invalid uploaded file object.") | |
| with open(file_path, "rb") as f: | |
| b = f.read() | |
| if not b: | |
| raise ValueError("Uploaded file is empty.") | |
| return b | |
| def _drop_useless(df: pd.DataFrame) -> pd.DataFrame: | |
| df = df.dropna(axis=1, how="all").dropna(axis=0, how="all") | |
| unnamed = [c for c in df.columns if str(c).strip().lower().startswith("unnamed")] | |
| if unnamed: | |
| df = df.drop(columns=unnamed, errors="ignore") | |
| return df | |
| def _safe_numeric(s): | |
| return pd.to_numeric(s, errors="coerce") | |
| def _guess_marks_col(df: pd.DataFrame): | |
| cols = list(df.columns) | |
| best, best_score = cols[0], -1 | |
| for c in cols: | |
| score = _safe_numeric(df[c]).notna().mean() | |
| if score > best_score: | |
| best_score = score | |
| best = c | |
| return best | |
| def _guess_grade_col(cols): | |
| low = {c: str(c).strip().lower() for c in cols} | |
| return next((c for c in cols if "grade" in low[c] or "grde" in low[c]), cols[0]) | |
| def _guess_optional(cols): | |
| low = {c: str(c).strip().lower() for c in cols} | |
| course = next((c for c in cols if any(k in low[c] for k in ["course", "module", "subject"])), None) | |
| section = next((c for c in cols if any(k in low[c] for k in ["section", "group", "batch", "class"])), None) | |
| return course, section | |
| def apply_filters(df, course_col, section_col, course_filter, section_filter): | |
| d = df.copy() | |
| if course_col and course_col in d.columns and course_filter and course_filter != "(all)": | |
| d = d[d[course_col].astype(str).fillna("NA") == course_filter] | |
| if section_col and section_col in d.columns and section_filter and section_filter != "(all)": | |
| d = d[d[section_col].astype(str).fillna("NA") == section_filter] | |
| return d | |
| # ============================= | |
| # Core HoD Insights (no student tables) | |
| # ============================= | |
| def compute_hod_insights(df, marks_col, grade_col, pass_mark, course_col, section_col, course_filter, section_filter): | |
| if df is None or df.empty: | |
| raise gr.Error("Sheet is empty.") | |
| d = apply_filters(df, course_col, section_col, course_filter, section_filter).copy() | |
| d["_marks"] = _safe_numeric(d[marks_col]) if marks_col in d.columns else np.nan | |
| d["_grade"] = d[grade_col].astype(str).str.strip().replace({"nan": "NA"}) if grade_col in d.columns else "NA" | |
| total = int(len(d)) | |
| valid = d[d["_marks"].notna()].copy() | |
| n = int(len(valid)) | |
| missing = int(d["_marks"].isna().sum()) | |
| pass_mark = int(pass_mark) | |
| mean = float(valid["_marks"].mean()) if n else 0.0 | |
| std = float(valid["_marks"].std(ddof=0)) if n else 0.0 | |
| minv = float(valid["_marks"].min()) if n else 0.0 | |
| maxv = float(valid["_marks"].max()) if n else 0.0 | |
| pass_count = int((valid["_marks"] >= pass_mark).sum()) if n else 0 | |
| pass_rate = (pass_count / n * 100.0) if n else 0.0 | |
| # Borderline (pass to pass+5) and just-below (pass-5 to pass-1) | |
| borderline_pass = int(((valid["_marks"] >= pass_mark) & (valid["_marks"] < pass_mark + 5)).sum()) if n else 0 | |
| borderline_fail = int(((valid["_marks"] < pass_mark) & (valid["_marks"] >= pass_mark - 5)).sum()) if n else 0 | |
| # Distribution shape | |
| skew = float(valid["_marks"].skew()) if n else 0.0 | |
| kurt = float(valid["_marks"].kurt()) if n else 0.0 | |
| # Outliers by IQR | |
| outlier_count = 0 | |
| low_thr = high_thr = 0.0 | |
| if n: | |
| q1 = float(np.percentile(valid["_marks"], 25)) | |
| q3 = float(np.percentile(valid["_marks"], 75)) | |
| iqr = q3 - q1 | |
| low_thr = q1 - 1.5 * iqr | |
| high_thr = q3 + 1.5 * iqr | |
| outlier_count = int(((valid["_marks"] < low_thr) | (valid["_marks"] > high_thr)).sum()) | |
| # Percentiles | |
| pct_df = pd.DataFrame( | |
| [(f"P{p}", round(float(np.percentile(valid["_marks"], p)), 2)) for p in [10, 25, 50, 75, 90]] | |
| if n else [], | |
| columns=["Percentile", "Marks"] | |
| ) | |
| # Grade distribution | |
| grade_dist = d["_grade"].value_counts(dropna=False).rename("count").to_frame().reset_index() | |
| grade_dist.columns = [grade_col, "count"] | |
| grade_dist["%"] = (grade_dist["count"] / grade_dist["count"].sum() * 100).round(2) if len(grade_dist) else 0 | |
| # Grade ↔ marks mapping (moderation evidence) | |
| grade_stats = ( | |
| valid.groupby(d["_grade"])["_marks"] | |
| .agg(["count", "mean", "std", "min", "median", "max"]) | |
| .reset_index() | |
| .rename(columns={"_grade": "Grade"}) | |
| .sort_values("mean", ascending=False) | |
| ) | |
| # Mark heaping (repeated marks) | |
| heaping = ( | |
| valid["_marks"].round(0).astype(int) | |
| .value_counts().head(12) | |
| .rename("count").reset_index() | |
| .rename(columns={"index": "Mark"}) | |
| ) | |
| # Course status | |
| if pass_rate >= 80: | |
| status = "GREEN" | |
| elif pass_rate >= 60: | |
| status = "AMBER" | |
| else: | |
| status = "RED" | |
| # Flags | |
| flags = [] | |
| if missing > 0: | |
| flags.append(f"{missing} missing mark(s) — verify completeness.") | |
| if borderline_fail > max(5, 0.03 * n): | |
| flags.append("Many students just below pass — consider targeted support / moderation review.") | |
| if borderline_pass > max(5, 0.03 * n): | |
| flags.append("Many students just above pass — borderline attainment cluster.") | |
| if abs(skew) > 0.7: | |
| flags.append("Skewed distribution — check assessment balance and marking consistency.") | |
| if outlier_count > 0: | |
| flags.append(f"{outlier_count} outlier(s) by IQR — spot-check extremes.") | |
| if len(heaping) and heaping["count"].iloc[0] >= max(10, 0.06 * n): | |
| flags.append("Heaping detected — many students share identical marks (rounding/marking pattern).") | |
| flags_text = " | ".join(flags) if flags else "No major warning patterns detected." | |
| insight = ( | |
| f"**Status:** {status} \n" | |
| f"**Pass rate:** {pass_rate:.1f}% (Pass mark = {pass_mark}) \n" | |
| f"**Avg:** {mean:.1f} | **Std:** {std:.1f} | **Min/Max:** {minv:.1f}/{maxv:.1f} \n" | |
| f"**Borderline (just below pass):** {borderline_fail} | **Borderline (just above pass):** {borderline_pass} \n" | |
| f"**Skew:** {skew:.2f} | **Kurtosis:** {kurt:.2f} | **Outliers:** {outlier_count} | **Missing:** {missing} \n" | |
| f"**Flags:** {flags_text}" | |
| ) | |
| # KPI table | |
| kpi = pd.DataFrame( | |
| [ | |
| ("Total rows (filtered)", total), | |
| ("Students with numeric marks", n), | |
| ("Missing marks", missing), | |
| ("Pass mark", pass_mark), | |
| ("Pass count", pass_count), | |
| ("Pass rate (%)", round(pass_rate, 2)), | |
| ("Borderline just below pass", borderline_fail), | |
| ("Borderline just above pass", borderline_pass), | |
| ("Average", round(mean, 2)), | |
| ("Std deviation", round(std, 2)), | |
| ("Minimum", round(minv, 2)), | |
| ("Maximum", round(maxv, 2)), | |
| ("Skewness", round(skew, 3)), | |
| ("Kurtosis", round(kurt, 3)), | |
| ("Outlier low threshold (IQR)", round(low_thr, 2)), | |
| ("Outlier high threshold (IQR)", round(high_thr, 2)), | |
| ("Outlier count (IQR)", outlier_count), | |
| ("Status", status), | |
| ], | |
| columns=["Metric", "Value"], | |
| ) | |
| # Charts | |
| # 1 Histogram | |
| fig1 = plt.figure() | |
| plt.hist(valid["_marks"].dropna(), bins=12) | |
| plt.axvline(pass_mark, linestyle="--") | |
| plt.title("Marks distribution (Histogram)") | |
| plt.xlabel("Marks") | |
| plt.ylabel("Students") | |
| # 2 CDF | |
| fig2 = plt.figure() | |
| xs = np.sort(valid["_marks"].dropna().values) if n else np.array([]) | |
| ys = np.arange(1, len(xs) + 1) / len(xs) if len(xs) else np.array([]) | |
| if len(xs): | |
| plt.plot(xs, ys) | |
| plt.axvline(pass_mark, linestyle="--") | |
| plt.title("CDF (Proportion of students ≤ mark)") | |
| plt.xlabel("Marks") | |
| plt.ylabel("Proportion") | |
| # 3 Grade distribution | |
| fig3 = plt.figure() | |
| gd = grade_dist.set_index(grade_col)["count"] | |
| plt.bar(gd.index.astype(str), gd.values) | |
| plt.title("Grade distribution") | |
| plt.xlabel("Grade") | |
| plt.ylabel("Count") | |
| plt.xticks(rotation=45, ha="right") | |
| # 4 Boxplot by grade (moderation) | |
| fig4 = plt.figure() | |
| if not grade_stats.empty: | |
| order = grade_stats["Grade"].tolist() | |
| data = [valid.loc[d["_grade"] == g, "_marks"].dropna().values for g in order] | |
| plt.boxplot(data, tick_labels=[str(g) for g in order], vert=True) | |
| plt.title("Marks spread by Grade (Boxplot)") | |
| plt.xlabel("Grade") | |
| plt.ylabel("Marks") | |
| plt.xticks(rotation=45, ha="right") | |
| else: | |
| plt.title("Marks spread by Grade (Boxplot)") | |
| # 5 Section comparison (optional) | |
| fig5 = plt.figure() | |
| section_table = pd.DataFrame() | |
| if section_col and section_col in d.columns and n: | |
| sec = valid.groupby(d[section_col].astype(str).fillna("NA"))["_marks"].agg(["count", "mean"]).reset_index() | |
| sec["pass_rate_%"] = (valid.groupby(d[section_col].astype(str).fillna("NA"))["_marks"].apply(lambda x: (x >= pass_mark).mean() * 100)).values | |
| sec = sec.rename(columns={section_col: "Section"}) | |
| section_table = sec.sort_values("pass_rate_%", ascending=False) | |
| plt.bar(section_table["Section"].astype(str), section_table["pass_rate_%"].values) | |
| plt.title("Section-wise Pass Rate (%)") | |
| plt.xlabel("Section") | |
| plt.ylabel("Pass rate (%)") | |
| plt.xticks(rotation=45, ha="right") | |
| else: | |
| plt.title("Section-wise Pass Rate (%) — not available (no section column)") | |
| plt.axis("off") | |
| return kpi, pct_df, grade_dist, grade_stats, heaping, section_table, insight, fig1, fig2, fig3, fig4, fig5 | |
| # ============================= | |
| # UI | |
| # ============================= | |
| with gr.Blocks(title="HoD Result Dashboard") as demo: | |
| gr.Markdown("## 📊 HoD Result Dashboard — Insights & Patterns (No Student Tables, No PDF)") | |
| file_bytes_state = gr.State(None) # bytes | |
| sheet_state = gr.State(None) # sheet name string | |
| with gr.Row(): | |
| upload = gr.File(label="Upload Excel (.xlsx)", file_types=[".xlsx"]) | |
| sheet_dd = gr.Dropdown(label="Sheet", choices=[], interactive=False) | |
| with gr.Row(): | |
| marks_col = gr.Dropdown(label="Marks column", choices=[], interactive=False) | |
| grade_col = gr.Dropdown(label="Grade column", choices=[], interactive=False) | |
| pass_mark = gr.Number(label="Pass mark", value=50, precision=0) | |
| with gr.Row(): | |
| course_col = gr.Dropdown(label="Course column (optional)", choices=[], interactive=False, visible=False) | |
| section_col = gr.Dropdown(label="Section column (optional)", choices=[], interactive=False, visible=False) | |
| with gr.Row(): | |
| course_filter = gr.Dropdown(label="Course filter", choices=["(all)"], value="(all)", interactive=False, visible=False) | |
| section_filter = gr.Dropdown(label="Section filter", choices=["(all)"], value="(all)", interactive=False, visible=False) | |
| analyze_btn = gr.Button("🔍 Refresh HoD Dashboard") | |
| insight_md = gr.Markdown("") | |
| with gr.Tab("Tables"): | |
| with gr.Row(): | |
| kpi_table = gr.Dataframe(label="KPI Summary", interactive=False, wrap=True) | |
| pct_table = gr.Dataframe(label="Percentiles", interactive=False, wrap=True) | |
| with gr.Row(): | |
| grade_dist_table = gr.Dataframe(label="Grade Distribution", interactive=False, wrap=True) | |
| heaping_table = gr.Dataframe(label="Mark Heaping (Top repeated marks)", interactive=False, wrap=True) | |
| grade_stats_table = gr.Dataframe(label="Grade ↔ Marks (Moderation evidence)", interactive=False, wrap=True) | |
| section_table = gr.Dataframe(label="Section Comparison (if available)", interactive=False, wrap=True) | |
| with gr.Tab("Charts"): | |
| with gr.Row(): | |
| hist_plot = gr.Plot(label="Histogram") | |
| cdf_plot = gr.Plot(label="CDF") | |
| with gr.Row(): | |
| grade_plot = gr.Plot(label="Grade distribution") | |
| grade_box = gr.Plot(label="Boxplot by grade") | |
| section_plot = gr.Plot(label="Section-wise pass rate") | |
| # -------- callbacks | |
| def on_upload(file_obj): | |
| b = _read_file_bytes(file_obj) | |
| xls = pd.ExcelFile(io.BytesIO(b), engine="openpyxl") | |
| sheets = xls.sheet_names or [] | |
| if not sheets: | |
| raise gr.Error("No sheets found in workbook.") | |
| sheet0 = sheets[0] | |
| df0 = _drop_useless(pd.read_excel(io.BytesIO(b), sheet_name=sheet0, engine="openpyxl")) | |
| cols = list(df0.columns) | |
| m_guess = _guess_marks_col(df0) | |
| g_guess = _guess_grade_col(cols) | |
| c_guess, s_guess = _guess_optional(cols) | |
| # Optional filters | |
| course_col_upd = gr.update(choices=cols, value=(c_guess or cols[0]), visible=bool(c_guess), interactive=bool(c_guess)) | |
| section_col_upd = gr.update(choices=cols, value=(s_guess or cols[0]), visible=bool(s_guess), interactive=bool(s_guess)) | |
| course_filter_upd = gr.update(choices=["(all)"], value="(all)", visible=False, interactive=False) | |
| section_filter_upd = gr.update(choices=["(all)"], value="(all)", visible=False, interactive=False) | |
| if c_guess and c_guess in df0.columns: | |
| vals = ["(all)"] + sorted(df0[c_guess].astype(str).fillna("NA").unique().tolist()) | |
| course_filter_upd = gr.update(choices=vals, value="(all)", visible=True, interactive=True) | |
| if s_guess and s_guess in df0.columns: | |
| vals = ["(all)"] + sorted(df0[s_guess].astype(str).fillna("NA").unique().tolist()) | |
| section_filter_upd = gr.update(choices=vals, value="(all)", visible=True, interactive=True) | |
| return ( | |
| gr.update(choices=sheets, value=sheet0, interactive=True), # sheet_dd | |
| gr.update(choices=cols, value=m_guess, interactive=True), # marks_col | |
| gr.update(choices=cols, value=g_guess, interactive=True), # grade_col | |
| course_col_upd, | |
| section_col_upd, | |
| course_filter_upd, | |
| section_filter_upd, | |
| b, # file_bytes_state | |
| sheet0, # sheet_state | |
| ) | |
| upload.change( | |
| fn=on_upload, | |
| inputs=[upload], | |
| outputs=[sheet_dd, marks_col, grade_col, course_col, section_col, course_filter, section_filter, file_bytes_state, sheet_state], | |
| ) | |
| def on_sheet_change(sheet_name, file_bytes, course_col_val, section_col_val): | |
| if not file_bytes: | |
| raise gr.Error("Upload Excel first.") | |
| df = _drop_useless(pd.read_excel(io.BytesIO(file_bytes), sheet_name=sheet_name, engine="openpyxl")) | |
| cf = gr.update(choices=["(all)"], value="(all)", visible=False, interactive=False) | |
| sf = gr.update(choices=["(all)"], value="(all)", visible=False, interactive=False) | |
| if course_col_val and course_col_val in df.columns: | |
| vals = ["(all)"] + sorted(df[course_col_val].astype(str).fillna("NA").unique().tolist()) | |
| cf = gr.update(choices=vals, value="(all)", visible=True, interactive=True) | |
| if section_col_val and section_col_val in df.columns: | |
| vals = ["(all)"] + sorted(df[section_col_val].astype(str).fillna("NA").unique().tolist()) | |
| sf = gr.update(choices=vals, value="(all)", visible=True, interactive=True) | |
| return cf, sf, sheet_name | |
| sheet_dd.change( | |
| fn=on_sheet_change, | |
| inputs=[sheet_dd, file_bytes_state, course_col, section_col], | |
| outputs=[course_filter, section_filter, sheet_state], | |
| ) | |
| def on_refresh(file_bytes, sheet_name, m_col, g_col, pmark, c_col, s_col, c_filter, s_filter): | |
| if not file_bytes: | |
| raise gr.Error("Upload Excel first.") | |
| if not sheet_name: | |
| raise gr.Error("Select a sheet.") | |
| df = _drop_useless(pd.read_excel(io.BytesIO(file_bytes), sheet_name=sheet_name, engine="openpyxl")) | |
| kpi, pct, gdist, gstats, heap, sec_tbl, insight, f1, f2, f3, f4, f5 = compute_hod_insights( | |
| df, m_col, g_col, int(pmark), c_col, s_col, c_filter, s_filter | |
| ) | |
| return ( | |
| "### HoD Insight\n\n" + insight, | |
| kpi, pct, gdist, heap, gstats, sec_tbl, | |
| f1, f2, f3, f4, f5 | |
| ) | |
| analyze_btn.click( | |
| fn=on_refresh, | |
| inputs=[file_bytes_state, sheet_state, marks_col, grade_col, pass_mark, course_col, section_col, course_filter, section_filter], | |
| outputs=[insight_md, kpi_table, pct_table, grade_dist_table, heaping_table, grade_stats_table, section_table, | |
| hist_plot, cdf_plot, grade_plot, grade_box, section_plot], | |
| ) | |
| demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False) | |