Spaces:

Deevyankar
/

cga

Sleeping

App Files Files Community

Deevyankar commited on Jan 1

Commit

bb85f6c

verified ·

1 Parent(s): 9655019

Update app.py

Browse files

Files changed (1) hide show

app.py +139 -105

app.py CHANGED Viewed

@@ -1,6 +1,4 @@
 import io
-from datetime import datetime
 import numpy as np
 import pandas as pd
 import gradio as gr
@@ -23,11 +21,7 @@ def _read_file_bytes(file_obj):
     return b
-def _safe_numeric(series):
-    return pd.to_numeric(series, errors="coerce")
-def _drop_useless_cols(df: pd.DataFrame) -> pd.DataFrame:
     df = df.dropna(axis=1, how="all").dropna(axis=0, how="all")
     unnamed = [c for c in df.columns if str(c).strip().lower().startswith("unnamed")]
     if unnamed:
@@ -35,26 +29,31 @@ def _drop_useless_cols(df: pd.DataFrame) -> pd.DataFrame:
     return df
-def _guess_cols(df: pd.DataFrame):
-    cols = list(df.columns)
-    lower = {c: str(c).strip().lower() for c in cols}
-    # marks guess = column with most numeric values
-    best_marks, best_score = cols[0], -1
     for c in cols:
-        s = _safe_numeric(df[c])
-        score = s.notna().mean()
         if score > best_score:
             best_score = score
-            best_marks = c
-    grade_guess = next((c for c in cols if "grade" in lower[c] or "grde" in lower[c]), cols[0])
-    # optional columns
-    course_guess = next((c for c in cols if any(k in lower[c] for k in ["course", "module", "subject"])), None)
-    section_guess = next((c for c in cols if any(k in lower[c] for k in ["section", "group", "batch", "class"])), None)
-    return best_marks, grade_guess, course_guess, section_guess
 def apply_filters(df, course_col, section_col, course_filter, section_filter):
@@ -67,9 +66,9 @@ def apply_filters(df, course_col, section_col, course_filter, section_filter):
 # =============================
-# Analytics (NO student tables)
 # =============================
-def compute_insights(df, marks_col, grade_col, pass_mark, course_col, section_col, course_filter, section_filter):
     if df is None or df.empty:
         raise gr.Error("Sheet is empty.")
@@ -83,30 +82,47 @@ def compute_insights(df, marks_col, grade_col, pass_mark, course_col, section_co
     n = int(len(valid))
     missing = int(d["_marks"].isna().sum())
     mean = float(valid["_marks"].mean()) if n else 0.0
     std = float(valid["_marks"].std(ddof=0)) if n else 0.0
     minv = float(valid["_marks"].min()) if n else 0.0
     maxv = float(valid["_marks"].max()) if n else 0.0
-    pass_mark = int(pass_mark)
     pass_count = int((valid["_marks"] >= pass_mark).sum()) if n else 0
     pass_rate = (pass_count / n * 100.0) if n else 0.0
     skew = float(valid["_marks"].skew()) if n else 0.0
     kurt = float(valid["_marks"].kurt()) if n else 0.0
-    # Percentiles
-    pct_rows = []
     if n:
-        for p in [10, 25, 50, 75, 90]:
-            pct_rows.append((f"P{p}", round(float(np.percentile(valid["_marks"], p)), 2)))
-    percentiles_df = pd.DataFrame(pct_rows, columns=["Percentile", "Marks"]) if pct_rows else pd.DataFrame()
     # Grade distribution
     grade_dist = d["_grade"].value_counts(dropna=False).rename("count").to_frame().reset_index()
     grade_dist.columns = [grade_col, "count"]
-    # Grade to marks mapping
     grade_stats = (
         valid.groupby(d["_grade"])["_marks"]
         .agg(["count", "mean", "std", "min", "median", "max"])
@@ -116,25 +132,14 @@ def compute_insights(df, marks_col, grade_col, pass_mark, course_col, section_co
     )
     # Mark heaping (repeated marks)
-    heaping_df = (
         valid["_marks"].round(0).astype(int)
         .value_counts().head(12)
         .rename("count").reset_index()
         .rename(columns={"index": "Mark"})
     )
-    # Outlier count (IQR)
-    outlier_count = 0
-    low_thr = high_thr = 0.0
-    if n:
-        q1 = float(np.percentile(valid["_marks"], 25))
-        q3 = float(np.percentile(valid["_marks"], 75))
-        iqr = q3 - q1
-        low_thr = q1 - 1.5 * iqr
-        high_thr = q3 + 1.5 * iqr
-        outlier_count = int(((valid["_marks"] < low_thr) | (valid["_marks"] > high_thr)).sum())
-    # Status
     if pass_rate >= 80:
         status = "GREEN"
     elif pass_rate >= 60:
@@ -142,26 +147,34 @@ def compute_insights(df, marks_col, grade_col, pass_mark, course_col, section_co
     else:
         status = "RED"
-    # Teacher flags
     flags = []
     if missing > 0:
-        flags.append(f"{missing} missing mark(s) → verify.")
     if abs(skew) > 0.7:
-        flags.append("Skewed distribution → performance not balanced.")
-    if len(heaping_df) and heaping_df["count"].iloc[0] >= max(10, 0.06 * n):
-        flags.append("Heaping → many students share same mark (rounding/marking pattern).")
     if outlier_count > 0:
-        flags.append(f"{outlier_count} outlier(s) by IQR → check special cases.")
     flags_text = " | ".join(flags) if flags else "No major warning patterns detected."
-    insight_text = (
-        f"Status: {status} | Pass rate: {pass_rate:.1f}% (Pass mark {pass_mark}) | "
-        f"Avg: {mean:.1f} (Std: {std:.1f}) | Min/Max: {minv:.1f}/{maxv:.1f} | "
-        f"Skew: {skew:.2f} | Kurtosis: {kurt:.2f} | Outliers: {outlier_count} | Missing: {missing}\n"
-        f"Flags: {flags_text}"
     )
-    kpi_df = pd.DataFrame(
         [
             ("Total rows (filtered)", total),
             ("Students with numeric marks", n),
@@ -169,6 +182,8 @@ def compute_insights(df, marks_col, grade_col, pass_mark, course_col, section_co
             ("Pass mark", pass_mark),
             ("Pass count", pass_count),
             ("Pass rate (%)", round(pass_rate, 2)),
             ("Average", round(mean, 2)),
             ("Std deviation", round(std, 2)),
             ("Minimum", round(minv, 2)),
@@ -178,29 +193,32 @@ def compute_insights(df, marks_col, grade_col, pass_mark, course_col, section_co
             ("Outlier low threshold (IQR)", round(low_thr, 2)),
             ("Outlier high threshold (IQR)", round(high_thr, 2)),
             ("Outlier count (IQR)", outlier_count),
         ],
         columns=["Metric", "Value"],
     )
     # Charts
-    # 1) Histogram
     fig1 = plt.figure()
     plt.hist(valid["_marks"].dropna(), bins=12)
     plt.title("Marks distribution (Histogram)")
     plt.xlabel("Marks")
     plt.ylabel("Students")
-    # 2) CDF
     fig2 = plt.figure()
     xs = np.sort(valid["_marks"].dropna().values) if n else np.array([])
     ys = np.arange(1, len(xs) + 1) / len(xs) if len(xs) else np.array([])
     if len(xs):
         plt.plot(xs, ys)
     plt.title("CDF (Proportion of students ≤ mark)")
     plt.xlabel("Marks")
     plt.ylabel("Proportion")
-    # 3) Grade bar
     fig3 = plt.figure()
     gd = grade_dist.set_index(grade_col)["count"]
     plt.bar(gd.index.astype(str), gd.values)
@@ -209,12 +227,12 @@ def compute_insights(df, marks_col, grade_col, pass_mark, course_col, section_co
     plt.ylabel("Count")
     plt.xticks(rotation=45, ha="right")
-    # 4) Boxplot by grade
     fig4 = plt.figure()
     if not grade_stats.empty:
-        order = grade_stats[grade_stats.columns[0]].tolist()
         data = [valid.loc[d["_grade"] == g, "_marks"].dropna().values for g in order]
-        plt.boxplot(data, tick_labels=[str(g) for g in order], vert=True)  # ✅ tick_labels
         plt.title("Marks spread by Grade (Boxplot)")
         plt.xlabel("Grade")
         plt.ylabel("Marks")
@@ -222,17 +240,35 @@ def compute_insights(df, marks_col, grade_col, pass_mark, course_col, section_co
     else:
         plt.title("Marks spread by Grade (Boxplot)")
-    return kpi_df, percentiles_df, grade_dist, grade_stats, heaping_df, insight_text, fig1, fig2, fig3, fig4
 # =============================
 # UI
 # =============================
 with gr.Blocks(title="HoD Result Dashboard") as demo:
-    gr.Markdown("## 📊 HoD Result Dashboard — Teacher Insights Dashboard (No PDF, No Student Tables)")
-    file_state = gr.State(None)   # bytes
-    sheet_state = gr.State(None)  # sheet name string
     with gr.Row():
         upload = gr.File(label="Upload Excel (.xlsx)", file_types=[".xlsx"])
@@ -251,18 +287,19 @@ with gr.Blocks(title="HoD Result Dashboard") as demo:
         course_filter = gr.Dropdown(label="Course filter", choices=["(all)"], value="(all)", interactive=False, visible=False)
         section_filter = gr.Dropdown(label="Section filter", choices=["(all)"], value="(all)", interactive=False, visible=False)
-    analyze_btn = gr.Button("🔍 Refresh Dashboard")
     insight_md = gr.Markdown("")
     with gr.Tab("Tables"):
         with gr.Row():
             kpi_table = gr.Dataframe(label="KPI Summary", interactive=False, wrap=True)
-            percentiles_table = gr.Dataframe(label="Percentiles", interactive=False, wrap=True)
         with gr.Row():
-            grade_dist_table = gr.Dataframe(label="Grade distribution", interactive=False, wrap=True)
-            heaping_table = gr.Dataframe(label="Mark heaping (top repeated marks)", interactive=False, wrap=True)
-        grade_stats_table = gr.Dataframe(label="Grade → Marks mapping (min/max/mean/median)", interactive=False, wrap=True)
     with gr.Tab("Charts"):
         with gr.Row():
@@ -271,61 +308,62 @@ with gr.Blocks(title="HoD Result Dashboard") as demo:
         with gr.Row():
             grade_plot = gr.Plot(label="Grade distribution")
             grade_box = gr.Plot(label="Boxplot by grade")
-    # ---- callbacks
     def on_upload(file_obj):
-        file_bytes = _read_file_bytes(file_obj)
-        xls = pd.ExcelFile(io.BytesIO(file_bytes), engine="openpyxl")
         sheets = xls.sheet_names or []
         if not sheets:
-            raise gr.Error("No sheets found.")
         sheet0 = sheets[0]
-        df0 = pd.read_excel(io.BytesIO(file_bytes), sheet_name=sheet0, engine="openpyxl")
-        df0 = _drop_useless_cols(df0)
-        m_guess, g_guess, c_guess, s_guess = _guess_cols(df0)
         cols = list(df0.columns)
-        # optional filter dropdown setup
-        course_col_update = gr.update(choices=cols, value=(c_guess or cols[0]), visible=bool(c_guess), interactive=bool(c_guess))
-        section_col_update = gr.update(choices=cols, value=(s_guess or cols[0]), visible=bool(s_guess), interactive=bool(s_guess))
-        course_filter_update = gr.update(choices=["(all)"], value="(all)", visible=False, interactive=False)
-        section_filter_update = gr.update(choices=["(all)"], value="(all)", visible=False, interactive=False)
         if c_guess and c_guess in df0.columns:
             vals = ["(all)"] + sorted(df0[c_guess].astype(str).fillna("NA").unique().tolist())
-            course_filter_update = gr.update(choices=vals, value="(all)", visible=True, interactive=True)
         if s_guess and s_guess in df0.columns:
             vals = ["(all)"] + sorted(df0[s_guess].astype(str).fillna("NA").unique().tolist())
-            section_filter_update = gr.update(choices=vals, value="(all)", visible=True, interactive=True)
         return (
             gr.update(choices=sheets, value=sheet0, interactive=True),   # sheet_dd
             gr.update(choices=cols, value=m_guess, interactive=True),    # marks_col
             gr.update(choices=cols, value=g_guess, interactive=True),    # grade_col
-            course_col_update,
-            section_col_update,
-            course_filter_update,
-            section_filter_update,
-            file_bytes,     # file_state BYTES
-            sheet0,         # sheet_state STRING
         )
     upload.change(
         fn=on_upload,
         inputs=[upload],
-        outputs=[sheet_dd, marks_col, grade_col, course_col, section_col, course_filter, section_filter, file_state, sheet_state],
     )
     def on_sheet_change(sheet_name, file_bytes, course_col_val, section_col_val):
         if not file_bytes:
             raise gr.Error("Upload Excel first.")
-        df = pd.read_excel(io.BytesIO(file_bytes), sheet_name=sheet_name, engine="openpyxl")
-        df = _drop_useless_cols(df)
         cf = gr.update(choices=["(all)"], value="(all)", visible=False, interactive=False)
         sf = gr.update(choices=["(all)"], value="(all)", visible=False, interactive=False)
@@ -342,7 +380,7 @@ with gr.Blocks(title="HoD Result Dashboard") as demo:
     sheet_dd.change(
         fn=on_sheet_change,
-        inputs=[sheet_dd, file_state, course_col, section_col],
         outputs=[course_filter, section_filter, sheet_state],
     )
@@ -352,27 +390,23 @@ with gr.Blocks(title="HoD Result Dashboard") as demo:
         if not sheet_name:
             raise gr.Error("Select a sheet.")
-        df = pd.read_excel(io.BytesIO(file_bytes), sheet_name=sheet_name, engine="openpyxl")
-        df = _drop_useless_cols(df)
-        kpi_df, pct_df, gdist_df, gstats_df, heap_df, insight_text, f1, f2, f3, f4 = compute_insights(
             df, m_col, g_col, int(pmark), c_col, s_col, c_filter, s_filter
         )
         return (
-            f"### Teacher Insight\n\n{insight_text}",
-            kpi_df,
-            pct_df,
-            gdist_df,
-            heap_df,
-            gstats_df,
-            f1, f2, f3, f4
         )
     analyze_btn.click(
         fn=on_refresh,
-        inputs=[file_state, sheet_state, marks_col, grade_col, pass_mark, course_col, section_col, course_filter, section_filter],
-        outputs=[insight_md, kpi_table, percentiles_table, grade_dist_table, heaping_table, grade_stats_table, hist_plot, cdf_plot, grade_plot, grade_box],
     )
 demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)

 import io
 import numpy as np
 import pandas as pd
 import gradio as gr
     return b
+def _drop_useless(df: pd.DataFrame) -> pd.DataFrame:
     df = df.dropna(axis=1, how="all").dropna(axis=0, how="all")
     unnamed = [c for c in df.columns if str(c).strip().lower().startswith("unnamed")]
     if unnamed:
     return df
+def _safe_numeric(s):
+    return pd.to_numeric(s, errors="coerce")
+def _guess_marks_col(df: pd.DataFrame):
+    cols = list(df.columns)
+    best, best_score = cols[0], -1
     for c in cols:
+        score = _safe_numeric(df[c]).notna().mean()
         if score > best_score:
             best_score = score
+            best = c
+    return best
+def _guess_grade_col(cols):
+    low = {c: str(c).strip().lower() for c in cols}
+    return next((c for c in cols if "grade" in low[c] or "grde" in low[c]), cols[0])
+def _guess_optional(cols):
+    low = {c: str(c).strip().lower() for c in cols}
+    course = next((c for c in cols if any(k in low[c] for k in ["course", "module", "subject"])), None)
+    section = next((c for c in cols if any(k in low[c] for k in ["section", "group", "batch", "class"])), None)
+    return course, section
 def apply_filters(df, course_col, section_col, course_filter, section_filter):
 # =============================
+# Core HoD Insights (no student tables)
 # =============================
+def compute_hod_insights(df, marks_col, grade_col, pass_mark, course_col, section_col, course_filter, section_filter):
     if df is None or df.empty:
         raise gr.Error("Sheet is empty.")
     n = int(len(valid))
     missing = int(d["_marks"].isna().sum())
+    pass_mark = int(pass_mark)
     mean = float(valid["_marks"].mean()) if n else 0.0
     std = float(valid["_marks"].std(ddof=0)) if n else 0.0
     minv = float(valid["_marks"].min()) if n else 0.0
     maxv = float(valid["_marks"].max()) if n else 0.0
     pass_count = int((valid["_marks"] >= pass_mark).sum()) if n else 0
     pass_rate = (pass_count / n * 100.0) if n else 0.0
+    # Borderline (pass to pass+5) and just-below (pass-5 to pass-1)
+    borderline_pass = int(((valid["_marks"] >= pass_mark) & (valid["_marks"] < pass_mark + 5)).sum()) if n else 0
+    borderline_fail = int(((valid["_marks"] < pass_mark) & (valid["_marks"] >= pass_mark - 5)).sum()) if n else 0
+    # Distribution shape
     skew = float(valid["_marks"].skew()) if n else 0.0
     kurt = float(valid["_marks"].kurt()) if n else 0.0
+    # Outliers by IQR
+    outlier_count = 0
+    low_thr = high_thr = 0.0
     if n:
+        q1 = float(np.percentile(valid["_marks"], 25))
+        q3 = float(np.percentile(valid["_marks"], 75))
+        iqr = q3 - q1
+        low_thr = q1 - 1.5 * iqr
+        high_thr = q3 + 1.5 * iqr
+        outlier_count = int(((valid["_marks"] < low_thr) | (valid["_marks"] > high_thr)).sum())
+    # Percentiles
+    pct_df = pd.DataFrame(
+        [(f"P{p}", round(float(np.percentile(valid["_marks"], p)), 2)) for p in [10, 25, 50, 75, 90]]
+        if n else [],
+        columns=["Percentile", "Marks"]
+    )
     # Grade distribution
     grade_dist = d["_grade"].value_counts(dropna=False).rename("count").to_frame().reset_index()
     grade_dist.columns = [grade_col, "count"]
+    grade_dist["%"] = (grade_dist["count"] / grade_dist["count"].sum() * 100).round(2) if len(grade_dist) else 0
+    # Grade ↔ marks mapping (moderation evidence)
     grade_stats = (
         valid.groupby(d["_grade"])["_marks"]
         .agg(["count", "mean", "std", "min", "median", "max"])
     )
     # Mark heaping (repeated marks)
+    heaping = (
         valid["_marks"].round(0).astype(int)
         .value_counts().head(12)
         .rename("count").reset_index()
         .rename(columns={"index": "Mark"})
     )
+    # Course status
     if pass_rate >= 80:
         status = "GREEN"
     elif pass_rate >= 60:
     else:
         status = "RED"
+    # Flags
     flags = []
     if missing > 0:
+        flags.append(f"{missing} missing mark(s) — verify completeness.")
+    if borderline_fail > max(5, 0.03 * n):
+        flags.append("Many students just below pass — consider targeted support / moderation review.")
+    if borderline_pass > max(5, 0.03 * n):
+        flags.append("Many students just above pass — borderline attainment cluster.")
     if abs(skew) > 0.7:
+        flags.append("Skewed distribution — check assessment balance and marking consistency.")
     if outlier_count > 0:
+        flags.append(f"{outlier_count} outlier(s) by IQR — spot-check extremes.")
+    if len(heaping) and heaping["count"].iloc[0] >= max(10, 0.06 * n):
+        flags.append("Heaping detected — many students share identical marks (rounding/marking pattern).")
     flags_text = " | ".join(flags) if flags else "No major warning patterns detected."
+    insight = (
+        f"**Status:** {status}  \n"
+        f"**Pass rate:** {pass_rate:.1f}% (Pass mark = {pass_mark})  \n"
+        f"**Avg:** {mean:.1f} | **Std:** {std:.1f} | **Min/Max:** {minv:.1f}/{maxv:.1f}  \n"
+        f"**Borderline (just below pass):** {borderline_fail} | **Borderline (just above pass):** {borderline_pass}  \n"
+        f"**Skew:** {skew:.2f} | **Kurtosis:** {kurt:.2f} | **Outliers:** {outlier_count} | **Missing:** {missing}  \n"
+        f"**Flags:** {flags_text}"
     )
+    # KPI table
+    kpi = pd.DataFrame(
         [
             ("Total rows (filtered)", total),
             ("Students with numeric marks", n),
             ("Pass mark", pass_mark),
             ("Pass count", pass_count),
             ("Pass rate (%)", round(pass_rate, 2)),
+            ("Borderline just below pass", borderline_fail),
+            ("Borderline just above pass", borderline_pass),
             ("Average", round(mean, 2)),
             ("Std deviation", round(std, 2)),
             ("Minimum", round(minv, 2)),
             ("Outlier low threshold (IQR)", round(low_thr, 2)),
             ("Outlier high threshold (IQR)", round(high_thr, 2)),
             ("Outlier count (IQR)", outlier_count),
+            ("Status", status),
         ],
         columns=["Metric", "Value"],
     )
     # Charts
+    # 1 Histogram
     fig1 = plt.figure()
     plt.hist(valid["_marks"].dropna(), bins=12)
+    plt.axvline(pass_mark, linestyle="--")
     plt.title("Marks distribution (Histogram)")
     plt.xlabel("Marks")
     plt.ylabel("Students")
+    # 2 CDF
     fig2 = plt.figure()
     xs = np.sort(valid["_marks"].dropna().values) if n else np.array([])
     ys = np.arange(1, len(xs) + 1) / len(xs) if len(xs) else np.array([])
     if len(xs):
         plt.plot(xs, ys)
+        plt.axvline(pass_mark, linestyle="--")
     plt.title("CDF (Proportion of students ≤ mark)")
     plt.xlabel("Marks")
     plt.ylabel("Proportion")
+    # 3 Grade distribution
     fig3 = plt.figure()
     gd = grade_dist.set_index(grade_col)["count"]
     plt.bar(gd.index.astype(str), gd.values)
     plt.ylabel("Count")
     plt.xticks(rotation=45, ha="right")
+    # 4 Boxplot by grade (moderation)
     fig4 = plt.figure()
     if not grade_stats.empty:
+        order = grade_stats["Grade"].tolist()
         data = [valid.loc[d["_grade"] == g, "_marks"].dropna().values for g in order]
+        plt.boxplot(data, tick_labels=[str(g) for g in order], vert=True)
         plt.title("Marks spread by Grade (Boxplot)")
         plt.xlabel("Grade")
         plt.ylabel("Marks")
     else:
         plt.title("Marks spread by Grade (Boxplot)")
+    # 5 Section comparison (optional)
+    fig5 = plt.figure()
+    section_table = pd.DataFrame()
+    if section_col and section_col in d.columns and n:
+        sec = valid.groupby(d[section_col].astype(str).fillna("NA"))["_marks"].agg(["count", "mean"]).reset_index()
+        sec["pass_rate_%"] = (valid.groupby(d[section_col].astype(str).fillna("NA"))["_marks"].apply(lambda x: (x >= pass_mark).mean() * 100)).values
+        sec = sec.rename(columns={section_col: "Section"})
+        section_table = sec.sort_values("pass_rate_%", ascending=False)
+        plt.bar(section_table["Section"].astype(str), section_table["pass_rate_%"].values)
+        plt.title("Section-wise Pass Rate (%)")
+        plt.xlabel("Section")
+        plt.ylabel("Pass rate (%)")
+        plt.xticks(rotation=45, ha="right")
+    else:
+        plt.title("Section-wise Pass Rate (%) — not available (no section column)")
+        plt.axis("off")
+    return kpi, pct_df, grade_dist, grade_stats, heaping, section_table, insight, fig1, fig2, fig3, fig4, fig5
 # =============================
 # UI
 # =============================
 with gr.Blocks(title="HoD Result Dashboard") as demo:
+    gr.Markdown("## 📊 HoD Result Dashboard — Insights & Patterns (No Student Tables, No PDF)")
+    file_bytes_state = gr.State(None)   # bytes
+    sheet_state = gr.State(None)        # sheet name string
     with gr.Row():
         upload = gr.File(label="Upload Excel (.xlsx)", file_types=[".xlsx"])
         course_filter = gr.Dropdown(label="Course filter", choices=["(all)"], value="(all)", interactive=False, visible=False)
         section_filter = gr.Dropdown(label="Section filter", choices=["(all)"], value="(all)", interactive=False, visible=False)
+    analyze_btn = gr.Button("🔍 Refresh HoD Dashboard")
     insight_md = gr.Markdown("")
     with gr.Tab("Tables"):
         with gr.Row():
             kpi_table = gr.Dataframe(label="KPI Summary", interactive=False, wrap=True)
+            pct_table = gr.Dataframe(label="Percentiles", interactive=False, wrap=True)
         with gr.Row():
+            grade_dist_table = gr.Dataframe(label="Grade Distribution", interactive=False, wrap=True)
+            heaping_table = gr.Dataframe(label="Mark Heaping (Top repeated marks)", interactive=False, wrap=True)
+        grade_stats_table = gr.Dataframe(label="Grade ↔ Marks (Moderation evidence)", interactive=False, wrap=True)
+        section_table = gr.Dataframe(label="Section Comparison (if available)", interactive=False, wrap=True)
     with gr.Tab("Charts"):
         with gr.Row():
         with gr.Row():
             grade_plot = gr.Plot(label="Grade distribution")
             grade_box = gr.Plot(label="Boxplot by grade")
+        section_plot = gr.Plot(label="Section-wise pass rate")
+    # -------- callbacks
     def on_upload(file_obj):
+        b = _read_file_bytes(file_obj)
+        xls = pd.ExcelFile(io.BytesIO(b), engine="openpyxl")
         sheets = xls.sheet_names or []
         if not sheets:
+            raise gr.Error("No sheets found in workbook.")
         sheet0 = sheets[0]
+        df0 = _drop_useless(pd.read_excel(io.BytesIO(b), sheet_name=sheet0, engine="openpyxl"))
         cols = list(df0.columns)
+        m_guess = _guess_marks_col(df0)
+        g_guess = _guess_grade_col(cols)
+        c_guess, s_guess = _guess_optional(cols)
+        # Optional filters
+        course_col_upd = gr.update(choices=cols, value=(c_guess or cols[0]), visible=bool(c_guess), interactive=bool(c_guess))
+        section_col_upd = gr.update(choices=cols, value=(s_guess or cols[0]), visible=bool(s_guess), interactive=bool(s_guess))
+        course_filter_upd = gr.update(choices=["(all)"], value="(all)", visible=False, interactive=False)
+        section_filter_upd = gr.update(choices=["(all)"], value="(all)", visible=False, interactive=False)
         if c_guess and c_guess in df0.columns:
             vals = ["(all)"] + sorted(df0[c_guess].astype(str).fillna("NA").unique().tolist())
+            course_filter_upd = gr.update(choices=vals, value="(all)", visible=True, interactive=True)
         if s_guess and s_guess in df0.columns:
             vals = ["(all)"] + sorted(df0[s_guess].astype(str).fillna("NA").unique().tolist())
+            section_filter_upd = gr.update(choices=vals, value="(all)", visible=True, interactive=True)
         return (
             gr.update(choices=sheets, value=sheet0, interactive=True),   # sheet_dd
             gr.update(choices=cols, value=m_guess, interactive=True),    # marks_col
             gr.update(choices=cols, value=g_guess, interactive=True),    # grade_col
+            course_col_upd,
+            section_col_upd,
+            course_filter_upd,
+            section_filter_upd,
+            b,          # file_bytes_state
+            sheet0,     # sheet_state
         )
     upload.change(
         fn=on_upload,
         inputs=[upload],
+        outputs=[sheet_dd, marks_col, grade_col, course_col, section_col, course_filter, section_filter, file_bytes_state, sheet_state],
     )
     def on_sheet_change(sheet_name, file_bytes, course_col_val, section_col_val):
         if not file_bytes:
             raise gr.Error("Upload Excel first.")
+        df = _drop_useless(pd.read_excel(io.BytesIO(file_bytes), sheet_name=sheet_name, engine="openpyxl"))
         cf = gr.update(choices=["(all)"], value="(all)", visible=False, interactive=False)
         sf = gr.update(choices=["(all)"], value="(all)", visible=False, interactive=False)
     sheet_dd.change(
         fn=on_sheet_change,
+        inputs=[sheet_dd, file_bytes_state, course_col, section_col],
         outputs=[course_filter, section_filter, sheet_state],
     )
         if not sheet_name:
             raise gr.Error("Select a sheet.")
+        df = _drop_useless(pd.read_excel(io.BytesIO(file_bytes), sheet_name=sheet_name, engine="openpyxl"))
+        kpi, pct, gdist, gstats, heap, sec_tbl, insight, f1, f2, f3, f4, f5 = compute_hod_insights(
             df, m_col, g_col, int(pmark), c_col, s_col, c_filter, s_filter
         )
         return (
+            "### HoD Insight\n\n" + insight,
+            kpi, pct, gdist, heap, gstats, sec_tbl,
+            f1, f2, f3, f4, f5
         )
     analyze_btn.click(
         fn=on_refresh,
+        inputs=[file_bytes_state, sheet_state, marks_col, grade_col, pass_mark, course_col, section_col, course_filter, section_filter],
+        outputs=[insight_md, kpi_table, pct_table, grade_dist_table, heaping_table, grade_stats_table, section_table,
+                 hist_plot, cdf_plot, grade_plot, grade_box, section_plot],
     )
 demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)