Spaces:

Deevyankar
/

cga

Sleeping

App Files Files Community

Deevyankar committed on Jan 1

Commit

df4a47d

verified ·

1 Parent(s): 239975a

Update app.py

Browse files

Files changed (1) hide show

app.py +286 -124

app.py CHANGED Viewed

@@ -1,12 +1,15 @@
 import io
 from datetime import datetime
 import pandas as pd
 import gradio as gr
 import matplotlib.pyplot as plt
 from reportlab.lib.pagesizes import A4
 from reportlab.pdfgen import canvas
 from reportlab.lib.units import cm
 # =============================
@@ -29,21 +32,58 @@ def _safe_numeric(series):
     return pd.to_numeric(series, errors="coerce")
-def _guess_cols(cols):
     lower = {c: str(c).strip().lower() for c in cols}
-    student_guess = next((c for c in cols if any(k in lower[c] for k in ["student", "name", "id"])), cols[0])
-    marks_guess = next((c for c in cols if lower[c] in ["marks", "mark", "score", "total", "final"]), cols[0])
-    grade_guess = next((c for c in cols if "grade" in lower[c]), cols[0])
     course_guess = next((c for c in cols if any(k in lower[c] for k in ["course", "module", "subject"])), None)
     section_guess = next((c for c in cols if any(k in lower[c] for k in ["section", "group", "batch", "class"])), None)
-    return student_guess, marks_guess, grade_guess, course_guess, section_guess
 # =============================
-# Load Excel and sheet
 # =============================
 def load_excel(file_obj):
     try:
@@ -55,23 +95,22 @@ def load_excel(file_obj):
         sheet0 = sheets[0]
         df = pd.read_excel(io.BytesIO(file_bytes), sheet_name=sheet0, engine="openpyxl")
-        df = df.dropna(axis=1, how="all").dropna(axis=0, how="all")
         cols = list(df.columns)
-        s_guess, m_guess, g_guess, c_guess, sec_guess = _guess_cols(cols)
-        # Filter dropdowns (dynamic if columns exist)
-        course_dd = gr.Dropdown(choices=["(all)"], value="(all)", interactive=bool(c_guess), visible=bool(c_guess), label="Course filter")
-        section_dd = gr.Dropdown(choices=["(all)"], value="(all)", interactive=bool(sec_guess), visible=bool(sec_guess), label="Section/Group filter")
-        # Pre-fill filter choices if present
-        if c_guess:
             course_vals = ["(all)"] + sorted(df[c_guess].astype(str).fillna("NA").unique().tolist())
             course_dd = gr.Dropdown(choices=course_vals, value="(all)", interactive=True, visible=True, label="Course filter")
-        if sec_guess:
             sec_vals = ["(all)"] + sorted(df[sec_guess].astype(str).fillna("NA").unique().tolist())
-            section_dd = gr.Dropdown(choices=sec_vals, value="(all)", interactive=True, visible=True, label="Section/Group filter")
         return (
             gr.Dropdown(choices=sheets, value=sheet0, interactive=True),
@@ -80,11 +119,11 @@ def load_excel(file_obj):
             gr.Dropdown(choices=cols, value=m_guess, interactive=True),
             gr.Dropdown(choices=cols, value=g_guess, interactive=True),
             gr.Dropdown(choices=cols, value=(c_guess or cols[0]), interactive=bool(c_guess), visible=bool(c_guess), label="Course column"),
-            gr.Dropdown(choices=cols, value=(sec_guess or cols[0]), interactive=bool(sec_guess), visible=bool(sec_guess), label="Section/Group column"),
             course_dd,
             section_dd,
             file_bytes,
-            sheet0,
         )
     except Exception:
         return (
@@ -106,12 +145,11 @@ def read_sheet(sheet_name, file_bytes, course_col, section_col):
     if not file_bytes:
         raise ValueError("Upload Excel first.")
     df = pd.read_excel(io.BytesIO(file_bytes), sheet_name=sheet_name, engine="openpyxl")
-    df = df.dropna(axis=1, how="all").dropna(axis=0, how="all")
-    cols = list(df.columns)
-    # Update filter choices based on current sheet + selected columns
     course_dd = gr.Dropdown(choices=["(all)"], value="(all)", interactive=False, visible=False, label="Course filter")
-    section_dd = gr.Dropdown(choices=["(all)"], value="(all)", interactive=False, visible=False, label="Section/Group filter")
     if course_col and course_col in df.columns:
         course_vals = ["(all)"] + sorted(df[course_col].astype(str).fillna("NA").unique().tolist())
@@ -119,9 +157,9 @@ def read_sheet(sheet_name, file_bytes, course_col, section_col):
     if section_col and section_col in df.columns:
         sec_vals = ["(all)"] + sorted(df[section_col].astype(str).fillna("NA").unique().tolist())
-        section_dd = gr.Dropdown(choices=sec_vals, value="(all)", interactive=True, visible=True, label="Section/Group filter")
-    return df, gr.Dropdown(choices=cols, interactive=True), gr.Dropdown(choices=cols, interactive=True)
 # =============================
@@ -143,34 +181,77 @@ def compute_dashboard(df, student_col, marks_col, grade_col, pass_mark, course_c
     if df is None or df.empty:
         raise gr.Error("Sheet is empty.")
-    d = apply_filters(df, course_col, section_col, course_filter, section_filter)
-    # metrics
     total = int(len(d))
-    d[marks_col] = _safe_numeric(d[marks_col])
-    valid = d[d[marks_col].notna()].copy()
     n = int(len(valid))
-    missing_marks = int(d[marks_col].isna().sum())
-    mean = float(valid[marks_col].mean()) if n else 0.0
-    std = float(valid[marks_col].std(ddof=0)) if n else 0.0
-    minv = float(valid[marks_col].min()) if n else 0.0
-    maxv = float(valid[marks_col].max()) if n else 0.0
-    pass_count = int((valid[marks_col] >= pass_mark).sum()) if n else 0
     pass_rate = (pass_count / n * 100.0) if n else 0.0
     # risk / borderline
-    risk_df = valid[valid[marks_col] < pass_mark][[student_col, marks_col, grade_col]].sort_values(by=marks_col).head(20)
-    borderline_df = valid[(valid[marks_col] >= pass_mark) & (valid[marks_col] < pass_mark + 5)][[student_col, marks_col, grade_col]].sort_values(by=marks_col).head(20)
-    top_df = valid[[student_col, marks_col, grade_col]].sort_values(by=marks_col, ascending=False).head(10)
-    bottom_df = valid[[student_col, marks_col, grade_col]].sort_values(by=marks_col, ascending=True).head(10)
-    grade_dist = d[grade_col].astype(str).fillna("NA").value_counts(dropna=False).rename("count").to_frame().reset_index()
     grade_dist.columns = [grade_col, "count"]
-    # Status
     if pass_rate >= 80:
         status = "GREEN"
     elif pass_rate >= 60:
@@ -178,36 +259,27 @@ def compute_dashboard(df, student_col, marks_col, grade_col, pass_mark, course_c
     else:
         status = "RED"
     insight = (
         f"Status: {status}. Pass rate {pass_rate:.1f}% (Pass mark {pass_mark}). "
         f"Average {mean:.1f}, Min {minv:.1f}, Max {maxv:.1f}, Std {std:.1f}. "
-        f"Missing marks: {missing_marks}."
     )
-    # --- Charts (matplotlib)
-    # 1) Histogram
-    fig1 = plt.figure()
-    plt.hist(valid[marks_col].dropna(), bins=10)
-    plt.title("Marks distribution")
-    plt.xlabel("Marks")
-    plt.ylabel("Students")
-    # 2) Box plot
-    fig2 = plt.figure()
-    plt.boxplot(valid[marks_col].dropna(), vert=True)
-    plt.title("Marks box plot")
-    plt.ylabel("Marks")
-    # 3) Grade bar
-    fig3 = plt.figure()
-    gd = grade_dist.set_index(grade_col)["count"]
-    plt.bar(gd.index.astype(str), gd.values)
-    plt.title("Grade distribution")
-    plt.xlabel("Grade")
-    plt.ylabel("Count")
-    plt.xticks(rotation=45, ha="right")
-    # KPI table
     kpi_df = pd.DataFrame(
         [
             ("Total rows (filtered)", total),
@@ -220,19 +292,81 @@ def compute_dashboard(df, student_col, marks_col, grade_col, pass_mark, course_c
             ("Std deviation", round(std, 2)),
             ("Minimum", round(minv, 2)),
             ("Maximum", round(maxv, 2)),
             ("Status", status),
             ("Insight", insight),
         ],
         columns=["Metric", "Value"],
     )
-    return kpi_df, grade_dist, risk_df, borderline_df, top_df, bottom_df, fig1, fig2, fig3
 # =============================
-# PDF (uses kpi + tables)
 # =============================
-def make_pdf(kpi_df, grade_dist, risk_df, borderline_df, top_df, bottom_df, title="Marks Dashboard Report"):
     buf = io.BytesIO()
     c = canvas.Canvas(buf, pagesize=A4)
     width, height = A4
@@ -257,6 +391,10 @@ def make_pdf(kpi_df, grade_dist, risk_df, borderline_df, top_df, bottom_df, titl
         c.setFont("Helvetica", 9.5)
         max_chars = 95
         for i in range(0, len(text), max_chars):
             c.drawString(x, y, text[i:i + max_chars])
             y -= 0.5 * cm
@@ -272,47 +410,55 @@ def make_pdf(kpi_df, grade_dist, risk_df, borderline_df, top_df, bottom_df, titl
         for _, r in df2.iterrows():
             line(" | ".join(r.values.tolist()))
-    h(title)
-    line(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
-    sh("1) KPI Summary")
-    table(kpi_df, max_rows=50)
-    if y < 6 * cm:
-        c.showPage()
-        y = height - 2 * cm
-    sh("2) Grade distribution")
-    table(grade_dist, max_rows=30)
-    if y < 6 * cm:
-        c.showPage()
-        y = height - 2 * cm
-    sh("3) At-risk students (below pass mark)")
-    table(risk_df, max_rows=20)
-    if y < 6 * cm:
-        c.showPage()
-        y = height - 2 * cm
-    sh("4) Borderline students (pass mark to pass mark + 5)")
-    table(borderline_df, max_rows=20)
-    if y < 6 * cm:
-        c.showPage()
-        y = height - 2 * cm
-    sh("5) Top 10")
     table(top_df, max_rows=10)
-    if y < 6 * cm:
-        c.showPage()
-        y = height - 2 * cm
-    sh("6) Bottom 10")
     table(bottom_df, max_rows=10)
     c.showPage()
     c.save()
     buf.seek(0)
     return buf
@@ -322,14 +468,20 @@ def generate_pdf_report(file_bytes, sheet_name, student_col, marks_col, grade_co
     if not file_bytes:
         raise gr.Error("Upload Excel first.")
     df = pd.read_excel(io.BytesIO(file_bytes), sheet_name=sheet_name, engine="openpyxl")
-    df = df.dropna(axis=1, how="all").dropna(axis=0, how="all")
-    kpi_df, grade_dist, risk_df, borderline_df, top_df, bottom_df, _, _, _ = compute_dashboard(
-        df, student_col, marks_col, grade_col, int(pass_mark),
-        course_col, section_col, course_filter, section_filter
     )
-    pdf_buf = make_pdf(kpi_df, grade_dist, risk_df, borderline_df, top_df, bottom_df, title="HoD Result Dashboard Report")
     fname = f"dashboard_report__{sheet_name}__{datetime.now().strftime('%Y%m%d_%H%M%S')}.pdf"
     return (fname, pdf_buf.getvalue())
@@ -338,7 +490,7 @@ def generate_pdf_report(file_bytes, sheet_name, student_col, marks_col, grade_co
 # UI
 # =============================
 with gr.Blocks(title="HoD Result Dashboard") as demo:
-    gr.Markdown("## 📊 HoD Result Dashboard (Excel Upload) — Insights + Charts + PDF")
     file_state = gr.State(None)
     sheet_state = gr.State(None)
@@ -355,35 +507,44 @@ with gr.Blocks(title="HoD Result Dashboard") as demo:
     with gr.Row():
         course_col = gr.Dropdown(label="Course column (optional)", choices=[], interactive=False, visible=False)
-        section_col = gr.Dropdown(label="Section/Group column (optional)", choices=[], interactive=False, visible=False)
     with gr.Row():
         course_filter = gr.Dropdown(label="Course filter", choices=["(all)"], value="(all)", interactive=False, visible=False)
-        section_filter = gr.Dropdown(label="Section/Group filter", choices=["(all)"], value="(all)", interactive=False, visible=False)
     df_preview = gr.Dataframe(label="Preview", interactive=False, wrap=True)
     analyze_btn = gr.Button("🔍 Refresh Dashboard")
-    with gr.Row():
-        kpi_table = gr.Dataframe(label="KPI Summary", interactive=False, wrap=True)
         grade_table = gr.Dataframe(label="Grade distribution", interactive=False, wrap=True)
-    with gr.Row():
-        risk_table = gr.Dataframe(label="At-risk (below pass) - Top 20", interactive=False, wrap=True)
-        borderline_table = gr.Dataframe(label="Borderline (pass to pass+5) - Top 20", interactive=False, wrap=True)
-    with gr.Row():
         top_table = gr.Dataframe(label="Top 10", interactive=False, wrap=True)
         bottom_table = gr.Dataframe(label="Bottom 10", interactive=False, wrap=True)
-    with gr.Row():
-        hist_plot = gr.Plot(label="Marks histogram")
-        box_plot = gr.Plot(label="Marks box plot")
-        grade_plot = gr.Plot(label="Grade chart")
     with gr.Row():
-        pdf_btn = gr.Button("📄 Generate PDF Report")
         pdf_out = gr.File(label="Download PDF")
     # Events
@@ -402,7 +563,7 @@ with gr.Blocks(title="HoD Result Dashboard") as demo:
     sheet_dd.change(
         fn=read_sheet,
         inputs=[sheet_dd, file_state, course_col, section_col],
-        outputs=[df_preview, course_filter, section_filter],
     )
     def on_refresh(file_bytes, sheet_name, s_col, m_col, g_col, pmark, c_col, sec_col, c_filter, sec_filter):
@@ -411,17 +572,18 @@ with gr.Blocks(title="HoD Result Dashboard") as demo:
         if not sheet_name:
             raise gr.Error("Select a sheet.")
         df = pd.read_excel(io.BytesIO(file_bytes), sheet_name=sheet_name, engine="openpyxl")
-        df = df.dropna(axis=1, how="all").dropna(axis=0, how="all")
-        return compute_dashboard(
-            df, s_col, m_col, g_col, int(pmark),
-            c_col, sec_col, c_filter, sec_filter
-        )
     analyze_btn.click(
         fn=on_refresh,
         inputs=[file_state, sheet_state, student_col, marks_col, grade_col, pass_mark, course_col, section_col, course_filter, section_filter],
-        outputs=[kpi_table, grade_table, risk_table, borderline_table, top_table, bottom_table, hist_plot, box_plot, grade_plot],
     )
     pdf_btn.click(

 import io
 from datetime import datetime
+import numpy as np
 import pandas as pd
 import gradio as gr
 import matplotlib.pyplot as plt
 from reportlab.lib.pagesizes import A4
 from reportlab.pdfgen import canvas
 from reportlab.lib.units import cm
+from reportlab.lib.utils import ImageReader
 # =============================
     return pd.to_numeric(series, errors="coerce")
+def _drop_useless_cols(df: pd.DataFrame) -> pd.DataFrame:
+    # drop fully empty columns + "Unnamed" columns
+    df = df.dropna(axis=1, how="all").dropna(axis=0, how="all")
+    unnamed = [c for c in df.columns if str(c).strip().lower().startswith("unnamed")]
+    if unnamed:
+        df = df.drop(columns=unnamed, errors="ignore")
+    return df
def _guess_cols(df: pd.DataFrame):
    """Guess which columns hold student, marks, grade, course and section data.

    Role columns are matched by keywords in the (trimmed, lower-cased) header.
    The marks column is chosen from the data, because its header may be
    numeric (e.g. ``100``) rather than a word:

    1. Columns whose header contains a marks-like word and whose values are
       at least half numeric are preferred.
    2. Otherwise the most numeric column that was not already claimed by
       another role is used, so a numeric ID / roll-number column does not
       win just because it comes first.
    3. If every column is claimed, all columns are considered.

    Ties go to the left-most column.

    Args:
        df: Cleaned frame for the selected sheet.

    Returns:
        Tuple ``(student_guess, marks_guess, grade_guess, course_guess,
        section_guess)``. Student and grade fall back to the first column
        when no header matches; course and section are ``None`` when no
        header matches. All five are ``None`` when the frame has no columns.
    """
    cols = list(df.columns)
    if not cols:
        # Nothing to guess from; the cols[0] fallbacks below would raise.
        return None, None, None, None, None

    lower = {c: str(c).strip().lower() for c in cols}

    def first_match(keywords):
        # Left-most column whose header contains any of the keywords.
        return next((c for c in cols if any(k in lower[c] for k in keywords)), None)

    def numeric_ratio(col):
        # Share of values that parse as numbers; 0.0 for a frame with no rows.
        ratio = _safe_numeric(df[col]).notna().mean()
        return 0.0 if pd.isna(ratio) else float(ratio)

    grade_match = first_match(["grade", "grde"])
    student_match = first_match(["student", "name", "id", "roll", "reg", "sno"])
    course_guess = first_match(["course", "module", "subject"])
    section_guess = first_match(["section", "group", "batch", "class"])

    ratios = {c: numeric_ratio(c) for c in cols}
    marks_hints = ("mark", "score", "total", "final")
    hinted = [
        c for c in cols
        if ratios[c] >= 0.5 and any(k in lower[c] for k in marks_hints)
    ]
    claimed = [
        c for c in (grade_match, student_match, course_guess, section_guess)
        if c is not None
    ]
    unclaimed = [c for c in cols if c not in claimed]
    pool = hinted or unclaimed or cols
    # max() returns the first maximal element, so the left-most column wins ties.
    best_marks = max(pool, key=lambda c: ratios[c])

    grade_guess = grade_match if grade_match is not None else cols[0]
    student_guess = student_match if student_match is not None else cols[0]
    return student_guess, best_marks, grade_guess, course_guess, section_guess
def _fig_to_png_bytes(fig):
    """Render a matplotlib figure to PNG and close it.

    Args:
        fig: Figure to render. It is closed before returning, so it must not
            be reused by the caller afterwards.

    Returns:
        An ``io.BytesIO`` holding the PNG data, positioned at the start.
    """
    buf = io.BytesIO()
    try:
        fig.savefig(buf, format="png", dpi=180, bbox_inches="tight")
    finally:
        # Close even when rendering fails, so figures do not accumulate in
        # pyplot's registry across requests.
        plt.close(fig)
    buf.seek(0)
    return buf
 # =============================
+# Load Excel
 # =============================
 def load_excel(file_obj):
     try:
         sheet0 = sheets[0]
         df = pd.read_excel(io.BytesIO(file_bytes), sheet_name=sheet0, engine="openpyxl")
+        df = _drop_useless_cols(df)
+        s_guess, m_guess, g_guess, c_guess, sec_guess = _guess_cols(df)
         cols = list(df.columns)
+        # Filters (optional)
+        course_dd = gr.Dropdown(choices=["(all)"], value="(all)", interactive=False, visible=False, label="Course filter")
+        section_dd = gr.Dropdown(choices=["(all)"], value="(all)", interactive=False, visible=False, label="Section filter")
+        if c_guess and c_guess in df.columns:
             course_vals = ["(all)"] + sorted(df[c_guess].astype(str).fillna("NA").unique().tolist())
             course_dd = gr.Dropdown(choices=course_vals, value="(all)", interactive=True, visible=True, label="Course filter")
+        if sec_guess and sec_guess in df.columns:
             sec_vals = ["(all)"] + sorted(df[sec_guess].astype(str).fillna("NA").unique().tolist())
+            section_dd = gr.Dropdown(choices=sec_vals, value="(all)", interactive=True, visible=True, label="Section filter")
         return (
             gr.Dropdown(choices=sheets, value=sheet0, interactive=True),
             gr.Dropdown(choices=cols, value=m_guess, interactive=True),
             gr.Dropdown(choices=cols, value=g_guess, interactive=True),
             gr.Dropdown(choices=cols, value=(c_guess or cols[0]), interactive=bool(c_guess), visible=bool(c_guess), label="Course column"),
+            gr.Dropdown(choices=cols, value=(sec_guess or cols[0]), interactive=bool(sec_guess), visible=bool(sec_guess), label="Section column"),
             course_dd,
             section_dd,
             file_bytes,
+            sheet0,  # sheet_state
         )
     except Exception:
         return (
     if not file_bytes:
         raise ValueError("Upload Excel first.")
     df = pd.read_excel(io.BytesIO(file_bytes), sheet_name=sheet_name, engine="openpyxl")
+    df = _drop_useless_cols(df)
+    # Update filter choices based on selected columns
     course_dd = gr.Dropdown(choices=["(all)"], value="(all)", interactive=False, visible=False, label="Course filter")
+    section_dd = gr.Dropdown(choices=["(all)"], value="(all)", interactive=False, visible=False, label="Section filter")
     if course_col and course_col in df.columns:
         course_vals = ["(all)"] + sorted(df[course_col].astype(str).fillna("NA").unique().tolist())
     if section_col and section_col in df.columns:
         sec_vals = ["(all)"] + sorted(df[section_col].astype(str).fillna("NA").unique().tolist())
+        section_dd = gr.Dropdown(choices=sec_vals, value="(all)", interactive=True, visible=True, label="Section filter")
+    return df, course_dd, section_dd, sheet_name  # IMPORTANT: update sheet_state
 # =============================
     if df is None or df.empty:
         raise gr.Error("Sheet is empty.")
+    d = apply_filters(df, course_col, section_col, course_filter, section_filter).copy()
+    # numeric marks
+    d["_marks"] = _safe_numeric(d[marks_col]) if marks_col in d.columns else np.nan
+    d["_grade"] = d[grade_col].astype(str).str.strip().replace({"nan": "NA"}) if grade_col in d.columns else "NA"
     total = int(len(d))
+    valid = d[d["_marks"].notna()].copy()
     n = int(len(valid))
+    missing_marks = int(d["_marks"].isna().sum())
+    mean = float(valid["_marks"].mean()) if n else 0.0
+    std = float(valid["_marks"].std(ddof=0)) if n else 0.0
+    minv = float(valid["_marks"].min()) if n else 0.0
+    maxv = float(valid["_marks"].max()) if n else 0.0
+    pass_count = int((valid["_marks"] >= pass_mark).sum()) if n else 0
     pass_rate = (pass_count / n * 100.0) if n else 0.0
+    # distribution shape (simple but useful)
+    skew = float(valid["_marks"].skew()) if n else 0.0
+    kurt = float(valid["_marks"].kurt()) if n else 0.0
+    # percentiles
+    pct = {}
+    if n:
+        for p in [10, 25, 50, 75, 90]:
+            pct[f"P{p}"] = float(np.percentile(valid["_marks"], p))
+    percentiles_df = pd.DataFrame(list(pct.items()), columns=["Percentile", "Marks"]) if pct else pd.DataFrame()
+    # heaping: most repeated marks (teacher-friendly)
+    heaping_df = (
+        valid["_marks"].round(0).astype(int).value_counts().head(12).rename("count").reset_index()
+        .rename(columns={"index": "Mark"})
+    )
+    # IQR outliers
+    if n:
+        q1 = float(np.percentile(valid["_marks"], 25))
+        q3 = float(np.percentile(valid["_marks"], 75))
+        iqr = q3 - q1
+        low_thr = q1 - 1.5 * iqr
+        high_thr = q3 + 1.5 * iqr
+        outliers = valid[(valid["_marks"] < low_thr) | (valid["_marks"] > high_thr)].copy()
+    else:
+        outliers = valid.head(0).copy()
+        low_thr = high_thr = 0.0
+    outliers_df = outliers[[student_col, marks_col, grade_col]].head(30) if not outliers.empty else pd.DataFrame()
     # risk / borderline
+    risk_df = valid[valid["_marks"] < pass_mark][[student_col, marks_col, grade_col]].sort_values(by="_marks").head(25)
+    borderline_df = valid[(valid["_marks"] >= pass_mark) & (valid["_marks"] < pass_mark + 5)][[student_col, marks_col, grade_col]].sort_values(by="_marks").head(25)
+    top_df = valid[[student_col, marks_col, grade_col]].sort_values(by="_marks", ascending=False).head(10)
+    bottom_df = valid[[student_col, marks_col, grade_col]].sort_values(by="_marks", ascending=True).head(10)
+    grade_dist = d["_grade"].value_counts(dropna=False).rename("count").to_frame().reset_index()
     grade_dist.columns = [grade_col, "count"]
+    # Grade -> marks mapping (VERY useful for teachers)
+    grade_stats = (
+        valid.assign(_g=d["_grade"])
+        .groupby(d["_grade"])["_marks"]
+        .agg(["count", "mean", "std", "min", "median", "max"])
+        .reset_index()
+        .rename(columns={"_grade": "Grade"})
+        .sort_values("mean", ascending=False)
+    )
+    # status
     if pass_rate >= 80:
         status = "GREEN"
     elif pass_rate >= 60:
     else:
         status = "RED"
+    # simple pattern flags
+    flags = []
+    if missing_marks > 0:
+        flags.append(f"{missing_marks} missing mark(s) need verification.")
+    if abs(skew) > 0.7:
+        flags.append("Marks are noticeably skewed (not symmetric).")
+    if kurt > 1.0:
+        flags.append("Marks have heavy tails (more extremes than normal).")
+    if len(heaping_df) and heaping_df["count"].iloc[0] >= max(10, 0.06 * n):
+        flags.append("Many students share the same mark(s) (mark heaping / clustering).")
+    if len(outliers_df) > 0:
+        flags.append("Outliers detected using IQR rule (check special cases).")
+    flags_text = " | ".join(flags) if flags else "No strong warnings detected."
     insight = (
         f"Status: {status}. Pass rate {pass_rate:.1f}% (Pass mark {pass_mark}). "
         f"Average {mean:.1f}, Min {minv:.1f}, Max {maxv:.1f}, Std {std:.1f}. "
+        f"Skew {skew:.2f}, Kurtosis {kurt:.2f}. Missing marks: {missing_marks}. "
+        f"Flags: {flags_text}"
     )
     kpi_df = pd.DataFrame(
         [
             ("Total rows (filtered)", total),
             ("Std deviation", round(std, 2)),
             ("Minimum", round(minv, 2)),
             ("Maximum", round(maxv, 2)),
+            ("Skewness", round(skew, 3)),
+            ("Kurtosis", round(kurt, 3)),
+            ("Outlier low threshold (IQR)", round(low_thr, 2)),
+            ("Outlier high threshold (IQR)", round(high_thr, 2)),
             ("Status", status),
             ("Insight", insight),
         ],
         columns=["Metric", "Value"],
     )
+    # -------- Charts (matplotlib)
+    # 1) Histogram
+    fig1 = plt.figure()
+    plt.hist(valid["_marks"].dropna(), bins=12)
+    plt.title("Marks distribution")
+    plt.xlabel("Marks")
+    plt.ylabel("Students")
+    # 2) CDF curve (excellent for interpretation)
+    fig2 = plt.figure()
+    xs = np.sort(valid["_marks"].dropna().values) if n else np.array([])
+    ys = np.arange(1, len(xs) + 1) / len(xs) if len(xs) else np.array([])
+    plt.plot(xs, ys)
+    plt.title("Cumulative distribution (CDF)")
+    plt.xlabel("Marks")
+    plt.ylabel("Proportion ≤ mark")
+    # 3) Grade bar
+    fig3 = plt.figure()
+    gd = grade_dist.set_index(grade_col)["count"]
+    plt.bar(gd.index.astype(str), gd.values)
+    plt.title("Grade distribution")
+    plt.xlabel("Grade")
+    plt.ylabel("Count")
+    plt.xticks(rotation=45, ha="right")
+    # 4) Boxplot by grade (pattern across grades)
+    fig4 = plt.figure()
+    # Keep grades ordered by mean
+    order = grade_stats[grade_stats.columns[0]].tolist() if not grade_stats.empty else []
+    data = [valid.loc[d["_grade"] == g, "_marks"].dropna().values for g in order] if order else []
+    if data:
+        plt.boxplot(data, labels=[str(g) for g in order], vert=True)
+        plt.title("Marks spread by Grade")
+        plt.xlabel("Grade")
+        plt.ylabel("Marks")
+        plt.xticks(rotation=45, ha="right")
+    else:
+        plt.title("Marks spread by Grade (no data)")
+    return (
+        kpi_df,
+        grade_dist,
+        grade_stats,
+        percentiles_df,
+        heaping_df,
+        outliers_df,
+        risk_df,
+        borderline_df,
+        top_df,
+        bottom_df,
+        fig1,
+        fig2,
+        fig3,
+        fig4,
+    )
 # =============================
+# PDF (with charts embedded)
 # =============================
+def make_pdf(kpi_df, grade_dist, grade_stats, percentiles_df, heaping_df, outliers_df,
+             risk_df, borderline_df, top_df, bottom_df,
+             fig1, fig2, fig3, fig4,
+             title="Marks Dashboard Report"):
     buf = io.BytesIO()
     c = canvas.Canvas(buf, pagesize=A4)
     width, height = A4
         c.setFont("Helvetica", 9.5)
         max_chars = 95
         for i in range(0, len(text), max_chars):
+            if y < 2.2 * cm:
+                c.showPage()
+                y = height - 2 * cm
+                c.setFont("Helvetica", 9.5)
             c.drawString(x, y, text[i:i + max_chars])
             y -= 0.5 * cm
         for _, r in df2.iterrows():
             line(" | ".join(r.values.tolist()))
+    def add_chart(fig, caption):
+        nonlocal y
+        png = _fig_to_png_bytes(fig)
+        img = ImageReader(png)
+        img_w = width - 4 * cm
+        img_h = 7.0 * cm  # fixed height to keep layout stable
+        if y < (img_h + 3.0 * cm):
+            c.showPage()
+            y = height - 2 * cm
+        c.setFont("Helvetica-Bold", 10.5)
+        c.drawString(x, y, caption)
+        y -= 0.5 * cm
+        c.drawImage(img, x, y - img_h, width=img_w, height=img_h, preserveAspectRatio=True, anchor='nw')
+        y -= (img_h + 0.7 * cm)
+    h(title)
+    line(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+    sh("1) KPI Summary")
+    table(kpi_df, max_rows=60)
+    sh("2) Key Patterns (Percentiles + Heaping)")
+    table(percentiles_df, max_rows=10)
+    table(heaping_df, max_rows=12)
+    sh("3) Grade Distribution + Grade-to-Marks Mapping")
+    table(grade_dist, max_rows=40)
+    table(grade_stats, max_rows=40)
+    sh("4) At-risk / Borderline / Outliers")
+    table(risk_df, max_rows=25)
+    table(borderline_df, max_rows=25)
+    table(outliers_df, max_rows=30)
+    sh("5) Top & Bottom")
     table(top_df, max_rows=10)
     table(bottom_df, max_rows=10)
+    # charts pages
     c.showPage()
+    y = height - 2 * cm
+    h("Charts")
+    add_chart(fig1, "Chart 1: Marks Distribution (Histogram)")
+    add_chart(fig2, "Chart 2: CDF (Proportion of students at/below a mark)")
+    add_chart(fig3, "Chart 3: Grade Distribution (Bar)")
+    add_chart(fig4, "Chart 4: Marks Spread by Grade (Boxplot)")
     c.save()
     buf.seek(0)
     return buf
     if not file_bytes:
         raise gr.Error("Upload Excel first.")
     df = pd.read_excel(io.BytesIO(file_bytes), sheet_name=sheet_name, engine="openpyxl")
+    df = _drop_useless_cols(df)
+    (
+        kpi_df, grade_dist, grade_stats, percentiles_df, heaping_df, outliers_df,
+        risk_df, borderline_df, top_df, bottom_df,
+        fig1, fig2, fig3, fig4
+    ) = compute_dashboard(df, student_col, marks_col, grade_col, int(pass_mark), course_col, section_col, course_filter, section_filter)
+    pdf_buf = make_pdf(
+        kpi_df, grade_dist, grade_stats, percentiles_df, heaping_df, outliers_df,
+        risk_df, borderline_df, top_df, bottom_df,
+        fig1, fig2, fig3, fig4,
+        title="HoD Result Dashboard Report"
     )
     fname = f"dashboard_report__{sheet_name}__{datetime.now().strftime('%Y%m%d_%H%M%S')}.pdf"
     return (fname, pdf_buf.getvalue())
 # UI
 # =============================
 with gr.Blocks(title="HoD Result Dashboard") as demo:
+    gr.Markdown("## 📊 HoD Result Dashboard — Teacher Insights (Patterns + Stats + Charts + PDF)")
     file_state = gr.State(None)
     sheet_state = gr.State(None)
     with gr.Row():
         course_col = gr.Dropdown(label="Course column (optional)", choices=[], interactive=False, visible=False)
+        section_col = gr.Dropdown(label="Section column (optional)", choices=[], interactive=False, visible=False)
     with gr.Row():
         course_filter = gr.Dropdown(label="Course filter", choices=["(all)"], value="(all)", interactive=False, visible=False)
+        section_filter = gr.Dropdown(label="Section filter", choices=["(all)"], value="(all)", interactive=False, visible=False)
     df_preview = gr.Dataframe(label="Preview", interactive=False, wrap=True)
     analyze_btn = gr.Button("🔍 Refresh Dashboard")
+    with gr.Tab("Overview"):
+        kpi_table = gr.Dataframe(label="KPI Summary (includes insight + flags)", interactive=False, wrap=True)
         grade_table = gr.Dataframe(label="Grade distribution", interactive=False, wrap=True)
+    with gr.Tab("Patterns"):
+        percentiles_table = gr.Dataframe(label="Percentiles (P10/P25/P50/P75/P90)", interactive=False, wrap=True)
+        heaping_table = gr.Dataframe(label="Most repeated marks (heaping / clustering)", interactive=False, wrap=True)
+        outliers_table = gr.Dataframe(label="Outliers (IQR rule) - first 30", interactive=False, wrap=True)
+    with gr.Tab("By Grade"):
+        grade_stats_table = gr.Dataframe(label="Grade → Marks mapping (min/max/mean/median)", interactive=False, wrap=True)
+    with gr.Tab("At-risk / Ranking"):
+        risk_table = gr.Dataframe(label="At-risk (below pass) - Top 25", interactive=False, wrap=True)
+        borderline_table = gr.Dataframe(label="Borderline (pass to pass+5) - Top 25", interactive=False, wrap=True)
         top_table = gr.Dataframe(label="Top 10", interactive=False, wrap=True)
         bottom_table = gr.Dataframe(label="Bottom 10", interactive=False, wrap=True)
+    with gr.Tab("Charts"):
+        with gr.Row():
+            hist_plot = gr.Plot(label="Histogram")
+            cdf_plot = gr.Plot(label="CDF")
+        with gr.Row():
+            grade_plot = gr.Plot(label="Grade distribution")
+            grade_box = gr.Plot(label="Boxplot by grade")
     with gr.Row():
+        pdf_btn = gr.Button("📄 Generate PDF Report (with charts)")
         pdf_out = gr.File(label="Download PDF")
     # Events
     sheet_dd.change(
         fn=read_sheet,
         inputs=[sheet_dd, file_state, course_col, section_col],
+        outputs=[df_preview, course_filter, section_filter, sheet_state],  # IMPORTANT
     )
     def on_refresh(file_bytes, sheet_name, s_col, m_col, g_col, pmark, c_col, sec_col, c_filter, sec_filter):
         if not sheet_name:
             raise gr.Error("Select a sheet.")
         df = pd.read_excel(io.BytesIO(file_bytes), sheet_name=sheet_name, engine="openpyxl")
+        df = _drop_useless_cols(df)
+        return compute_dashboard(df, s_col, m_col, g_col, int(pmark), c_col, sec_col, c_filter, sec_filter)
     analyze_btn.click(
         fn=on_refresh,
         inputs=[file_state, sheet_state, student_col, marks_col, grade_col, pass_mark, course_col, section_col, course_filter, section_filter],
+        outputs=[
+            kpi_table, grade_table, grade_stats_table, percentiles_table, heaping_table, outliers_table,
+            risk_table, borderline_table, top_table, bottom_table,
+            hist_plot, cdf_plot, grade_plot, grade_box
+        ],
     )
     pdf_btn.click(