Spaces:

Deevyankar
/

cga

Sleeping

File size: 17,024 Bytes

847e426
df4a47d
847e426
 
8a12b71
df4a47d
847e426
8a12b71
 
 
 
847e426
 
 
 
 
 
8a12b71
 
847e426
8a12b71
 
 
bb85f6c
df4a47d
 
 
 
 
 
 
bb85f6c
 
 
8a12b71
bb85f6c
 
 
df4a47d
bb85f6c
df4a47d
 
bb85f6c
 
 
df4a47d
bb85f6c
 
 
847e426
 
bb85f6c
 
 
 
 
df4a47d
 
8a12b71
 
e622de3
8a12b71
e622de3
8a12b71
 
 
 
b98be59
bb85f6c
b98be59
bb85f6c
8a12b71
 
 
df4a47d
e622de3
df4a47d
 
8a12b71
 
df4a47d
8a12b71
b98be59
8a12b71
bb85f6c
df4a47d
 
 
 
8a12b71
df4a47d
8a12b71
 
bb85f6c
 
 
 
 
df4a47d
 
 
bb85f6c
 
 
df4a47d
bb85f6c
 
 
 
 
 
 
 
 
 
 
 
 
b98be59
e622de3
b98be59
 
bb85f6c
df4a47d
bb85f6c
b98be59
 
 
 
 
 
 
 
e622de3
bb85f6c
b98be59
 
 
df4a47d
 
 
bb85f6c
8a12b71
 
 
 
 
 
 
bb85f6c
df4a47d
b98be59
bb85f6c
 
 
 
 
df4a47d
bb85f6c
b98be59
bb85f6c
 
 
 
b98be59
 
bb85f6c
 
 
 
 
 
 
8a12b71
 
bb85f6c
 
8a12b71
 
 
b98be59
8a12b71
 
 
bb85f6c
 
8a12b71
 
 
 
df4a47d
 
 
 
b98be59
bb85f6c
8a12b71
 
 
 
e622de3
bb85f6c
df4a47d
 
bb85f6c
b98be59
df4a47d
 
 
bb85f6c
df4a47d
 
 
b98be59
 
bb85f6c
9655019
df4a47d
b98be59
df4a47d
bb85f6c
df4a47d
 
 
 
 
 
 
 
bb85f6c
df4a47d
b98be59
bb85f6c
b98be59
bb85f6c
b98be59
df4a47d
 
 
 
b98be59
 
bb85f6c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8a12b71
 
 
9655019
8a12b71
 
bb85f6c
847e426
bb85f6c
 
847e426
 
 
 
 
 
8a12b71
 
 
847e426
8a12b71
 
df4a47d
847e426
8a12b71
 
df4a47d
8a12b71
bb85f6c
8a12b71
9655019
8a12b71
9655019
 
 
bb85f6c
9655019
bb85f6c
 
 
 
df4a47d
 
 
 
 
 
 
 
bb85f6c
847e426
bb85f6c
e622de3
bb85f6c
 
e622de3
 
bb85f6c
e622de3
 
bb85f6c
e622de3
 
bb85f6c
 
 
e622de3
bb85f6c
 
 
e622de3
bb85f6c
 
9655019
e622de3
 
bb85f6c
e622de3
 
 
bb85f6c
b98be59
 
9655019
 
 
bb85f6c
 
 
 
 
 
b98be59
 
847e426
e622de3
847e426
bb85f6c
847e426
 
e622de3
 
 
9655019
bb85f6c
e622de3
 
 
 
 
 
 
 
 
 
 
 
 
 
847e426
e622de3
bb85f6c
b98be59
8a12b71
 
e622de3
8a12b71
 
 
 
9655019
bb85f6c
8a12b71
bb85f6c
9655019
 
 
 
bb85f6c
 
 
9655019
8a12b71
 
 
bb85f6c
 
 
847e426
 
b98be59

import io
import numpy as np
import pandas as pd
import gradio as gr
import matplotlib.pyplot as plt


# =============================
# Helpers
# =============================
def _read_file_bytes(file_obj):
    """Return the raw bytes of an uploaded file.

    Args:
        file_obj: Either a filesystem path (``str`` or ``os.PathLike``) or an
            object whose ``name`` attribute holds the path of the uploaded
            file on disk.

    Returns:
        The complete file content as ``bytes``.

    Raises:
        ValueError: If nothing was uploaded, no path can be determined, or
            the file is empty.
        OSError: If the file cannot be opened or read.
    """
    import os  # local import so this helper does not depend on file-level imports

    if file_obj is None:
        raise ValueError("No file uploaded.")

    # Path-like values are used as-is. Checking them first matters for
    # pathlib.Path, whose ``.name`` is only the basename, not the full path.
    if isinstance(file_obj, (str, os.PathLike)):
        file_path = os.fspath(file_obj)
    else:
        file_path = getattr(file_obj, "name", None)

    if not file_path:
        raise ValueError("Invalid uploaded file object.")

    with open(file_path, "rb") as f:
        data = f.read()
    if not data:
        raise ValueError("Uploaded file is empty.")
    return data


def _drop_useless(df: pd.DataFrame) -> pd.DataFrame:
    """Strip content-free parts of a sheet.

    Removes, in this order: columns that are entirely missing, rows that are
    entirely missing, and columns whose label starts with "unnamed"
    (case-insensitive, ignoring surrounding whitespace).
    """
    cleaned = df.dropna(axis=1, how="all")
    cleaned = cleaned.dropna(axis=0, how="all")

    placeholder_cols = []
    for label in cleaned.columns:
        if str(label).strip().lower().startswith("unnamed"):
            placeholder_cols.append(label)

    if not placeholder_cols:
        return cleaned
    return cleaned.drop(columns=placeholder_cols, errors="ignore")


def _safe_numeric(s):
    """Convert *s* to numbers; entries that cannot be parsed become NaN."""
    converted = pd.to_numeric(s, errors="coerce")
    return converted


def _guess_marks_col(df: pd.DataFrame):
    """Guess which column of *df* holds the marks.

    The column with the largest share of values that parse as numbers wins;
    ties go to the earliest column.

    Returns:
        The label of the chosen column, or ``None`` when *df* has no columns
        (previously this case raised ``IndexError``).
    """
    cols = list(df.columns)
    if not cols:
        return None

    best, best_score = cols[0], -1
    for c in cols:
        # Fraction of entries in this column that are numeric.
        score = _safe_numeric(df[c]).notna().mean()
        if score > best_score:
            best_score = score
            best = c
    return best


def _guess_grade_col(cols):
    """Guess which column label refers to the grade.

    Returns the first label containing "grade" (or the common typo "grde"),
    compared case-insensitively. Falls back to the first label when nothing
    matches, and to ``None`` when *cols* is empty (previously ``IndexError``).
    """
    cols = list(cols)
    if not cols:
        return None
    for c in cols:
        name = str(c).strip().lower()
        if "grade" in name or "grde" in name:
            return c
    return cols[0]


def _guess_optional(cols):
    """Guess the optional course and section columns from their labels.

    Returns a ``(course, section)`` pair. Each entry is the first label that
    contains one of the relevant keywords (case-insensitive), or ``None``
    when no label matches.
    """
    course_words = ("course", "module", "subject")
    section_words = ("section", "group", "batch", "class")

    def first_match(words):
        for col in cols:
            label = str(col).strip().lower()
            if any(word in label for word in words):
                return col
        return None

    return first_match(course_words), first_match(section_words)


def apply_filters(df, course_col, section_col, course_filter, section_filter):
    """Return a copy of *df* narrowed to the chosen course and/or section.

    A filter is applied only when its column name is set and present in the
    data and its value is set and is not the "(all)" placeholder. Column
    values are compared as strings.
    """
    result = df.copy()
    selections = ((course_col, course_filter), (section_col, section_filter))
    for column, wanted in selections:
        if not column or column not in result.columns:
            continue
        if not wanted or wanted == "(all)":
            continue
        as_text = result[column].astype(str).fillna("NA")
        result = result[as_text == wanted]
    return result


# =============================
# Core HoD Insights (no student tables)
# =============================
def compute_hod_insights(df, marks_col, grade_col, pass_mark, course_col, section_col, course_filter, section_filter):
    """Build the aggregate KPI tables, narrative summary and charts for one sheet.

    Only aggregated figures are produced; no per-student rows are returned.

    Args:
        df: Sheet data as a DataFrame.
        marks_col: Column holding the marks. Values that do not parse as
            numbers are counted as missing.
        grade_col: Column holding the grade labels. When it is not a column
            of the data, every row gets the grade "NA".
        pass_mark: Pass threshold; converted with ``int()``.
        course_col: Optional column used for the course filter.
        section_col: Optional column used for the section filter and the
            section comparison.
        course_filter: Course value to keep, or "(all)".
        section_filter: Section value to keep, or "(all)".

    Returns:
        A 12-tuple ``(kpi, pct_df, grade_dist, grade_stats, heaping,
        section_table, insight, fig1, fig2, fig3, fig4, fig5)``: six
        DataFrames, the Markdown summary string, and five matplotlib figures
        (histogram, CDF, grade distribution, boxplot by grade, section pass
        rate).

    Raises:
        gr.Error: If *df* is ``None`` or empty.
    """
    # Figures are built with the object-oriented API instead of pyplot.
    # pyplot keeps every figure it creates in a global registry until it is
    # closed explicitly, so creating five per refresh in a long-running
    # server leaks memory; pyplot's global state is also not thread-safe.
    from matplotlib.figure import Figure

    if df is None or df.empty:
        raise gr.Error("Sheet is empty.")

    d = apply_filters(df, course_col, section_col, course_filter, section_filter).copy()

    has_grade = grade_col in d.columns
    # Header used for the grade tables; falls back to a fixed label so a
    # missing/stale grade column never yields a None header.
    grade_label = grade_col if has_grade else "Grade"

    d["_marks"] = _safe_numeric(d[marks_col]) if marks_col in d.columns else np.nan
    if has_grade:
        # The trailing fillna covers pandas versions where astype(str) keeps
        # missing values as missing instead of producing the text "nan".
        d["_grade"] = d[grade_col].astype(str).str.strip().replace({"nan": "NA"}).fillna("NA")
    else:
        d["_grade"] = "NA"

    total = int(len(d))
    valid = d[d["_marks"].notna()].copy()
    n = int(len(valid))
    missing = int(d["_marks"].isna().sum())

    pass_mark = int(pass_mark)
    mean = float(valid["_marks"].mean()) if n else 0.0
    std = float(valid["_marks"].std(ddof=0)) if n else 0.0
    minv = float(valid["_marks"].min()) if n else 0.0
    maxv = float(valid["_marks"].max()) if n else 0.0

    pass_count = int((valid["_marks"] >= pass_mark).sum()) if n else 0
    pass_rate = (pass_count / n * 100.0) if n else 0.0

    # Borderline (pass to pass+5) and just-below (pass-5 to pass-1)
    borderline_pass = int(((valid["_marks"] >= pass_mark) & (valid["_marks"] < pass_mark + 5)).sum()) if n else 0
    borderline_fail = int(((valid["_marks"] < pass_mark) & (valid["_marks"] >= pass_mark - 5)).sum()) if n else 0

    # Distribution shape. pandas returns NaN for skew with fewer than 3 values
    # and for kurtosis with fewer than 4; report 0.0 instead, like the other
    # statistics do when there is no data.
    skew = float(valid["_marks"].skew()) if n >= 3 else 0.0
    kurt = float(valid["_marks"].kurt()) if n >= 4 else 0.0

    # Outliers by IQR
    outlier_count = 0
    low_thr = high_thr = 0.0
    if n:
        q1 = float(np.percentile(valid["_marks"], 25))
        q3 = float(np.percentile(valid["_marks"], 75))
        iqr = q3 - q1
        low_thr = q1 - 1.5 * iqr
        high_thr = q3 + 1.5 * iqr
        outlier_count = int(((valid["_marks"] < low_thr) | (valid["_marks"] > high_thr)).sum())

    # Percentiles
    pct_df = pd.DataFrame(
        [(f"P{p}", round(float(np.percentile(valid["_marks"], p)), 2)) for p in [10, 25, 50, 75, 90]]
        if n else [],
        columns=["Percentile", "Marks"]
    )

    # Grade distribution
    grade_dist = d["_grade"].value_counts(dropna=False).rename("count").to_frame().reset_index()
    grade_dist.columns = [grade_label, "count"]
    grade_dist["%"] = (grade_dist["count"] / grade_dist["count"].sum() * 100).round(2) if len(grade_dist) else 0

    # Grade ↔ marks mapping (moderation evidence)
    grade_stats = (
        valid.groupby(d["_grade"])["_marks"]
        .agg(["count", "mean", "std", "min", "median", "max"])
        .reset_index()
        .rename(columns={"_grade": "Grade"})
        .sort_values("mean", ascending=False)
    )

    # Mark heaping (repeated marks). Naming the index before reset_index gives
    # a "Mark" column on every pandas version; since pandas 2.0 the reset
    # column is no longer called "index", so renaming "index" had no effect.
    heaping = (
        valid["_marks"].round(0).astype(int)
        .value_counts().head(12)
        .rename_axis("Mark")
        .reset_index(name="count")
    )

    # Course status
    if pass_rate >= 80:
        status = "GREEN"
    elif pass_rate >= 60:
        status = "AMBER"
    else:
        status = "RED"

    # Flags
    flags = []
    if missing > 0:
        flags.append(f"{missing} missing mark(s) — verify completeness.")
    if borderline_fail > max(5, 0.03 * n):
        flags.append("Many students just below pass — consider targeted support / moderation review.")
    if borderline_pass > max(5, 0.03 * n):
        flags.append("Many students just above pass — borderline attainment cluster.")
    if abs(skew) > 0.7:
        flags.append("Skewed distribution — check assessment balance and marking consistency.")
    if outlier_count > 0:
        flags.append(f"{outlier_count} outlier(s) by IQR — spot-check extremes.")
    if len(heaping) and heaping["count"].iloc[0] >= max(10, 0.06 * n):
        flags.append("Heaping detected — many students share identical marks (rounding/marking pattern).")

    flags_text = " | ".join(flags) if flags else "No major warning patterns detected."

    insight = (
        f"**Status:** {status}  \n"
        f"**Pass rate:** {pass_rate:.1f}% (Pass mark = {pass_mark})  \n"
        f"**Avg:** {mean:.1f} | **Std:** {std:.1f} | **Min/Max:** {minv:.1f}/{maxv:.1f}  \n"
        f"**Borderline (just below pass):** {borderline_fail} | **Borderline (just above pass):** {borderline_pass}  \n"
        f"**Skew:** {skew:.2f} | **Kurtosis:** {kurt:.2f} | **Outliers:** {outlier_count} | **Missing:** {missing}  \n"
        f"**Flags:** {flags_text}"
    )

    # KPI table
    kpi = pd.DataFrame(
        [
            ("Total rows (filtered)", total),
            ("Students with numeric marks", n),
            ("Missing marks", missing),
            ("Pass mark", pass_mark),
            ("Pass count", pass_count),
            ("Pass rate (%)", round(pass_rate, 2)),
            ("Borderline just below pass", borderline_fail),
            ("Borderline just above pass", borderline_pass),
            ("Average", round(mean, 2)),
            ("Std deviation", round(std, 2)),
            ("Minimum", round(minv, 2)),
            ("Maximum", round(maxv, 2)),
            ("Skewness", round(skew, 3)),
            ("Kurtosis", round(kurt, 3)),
            ("Outlier low threshold (IQR)", round(low_thr, 2)),
            ("Outlier high threshold (IQR)", round(high_thr, 2)),
            ("Outlier count (IQR)", outlier_count),
            ("Status", status),
        ],
        columns=["Metric", "Value"],
    )

    # Charts
    # 1 Histogram
    fig1 = Figure()
    ax1 = fig1.subplots()
    ax1.hist(valid["_marks"].dropna(), bins=12)
    ax1.axvline(pass_mark, linestyle="--")
    ax1.set_title("Marks distribution (Histogram)")
    ax1.set_xlabel("Marks")
    ax1.set_ylabel("Students")

    # 2 CDF
    fig2 = Figure()
    ax2 = fig2.subplots()
    xs = np.sort(valid["_marks"].dropna().values) if n else np.array([])
    ys = np.arange(1, len(xs) + 1) / len(xs) if len(xs) else np.array([])
    if len(xs):
        ax2.plot(xs, ys)
        ax2.axvline(pass_mark, linestyle="--")
    ax2.set_title("CDF (Proportion of students ≤ mark)")
    ax2.set_xlabel("Marks")
    ax2.set_ylabel("Proportion")

    # 3 Grade distribution
    fig3 = Figure()
    ax3 = fig3.subplots()
    gd = grade_dist.set_index(grade_label)["count"]
    ax3.bar(gd.index.astype(str), gd.values)
    ax3.set_title("Grade distribution")
    ax3.set_xlabel("Grade")
    ax3.set_ylabel("Count")
    plt.setp(ax3.get_xticklabels(), rotation=45, ha="right")

    # 4 Boxplot by grade (moderation)
    fig4 = Figure()
    ax4 = fig4.subplots()
    if not grade_stats.empty:
        order = grade_stats["Grade"].tolist()
        data = [valid.loc[d["_grade"] == g, "_marks"].dropna().values for g in order]
        # Vertical orientation is the default, so it is not passed explicitly.
        ax4.boxplot(data, tick_labels=[str(g) for g in order])
        ax4.set_title("Marks spread by Grade (Boxplot)")
        ax4.set_xlabel("Grade")
        ax4.set_ylabel("Marks")
        plt.setp(ax4.get_xticklabels(), rotation=45, ha="right")
    else:
        ax4.set_title("Marks spread by Grade (Boxplot)")

    # 5 Section comparison (optional)
    fig5 = Figure()
    ax5 = fig5.subplots()
    section_table = pd.DataFrame()
    if section_col and section_col in d.columns and n:
        # Name the grouping key "Section" up front so reset_index cannot
        # collide with the "count"/"mean" result columns.
        section_key = d[section_col].astype(str).fillna("NA").rename("Section")
        by_section = valid.groupby(section_key)["_marks"]
        sec = by_section.agg(["count", "mean"]).reset_index()
        # Both results are ordered by the same sorted group keys, so the
        # positional assignment lines up.
        sec["pass_rate_%"] = by_section.apply(lambda x: (x >= pass_mark).mean() * 100).values
        section_table = sec.sort_values("pass_rate_%", ascending=False)

        ax5.bar(section_table["Section"].astype(str), section_table["pass_rate_%"].values)
        ax5.set_title("Section-wise Pass Rate (%)")
        ax5.set_xlabel("Section")
        ax5.set_ylabel("Pass rate (%)")
        plt.setp(ax5.get_xticklabels(), rotation=45, ha="right")
    else:
        ax5.set_title("Section-wise Pass Rate (%) — not available (no section column)")
        ax5.axis("off")

    return kpi, pct_df, grade_dist, grade_stats, heaping, section_table, insight, fig1, fig2, fig3, fig4, fig5


# =============================
# UI
# =============================
with gr.Blocks(title="HoD Result Dashboard") as demo:
    gr.Markdown("## 📊 HoD Result Dashboard — Insights & Patterns (No Student Tables, No PDF)")

    file_bytes_state = gr.State(None)   # bytes
    sheet_state = gr.State(None)        # sheet name string

    with gr.Row():
        upload = gr.File(label="Upload Excel (.xlsx)", file_types=[".xlsx"])
        sheet_dd = gr.Dropdown(label="Sheet", choices=[], interactive=False)

    with gr.Row():
        marks_col = gr.Dropdown(label="Marks column", choices=[], interactive=False)
        grade_col = gr.Dropdown(label="Grade column", choices=[], interactive=False)
        pass_mark = gr.Number(label="Pass mark", value=50, precision=0)

    with gr.Row():
        course_col = gr.Dropdown(label="Course column (optional)", choices=[], interactive=False, visible=False)
        section_col = gr.Dropdown(label="Section column (optional)", choices=[], interactive=False, visible=False)

    with gr.Row():
        course_filter = gr.Dropdown(label="Course filter", choices=["(all)"], value="(all)", interactive=False, visible=False)
        section_filter = gr.Dropdown(label="Section filter", choices=["(all)"], value="(all)", interactive=False, visible=False)

    analyze_btn = gr.Button("🔍 Refresh HoD Dashboard")

    insight_md = gr.Markdown("")

    with gr.Tab("Tables"):
        with gr.Row():
            kpi_table = gr.Dataframe(label="KPI Summary", interactive=False, wrap=True)
            pct_table = gr.Dataframe(label="Percentiles", interactive=False, wrap=True)
        with gr.Row():
            grade_dist_table = gr.Dataframe(label="Grade Distribution", interactive=False, wrap=True)
            heaping_table = gr.Dataframe(label="Mark Heaping (Top repeated marks)", interactive=False, wrap=True)
        grade_stats_table = gr.Dataframe(label="Grade ↔ Marks (Moderation evidence)", interactive=False, wrap=True)
        section_table = gr.Dataframe(label="Section Comparison (if available)", interactive=False, wrap=True)

    with gr.Tab("Charts"):
        with gr.Row():
            hist_plot = gr.Plot(label="Histogram")
            cdf_plot = gr.Plot(label="CDF")
        with gr.Row():
            grade_plot = gr.Plot(label="Grade distribution")
            grade_box = gr.Plot(label="Boxplot by grade")
        section_plot = gr.Plot(label="Section-wise pass rate")

    # -------- callback helpers
    def _load_sheet(file_bytes, sheet_name):
        """Parse one sheet of the uploaded workbook and drop its empty/unnamed parts."""
        return _drop_useless(pd.read_excel(io.BytesIO(file_bytes), sheet_name=sheet_name, engine="openpyxl"))

    def _hidden_filter():
        """Update that resets a filter dropdown to its hidden, "(all)"-only state."""
        return gr.update(choices=["(all)"], value="(all)", visible=False, interactive=False)

    def _filter_update(df, col):
        """Update for a filter dropdown listing the distinct values of *col* in *df*."""
        if not col or col not in df.columns:
            return _hidden_filter()
        # Same string conversion as apply_filters, so every offered choice
        # matches the rows it is meant to select.
        vals = ["(all)"] + sorted(df[col].astype(str).fillna("NA").unique().tolist())
        return gr.update(choices=vals, value="(all)", visible=True, interactive=True)

    def _optional_col_update(cols, selected):
        """Update for an optional column dropdown; hidden and empty when nothing is selected.

        The value must stay empty when no column was identified: a hidden
        dropdown pre-set to the first column would still be passed to the
        analysis and group the "section comparison" by that column.
        """
        shown = bool(selected)
        return gr.update(choices=cols, value=selected if shown else None, visible=shown, interactive=shown)

    def _sheet_controls(df, current=(None, None, None, None)):
        """Build the updates for all column/filter dropdowns of one sheet.

        Args:
            df: The cleaned sheet.
            current: Currently selected ``(marks, grade, course, section)``
                columns. Each is kept if the sheet has that column; otherwise
                it is replaced by a guess.

        Returns:
            Updates for ``(marks_col, grade_col, course_col, section_col,
            course_filter, section_filter)``.
        """
        cols = list(df.columns)
        if not cols:
            # Nothing to choose from (empty sheet or cleared upload); the user
            # can still switch to another sheet.
            return (
                gr.update(choices=[], value=None, interactive=False),
                gr.update(choices=[], value=None, interactive=False),
                gr.update(choices=[], value=None, visible=False, interactive=False),
                gr.update(choices=[], value=None, visible=False, interactive=False),
                _hidden_filter(),
                _hidden_filter(),
            )

        cur_marks, cur_grade, cur_course, cur_section = current
        course_guess, section_guess = _guess_optional(cols)

        marks_sel = cur_marks if (cur_marks is not None and cur_marks in cols) else _guess_marks_col(df)
        grade_sel = cur_grade if (cur_grade is not None and cur_grade in cols) else _guess_grade_col(cols)
        course_sel = cur_course if (cur_course and cur_course in cols) else course_guess
        section_sel = cur_section if (cur_section and cur_section in cols) else section_guess

        return (
            gr.update(choices=cols, value=marks_sel, interactive=True),
            gr.update(choices=cols, value=grade_sel, interactive=True),
            _optional_col_update(cols, course_sel),
            _optional_col_update(cols, section_sel),
            _filter_update(df, course_sel),
            _filter_update(df, section_sel),
        )

    # -------- callbacks
    def on_upload(file_obj):
        """Handle a new (or cleared) upload: list the sheets and set up the first one."""
        if file_obj is None:
            # The file was removed: reset every control and forget the old
            # workbook instead of surfacing an error.
            return (
                gr.update(choices=[], value=None, interactive=False),
                *_sheet_controls(pd.DataFrame()),
                None,
                None,
            )

        try:
            b = _read_file_bytes(file_obj)
        except ValueError as err:
            # Show the helper's message in the UI rather than a generic failure.
            raise gr.Error(str(err)) from err

        # Open the workbook once and reuse it for the first sheet.
        with pd.ExcelFile(io.BytesIO(b), engine="openpyxl") as xls:
            sheets = list(xls.sheet_names or [])
            if not sheets:
                raise gr.Error("No sheets found in workbook.")
            sheet0 = sheets[0]
            df0 = _drop_useless(xls.parse(sheet_name=sheet0))

        return (
            gr.update(choices=sheets, value=sheet0, interactive=True),   # sheet_dd
            *_sheet_controls(df0),   # marks_col, grade_col, course_col, section_col, filters
            b,          # file_bytes_state
            sheet0,     # sheet_state
        )

    upload.change(
        fn=on_upload,
        inputs=[upload],
        outputs=[sheet_dd, marks_col, grade_col, course_col, section_col, course_filter, section_filter, file_bytes_state, sheet_state],
    )

    def on_sheet_change(sheet_name, file_bytes, course_col_val, section_col_val, marks_col_val=None, grade_col_val=None):
        """Refresh the column and filter dropdowns for the newly selected sheet."""
        if not file_bytes or not sheet_name:
            # Fired by a programmatic reset (e.g. the upload was cleared);
            # leave the controls as they are.
            return (*(gr.update() for _ in range(6)), sheet_name)

        df = _load_sheet(file_bytes, sheet_name)
        controls = _sheet_controls(df, current=(marks_col_val, grade_col_val, course_col_val, section_col_val))
        return (*controls, sheet_name)

    sheet_dd.change(
        fn=on_sheet_change,
        inputs=[sheet_dd, file_bytes_state, course_col, section_col, marks_col, grade_col],
        outputs=[marks_col, grade_col, course_col, section_col, course_filter, section_filter, sheet_state],
    )

    def on_refresh(file_bytes, sheet_name, m_col, g_col, pmark, c_col, s_col, c_filter, s_filter):
        """Recompute every table and chart for the current selections."""
        if not file_bytes:
            raise gr.Error("Upload Excel first.")
        if not sheet_name:
            raise gr.Error("Select a sheet.")
        if pmark is None:
            # A cleared number field arrives as None; int(None) would raise.
            raise gr.Error("Enter a pass mark.")

        df = _load_sheet(file_bytes, sheet_name)

        kpi, pct, gdist, gstats, heap, sec_tbl, insight, f1, f2, f3, f4, f5 = compute_hod_insights(
            df, m_col, g_col, int(pmark), c_col, s_col, c_filter, s_filter
        )

        # Order matches the outputs list below (heaping comes before grade stats).
        return (
            "### HoD Insight\n\n" + insight,
            kpi, pct, gdist, heap, gstats, sec_tbl,
            f1, f2, f3, f4, f5
        )

    analyze_btn.click(
        fn=on_refresh,
        inputs=[file_bytes_state, sheet_state, marks_col, grade_col, pass_mark, course_col, section_col, course_filter, section_filter],
        outputs=[insight_md, kpi_table, pct_table, grade_dist_table, heaping_table, grade_stats_table, section_table,
                 hist_plot, cdf_plot, grade_plot, grade_box, section_plot],
    )

# Start the Gradio server for the dashboard with the host set to 0.0.0.0,
# the port set to 7860 and ssr_mode turned off. This runs at import time.
demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)