File size: 17,024 Bytes
847e426
df4a47d
847e426
 
8a12b71
df4a47d
847e426
8a12b71
 
 
 
847e426
 
 
 
 
 
8a12b71
 
847e426
8a12b71
 
 
bb85f6c
df4a47d
 
 
 
 
 
 
bb85f6c
 
 
8a12b71
bb85f6c
 
 
df4a47d
bb85f6c
df4a47d
 
bb85f6c
 
 
df4a47d
bb85f6c
 
 
847e426
 
bb85f6c
 
 
 
 
df4a47d
 
8a12b71
 
e622de3
8a12b71
e622de3
8a12b71
 
 
 
b98be59
bb85f6c
b98be59
bb85f6c
8a12b71
 
 
df4a47d
e622de3
df4a47d
 
8a12b71
 
df4a47d
8a12b71
b98be59
8a12b71
bb85f6c
df4a47d
 
 
 
8a12b71
df4a47d
8a12b71
 
bb85f6c
 
 
 
 
df4a47d
 
 
bb85f6c
 
 
df4a47d
bb85f6c
 
 
 
 
 
 
 
 
 
 
 
 
b98be59
e622de3
b98be59
 
bb85f6c
df4a47d
bb85f6c
b98be59
 
 
 
 
 
 
 
e622de3
bb85f6c
b98be59
 
 
df4a47d
 
 
bb85f6c
8a12b71
 
 
 
 
 
 
bb85f6c
df4a47d
b98be59
bb85f6c
 
 
 
 
df4a47d
bb85f6c
b98be59
bb85f6c
 
 
 
b98be59
 
bb85f6c
 
 
 
 
 
 
8a12b71
 
bb85f6c
 
8a12b71
 
 
b98be59
8a12b71
 
 
bb85f6c
 
8a12b71
 
 
 
df4a47d
 
 
 
b98be59
bb85f6c
8a12b71
 
 
 
e622de3
bb85f6c
df4a47d
 
bb85f6c
b98be59
df4a47d
 
 
bb85f6c
df4a47d
 
 
b98be59
 
bb85f6c
9655019
df4a47d
b98be59
df4a47d
bb85f6c
df4a47d
 
 
 
 
 
 
 
bb85f6c
df4a47d
b98be59
bb85f6c
b98be59
bb85f6c
b98be59
df4a47d
 
 
 
b98be59
 
bb85f6c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8a12b71
 
 
9655019
8a12b71
 
bb85f6c
847e426
bb85f6c
 
847e426
 
 
 
 
 
8a12b71
 
 
847e426
8a12b71
 
df4a47d
847e426
8a12b71
 
df4a47d
8a12b71
bb85f6c
8a12b71
9655019
8a12b71
9655019
 
 
bb85f6c
9655019
bb85f6c
 
 
 
df4a47d
 
 
 
 
 
 
 
bb85f6c
847e426
bb85f6c
e622de3
bb85f6c
 
e622de3
 
bb85f6c
e622de3
 
bb85f6c
e622de3
 
bb85f6c
 
 
e622de3
bb85f6c
 
 
e622de3
bb85f6c
 
9655019
e622de3
 
bb85f6c
e622de3
 
 
bb85f6c
b98be59
 
9655019
 
 
bb85f6c
 
 
 
 
 
b98be59
 
847e426
e622de3
847e426
bb85f6c
847e426
 
e622de3
 
 
9655019
bb85f6c
e622de3
 
 
 
 
 
 
 
 
 
 
 
 
 
847e426
e622de3
bb85f6c
b98be59
8a12b71
 
e622de3
8a12b71
 
 
 
9655019
bb85f6c
8a12b71
bb85f6c
9655019
 
 
 
bb85f6c
 
 
9655019
8a12b71
 
 
bb85f6c
 
 
847e426
 
b98be59
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
import io
import numpy as np
import pandas as pd
import gradio as gr
import matplotlib.pyplot as plt


# =============================
# Helpers
# =============================
def _read_file_bytes(file_obj):
    if file_obj is None:
        raise ValueError("No file uploaded.")
    file_path = getattr(file_obj, "name", None)
    if not file_path:
        raise ValueError("Invalid uploaded file object.")
    with open(file_path, "rb") as f:
        b = f.read()
    if not b:
        raise ValueError("Uploaded file is empty.")
    return b


def _drop_useless(df: pd.DataFrame) -> pd.DataFrame:
    df = df.dropna(axis=1, how="all").dropna(axis=0, how="all")
    unnamed = [c for c in df.columns if str(c).strip().lower().startswith("unnamed")]
    if unnamed:
        df = df.drop(columns=unnamed, errors="ignore")
    return df


def _safe_numeric(s):
    return pd.to_numeric(s, errors="coerce")


def _guess_marks_col(df: pd.DataFrame):
    cols = list(df.columns)
    best, best_score = cols[0], -1
    for c in cols:
        score = _safe_numeric(df[c]).notna().mean()
        if score > best_score:
            best_score = score
            best = c
    return best


def _guess_grade_col(cols):
    low = {c: str(c).strip().lower() for c in cols}
    return next((c for c in cols if "grade" in low[c] or "grde" in low[c]), cols[0])


def _guess_optional(cols):
    low = {c: str(c).strip().lower() for c in cols}
    course = next((c for c in cols if any(k in low[c] for k in ["course", "module", "subject"])), None)
    section = next((c for c in cols if any(k in low[c] for k in ["section", "group", "batch", "class"])), None)
    return course, section


def apply_filters(df, course_col, section_col, course_filter, section_filter):
    d = df.copy()
    if course_col and course_col in d.columns and course_filter and course_filter != "(all)":
        d = d[d[course_col].astype(str).fillna("NA") == course_filter]
    if section_col and section_col in d.columns and section_filter and section_filter != "(all)":
        d = d[d[section_col].astype(str).fillna("NA") == section_filter]
    return d


# =============================
# Core HoD Insights (no student tables)
# =============================
def compute_hod_insights(df, marks_col, grade_col, pass_mark, course_col, section_col, course_filter, section_filter):
    if df is None or df.empty:
        raise gr.Error("Sheet is empty.")

    d = apply_filters(df, course_col, section_col, course_filter, section_filter).copy()

    d["_marks"] = _safe_numeric(d[marks_col]) if marks_col in d.columns else np.nan
    d["_grade"] = d[grade_col].astype(str).str.strip().replace({"nan": "NA"}) if grade_col in d.columns else "NA"

    total = int(len(d))
    valid = d[d["_marks"].notna()].copy()
    n = int(len(valid))
    missing = int(d["_marks"].isna().sum())

    pass_mark = int(pass_mark)
    mean = float(valid["_marks"].mean()) if n else 0.0
    std = float(valid["_marks"].std(ddof=0)) if n else 0.0
    minv = float(valid["_marks"].min()) if n else 0.0
    maxv = float(valid["_marks"].max()) if n else 0.0

    pass_count = int((valid["_marks"] >= pass_mark).sum()) if n else 0
    pass_rate = (pass_count / n * 100.0) if n else 0.0

    # Borderline (pass to pass+5) and just-below (pass-5 to pass-1)
    borderline_pass = int(((valid["_marks"] >= pass_mark) & (valid["_marks"] < pass_mark + 5)).sum()) if n else 0
    borderline_fail = int(((valid["_marks"] < pass_mark) & (valid["_marks"] >= pass_mark - 5)).sum()) if n else 0

    # Distribution shape
    skew = float(valid["_marks"].skew()) if n else 0.0
    kurt = float(valid["_marks"].kurt()) if n else 0.0

    # Outliers by IQR
    outlier_count = 0
    low_thr = high_thr = 0.0
    if n:
        q1 = float(np.percentile(valid["_marks"], 25))
        q3 = float(np.percentile(valid["_marks"], 75))
        iqr = q3 - q1
        low_thr = q1 - 1.5 * iqr
        high_thr = q3 + 1.5 * iqr
        outlier_count = int(((valid["_marks"] < low_thr) | (valid["_marks"] > high_thr)).sum())

    # Percentiles
    pct_df = pd.DataFrame(
        [(f"P{p}", round(float(np.percentile(valid["_marks"], p)), 2)) for p in [10, 25, 50, 75, 90]]
        if n else [],
        columns=["Percentile", "Marks"]
    )

    # Grade distribution
    grade_dist = d["_grade"].value_counts(dropna=False).rename("count").to_frame().reset_index()
    grade_dist.columns = [grade_col, "count"]
    grade_dist["%"] = (grade_dist["count"] / grade_dist["count"].sum() * 100).round(2) if len(grade_dist) else 0

    # Grade ↔ marks mapping (moderation evidence)
    grade_stats = (
        valid.groupby(d["_grade"])["_marks"]
        .agg(["count", "mean", "std", "min", "median", "max"])
        .reset_index()
        .rename(columns={"_grade": "Grade"})
        .sort_values("mean", ascending=False)
    )

    # Mark heaping (repeated marks)
    heaping = (
        valid["_marks"].round(0).astype(int)
        .value_counts().head(12)
        .rename("count").reset_index()
        .rename(columns={"index": "Mark"})
    )

    # Course status
    if pass_rate >= 80:
        status = "GREEN"
    elif pass_rate >= 60:
        status = "AMBER"
    else:
        status = "RED"

    # Flags
    flags = []
    if missing > 0:
        flags.append(f"{missing} missing mark(s) β€” verify completeness.")
    if borderline_fail > max(5, 0.03 * n):
        flags.append("Many students just below pass β€” consider targeted support / moderation review.")
    if borderline_pass > max(5, 0.03 * n):
        flags.append("Many students just above pass β€” borderline attainment cluster.")
    if abs(skew) > 0.7:
        flags.append("Skewed distribution β€” check assessment balance and marking consistency.")
    if outlier_count > 0:
        flags.append(f"{outlier_count} outlier(s) by IQR β€” spot-check extremes.")
    if len(heaping) and heaping["count"].iloc[0] >= max(10, 0.06 * n):
        flags.append("Heaping detected β€” many students share identical marks (rounding/marking pattern).")

    flags_text = " | ".join(flags) if flags else "No major warning patterns detected."

    insight = (
        f"**Status:** {status}  \n"
        f"**Pass rate:** {pass_rate:.1f}% (Pass mark = {pass_mark})  \n"
        f"**Avg:** {mean:.1f} | **Std:** {std:.1f} | **Min/Max:** {minv:.1f}/{maxv:.1f}  \n"
        f"**Borderline (just below pass):** {borderline_fail} | **Borderline (just above pass):** {borderline_pass}  \n"
        f"**Skew:** {skew:.2f} | **Kurtosis:** {kurt:.2f} | **Outliers:** {outlier_count} | **Missing:** {missing}  \n"
        f"**Flags:** {flags_text}"
    )

    # KPI table
    kpi = pd.DataFrame(
        [
            ("Total rows (filtered)", total),
            ("Students with numeric marks", n),
            ("Missing marks", missing),
            ("Pass mark", pass_mark),
            ("Pass count", pass_count),
            ("Pass rate (%)", round(pass_rate, 2)),
            ("Borderline just below pass", borderline_fail),
            ("Borderline just above pass", borderline_pass),
            ("Average", round(mean, 2)),
            ("Std deviation", round(std, 2)),
            ("Minimum", round(minv, 2)),
            ("Maximum", round(maxv, 2)),
            ("Skewness", round(skew, 3)),
            ("Kurtosis", round(kurt, 3)),
            ("Outlier low threshold (IQR)", round(low_thr, 2)),
            ("Outlier high threshold (IQR)", round(high_thr, 2)),
            ("Outlier count (IQR)", outlier_count),
            ("Status", status),
        ],
        columns=["Metric", "Value"],
    )

    # Charts
    # 1 Histogram
    fig1 = plt.figure()
    plt.hist(valid["_marks"].dropna(), bins=12)
    plt.axvline(pass_mark, linestyle="--")
    plt.title("Marks distribution (Histogram)")
    plt.xlabel("Marks")
    plt.ylabel("Students")

    # 2 CDF
    fig2 = plt.figure()
    xs = np.sort(valid["_marks"].dropna().values) if n else np.array([])
    ys = np.arange(1, len(xs) + 1) / len(xs) if len(xs) else np.array([])
    if len(xs):
        plt.plot(xs, ys)
        plt.axvline(pass_mark, linestyle="--")
    plt.title("CDF (Proportion of students ≀ mark)")
    plt.xlabel("Marks")
    plt.ylabel("Proportion")

    # 3 Grade distribution
    fig3 = plt.figure()
    gd = grade_dist.set_index(grade_col)["count"]
    plt.bar(gd.index.astype(str), gd.values)
    plt.title("Grade distribution")
    plt.xlabel("Grade")
    plt.ylabel("Count")
    plt.xticks(rotation=45, ha="right")

    # 4 Boxplot by grade (moderation)
    fig4 = plt.figure()
    if not grade_stats.empty:
        order = grade_stats["Grade"].tolist()
        data = [valid.loc[d["_grade"] == g, "_marks"].dropna().values for g in order]
        plt.boxplot(data, tick_labels=[str(g) for g in order], vert=True)
        plt.title("Marks spread by Grade (Boxplot)")
        plt.xlabel("Grade")
        plt.ylabel("Marks")
        plt.xticks(rotation=45, ha="right")
    else:
        plt.title("Marks spread by Grade (Boxplot)")

    # 5 Section comparison (optional)
    fig5 = plt.figure()
    section_table = pd.DataFrame()
    if section_col and section_col in d.columns and n:
        sec = valid.groupby(d[section_col].astype(str).fillna("NA"))["_marks"].agg(["count", "mean"]).reset_index()
        sec["pass_rate_%"] = (valid.groupby(d[section_col].astype(str).fillna("NA"))["_marks"].apply(lambda x: (x >= pass_mark).mean() * 100)).values
        sec = sec.rename(columns={section_col: "Section"})
        section_table = sec.sort_values("pass_rate_%", ascending=False)

        plt.bar(section_table["Section"].astype(str), section_table["pass_rate_%"].values)
        plt.title("Section-wise Pass Rate (%)")
        plt.xlabel("Section")
        plt.ylabel("Pass rate (%)")
        plt.xticks(rotation=45, ha="right")
    else:
        plt.title("Section-wise Pass Rate (%) β€” not available (no section column)")
        plt.axis("off")

    return kpi, pct_df, grade_dist, grade_stats, heaping, section_table, insight, fig1, fig2, fig3, fig4, fig5


# =============================
# UI
# =============================
with gr.Blocks(title="HoD Result Dashboard") as demo:
    gr.Markdown("## πŸ“Š HoD Result Dashboard β€” Insights & Patterns (No Student Tables, No PDF)")

    file_bytes_state = gr.State(None)   # bytes
    sheet_state = gr.State(None)        # sheet name string

    with gr.Row():
        upload = gr.File(label="Upload Excel (.xlsx)", file_types=[".xlsx"])
        sheet_dd = gr.Dropdown(label="Sheet", choices=[], interactive=False)

    with gr.Row():
        marks_col = gr.Dropdown(label="Marks column", choices=[], interactive=False)
        grade_col = gr.Dropdown(label="Grade column", choices=[], interactive=False)
        pass_mark = gr.Number(label="Pass mark", value=50, precision=0)

    with gr.Row():
        course_col = gr.Dropdown(label="Course column (optional)", choices=[], interactive=False, visible=False)
        section_col = gr.Dropdown(label="Section column (optional)", choices=[], interactive=False, visible=False)

    with gr.Row():
        course_filter = gr.Dropdown(label="Course filter", choices=["(all)"], value="(all)", interactive=False, visible=False)
        section_filter = gr.Dropdown(label="Section filter", choices=["(all)"], value="(all)", interactive=False, visible=False)

    analyze_btn = gr.Button("πŸ” Refresh HoD Dashboard")

    insight_md = gr.Markdown("")

    with gr.Tab("Tables"):
        with gr.Row():
            kpi_table = gr.Dataframe(label="KPI Summary", interactive=False, wrap=True)
            pct_table = gr.Dataframe(label="Percentiles", interactive=False, wrap=True)
        with gr.Row():
            grade_dist_table = gr.Dataframe(label="Grade Distribution", interactive=False, wrap=True)
            heaping_table = gr.Dataframe(label="Mark Heaping (Top repeated marks)", interactive=False, wrap=True)
        grade_stats_table = gr.Dataframe(label="Grade ↔ Marks (Moderation evidence)", interactive=False, wrap=True)
        section_table = gr.Dataframe(label="Section Comparison (if available)", interactive=False, wrap=True)

    with gr.Tab("Charts"):
        with gr.Row():
            hist_plot = gr.Plot(label="Histogram")
            cdf_plot = gr.Plot(label="CDF")
        with gr.Row():
            grade_plot = gr.Plot(label="Grade distribution")
            grade_box = gr.Plot(label="Boxplot by grade")
        section_plot = gr.Plot(label="Section-wise pass rate")

    # -------- callbacks
    def on_upload(file_obj):
        b = _read_file_bytes(file_obj)
        xls = pd.ExcelFile(io.BytesIO(b), engine="openpyxl")
        sheets = xls.sheet_names or []
        if not sheets:
            raise gr.Error("No sheets found in workbook.")

        sheet0 = sheets[0]
        df0 = _drop_useless(pd.read_excel(io.BytesIO(b), sheet_name=sheet0, engine="openpyxl"))

        cols = list(df0.columns)
        m_guess = _guess_marks_col(df0)
        g_guess = _guess_grade_col(cols)
        c_guess, s_guess = _guess_optional(cols)

        # Optional filters
        course_col_upd = gr.update(choices=cols, value=(c_guess or cols[0]), visible=bool(c_guess), interactive=bool(c_guess))
        section_col_upd = gr.update(choices=cols, value=(s_guess or cols[0]), visible=bool(s_guess), interactive=bool(s_guess))

        course_filter_upd = gr.update(choices=["(all)"], value="(all)", visible=False, interactive=False)
        section_filter_upd = gr.update(choices=["(all)"], value="(all)", visible=False, interactive=False)

        if c_guess and c_guess in df0.columns:
            vals = ["(all)"] + sorted(df0[c_guess].astype(str).fillna("NA").unique().tolist())
            course_filter_upd = gr.update(choices=vals, value="(all)", visible=True, interactive=True)

        if s_guess and s_guess in df0.columns:
            vals = ["(all)"] + sorted(df0[s_guess].astype(str).fillna("NA").unique().tolist())
            section_filter_upd = gr.update(choices=vals, value="(all)", visible=True, interactive=True)

        return (
            gr.update(choices=sheets, value=sheet0, interactive=True),   # sheet_dd
            gr.update(choices=cols, value=m_guess, interactive=True),    # marks_col
            gr.update(choices=cols, value=g_guess, interactive=True),    # grade_col
            course_col_upd,
            section_col_upd,
            course_filter_upd,
            section_filter_upd,
            b,          # file_bytes_state
            sheet0,     # sheet_state
        )

    upload.change(
        fn=on_upload,
        inputs=[upload],
        outputs=[sheet_dd, marks_col, grade_col, course_col, section_col, course_filter, section_filter, file_bytes_state, sheet_state],
    )

    def on_sheet_change(sheet_name, file_bytes, course_col_val, section_col_val):
        if not file_bytes:
            raise gr.Error("Upload Excel first.")

        df = _drop_useless(pd.read_excel(io.BytesIO(file_bytes), sheet_name=sheet_name, engine="openpyxl"))

        cf = gr.update(choices=["(all)"], value="(all)", visible=False, interactive=False)
        sf = gr.update(choices=["(all)"], value="(all)", visible=False, interactive=False)

        if course_col_val and course_col_val in df.columns:
            vals = ["(all)"] + sorted(df[course_col_val].astype(str).fillna("NA").unique().tolist())
            cf = gr.update(choices=vals, value="(all)", visible=True, interactive=True)

        if section_col_val and section_col_val in df.columns:
            vals = ["(all)"] + sorted(df[section_col_val].astype(str).fillna("NA").unique().tolist())
            sf = gr.update(choices=vals, value="(all)", visible=True, interactive=True)

        return cf, sf, sheet_name

    sheet_dd.change(
        fn=on_sheet_change,
        inputs=[sheet_dd, file_bytes_state, course_col, section_col],
        outputs=[course_filter, section_filter, sheet_state],
    )

    def on_refresh(file_bytes, sheet_name, m_col, g_col, pmark, c_col, s_col, c_filter, s_filter):
        if not file_bytes:
            raise gr.Error("Upload Excel first.")
        if not sheet_name:
            raise gr.Error("Select a sheet.")

        df = _drop_useless(pd.read_excel(io.BytesIO(file_bytes), sheet_name=sheet_name, engine="openpyxl"))

        kpi, pct, gdist, gstats, heap, sec_tbl, insight, f1, f2, f3, f4, f5 = compute_hod_insights(
            df, m_col, g_col, int(pmark), c_col, s_col, c_filter, s_filter
        )

        return (
            "### HoD Insight\n\n" + insight,
            kpi, pct, gdist, heap, gstats, sec_tbl,
            f1, f2, f3, f4, f5
        )

    analyze_btn.click(
        fn=on_refresh,
        inputs=[file_bytes_state, sheet_state, marks_col, grade_col, pass_mark, course_col, section_col, course_filter, section_filter],
        outputs=[insight_md, kpi_table, pct_table, grade_dist_table, heaping_table, grade_stats_table, section_table,
                 hist_plot, cdf_plot, grade_plot, grade_box, section_plot],
    )

demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)