cga / app.py
Deevyankar's picture
Update app.py
bb85f6c verified
import io
import numpy as np
import pandas as pd
import gradio as gr
import matplotlib.pyplot as plt
# =============================
# Helpers
# =============================
def _read_file_bytes(file_obj):
if file_obj is None:
raise ValueError("No file uploaded.")
file_path = getattr(file_obj, "name", None)
if not file_path:
raise ValueError("Invalid uploaded file object.")
with open(file_path, "rb") as f:
b = f.read()
if not b:
raise ValueError("Uploaded file is empty.")
return b
def _drop_useless(df: pd.DataFrame) -> pd.DataFrame:
df = df.dropna(axis=1, how="all").dropna(axis=0, how="all")
unnamed = [c for c in df.columns if str(c).strip().lower().startswith("unnamed")]
if unnamed:
df = df.drop(columns=unnamed, errors="ignore")
return df
def _safe_numeric(s):
return pd.to_numeric(s, errors="coerce")
def _guess_marks_col(df: pd.DataFrame):
cols = list(df.columns)
best, best_score = cols[0], -1
for c in cols:
score = _safe_numeric(df[c]).notna().mean()
if score > best_score:
best_score = score
best = c
return best
def _guess_grade_col(cols):
low = {c: str(c).strip().lower() for c in cols}
return next((c for c in cols if "grade" in low[c] or "grde" in low[c]), cols[0])
def _guess_optional(cols):
low = {c: str(c).strip().lower() for c in cols}
course = next((c for c in cols if any(k in low[c] for k in ["course", "module", "subject"])), None)
section = next((c for c in cols if any(k in low[c] for k in ["section", "group", "batch", "class"])), None)
return course, section
def apply_filters(df, course_col, section_col, course_filter, section_filter):
d = df.copy()
if course_col and course_col in d.columns and course_filter and course_filter != "(all)":
d = d[d[course_col].astype(str).fillna("NA") == course_filter]
if section_col and section_col in d.columns and section_filter and section_filter != "(all)":
d = d[d[section_col].astype(str).fillna("NA") == section_filter]
return d
# =============================
# Core HoD Insights (no student tables)
# =============================
def compute_hod_insights(df, marks_col, grade_col, pass_mark, course_col, section_col, course_filter, section_filter):
if df is None or df.empty:
raise gr.Error("Sheet is empty.")
d = apply_filters(df, course_col, section_col, course_filter, section_filter).copy()
d["_marks"] = _safe_numeric(d[marks_col]) if marks_col in d.columns else np.nan
d["_grade"] = d[grade_col].astype(str).str.strip().replace({"nan": "NA"}) if grade_col in d.columns else "NA"
total = int(len(d))
valid = d[d["_marks"].notna()].copy()
n = int(len(valid))
missing = int(d["_marks"].isna().sum())
pass_mark = int(pass_mark)
mean = float(valid["_marks"].mean()) if n else 0.0
std = float(valid["_marks"].std(ddof=0)) if n else 0.0
minv = float(valid["_marks"].min()) if n else 0.0
maxv = float(valid["_marks"].max()) if n else 0.0
pass_count = int((valid["_marks"] >= pass_mark).sum()) if n else 0
pass_rate = (pass_count / n * 100.0) if n else 0.0
# Borderline (pass to pass+5) and just-below (pass-5 to pass-1)
borderline_pass = int(((valid["_marks"] >= pass_mark) & (valid["_marks"] < pass_mark + 5)).sum()) if n else 0
borderline_fail = int(((valid["_marks"] < pass_mark) & (valid["_marks"] >= pass_mark - 5)).sum()) if n else 0
# Distribution shape
skew = float(valid["_marks"].skew()) if n else 0.0
kurt = float(valid["_marks"].kurt()) if n else 0.0
# Outliers by IQR
outlier_count = 0
low_thr = high_thr = 0.0
if n:
q1 = float(np.percentile(valid["_marks"], 25))
q3 = float(np.percentile(valid["_marks"], 75))
iqr = q3 - q1
low_thr = q1 - 1.5 * iqr
high_thr = q3 + 1.5 * iqr
outlier_count = int(((valid["_marks"] < low_thr) | (valid["_marks"] > high_thr)).sum())
# Percentiles
pct_df = pd.DataFrame(
[(f"P{p}", round(float(np.percentile(valid["_marks"], p)), 2)) for p in [10, 25, 50, 75, 90]]
if n else [],
columns=["Percentile", "Marks"]
)
# Grade distribution
grade_dist = d["_grade"].value_counts(dropna=False).rename("count").to_frame().reset_index()
grade_dist.columns = [grade_col, "count"]
grade_dist["%"] = (grade_dist["count"] / grade_dist["count"].sum() * 100).round(2) if len(grade_dist) else 0
# Grade ↔ marks mapping (moderation evidence)
grade_stats = (
valid.groupby(d["_grade"])["_marks"]
.agg(["count", "mean", "std", "min", "median", "max"])
.reset_index()
.rename(columns={"_grade": "Grade"})
.sort_values("mean", ascending=False)
)
# Mark heaping (repeated marks)
heaping = (
valid["_marks"].round(0).astype(int)
.value_counts().head(12)
.rename("count").reset_index()
.rename(columns={"index": "Mark"})
)
# Course status
if pass_rate >= 80:
status = "GREEN"
elif pass_rate >= 60:
status = "AMBER"
else:
status = "RED"
# Flags
flags = []
if missing > 0:
flags.append(f"{missing} missing mark(s) — verify completeness.")
if borderline_fail > max(5, 0.03 * n):
flags.append("Many students just below pass — consider targeted support / moderation review.")
if borderline_pass > max(5, 0.03 * n):
flags.append("Many students just above pass — borderline attainment cluster.")
if abs(skew) > 0.7:
flags.append("Skewed distribution — check assessment balance and marking consistency.")
if outlier_count > 0:
flags.append(f"{outlier_count} outlier(s) by IQR — spot-check extremes.")
if len(heaping) and heaping["count"].iloc[0] >= max(10, 0.06 * n):
flags.append("Heaping detected — many students share identical marks (rounding/marking pattern).")
flags_text = " | ".join(flags) if flags else "No major warning patterns detected."
insight = (
f"**Status:** {status} \n"
f"**Pass rate:** {pass_rate:.1f}% (Pass mark = {pass_mark}) \n"
f"**Avg:** {mean:.1f} | **Std:** {std:.1f} | **Min/Max:** {minv:.1f}/{maxv:.1f} \n"
f"**Borderline (just below pass):** {borderline_fail} | **Borderline (just above pass):** {borderline_pass} \n"
f"**Skew:** {skew:.2f} | **Kurtosis:** {kurt:.2f} | **Outliers:** {outlier_count} | **Missing:** {missing} \n"
f"**Flags:** {flags_text}"
)
# KPI table
kpi = pd.DataFrame(
[
("Total rows (filtered)", total),
("Students with numeric marks", n),
("Missing marks", missing),
("Pass mark", pass_mark),
("Pass count", pass_count),
("Pass rate (%)", round(pass_rate, 2)),
("Borderline just below pass", borderline_fail),
("Borderline just above pass", borderline_pass),
("Average", round(mean, 2)),
("Std deviation", round(std, 2)),
("Minimum", round(minv, 2)),
("Maximum", round(maxv, 2)),
("Skewness", round(skew, 3)),
("Kurtosis", round(kurt, 3)),
("Outlier low threshold (IQR)", round(low_thr, 2)),
("Outlier high threshold (IQR)", round(high_thr, 2)),
("Outlier count (IQR)", outlier_count),
("Status", status),
],
columns=["Metric", "Value"],
)
# Charts
# 1 Histogram
fig1 = plt.figure()
plt.hist(valid["_marks"].dropna(), bins=12)
plt.axvline(pass_mark, linestyle="--")
plt.title("Marks distribution (Histogram)")
plt.xlabel("Marks")
plt.ylabel("Students")
# 2 CDF
fig2 = plt.figure()
xs = np.sort(valid["_marks"].dropna().values) if n else np.array([])
ys = np.arange(1, len(xs) + 1) / len(xs) if len(xs) else np.array([])
if len(xs):
plt.plot(xs, ys)
plt.axvline(pass_mark, linestyle="--")
plt.title("CDF (Proportion of students ≤ mark)")
plt.xlabel("Marks")
plt.ylabel("Proportion")
# 3 Grade distribution
fig3 = plt.figure()
gd = grade_dist.set_index(grade_col)["count"]
plt.bar(gd.index.astype(str), gd.values)
plt.title("Grade distribution")
plt.xlabel("Grade")
plt.ylabel("Count")
plt.xticks(rotation=45, ha="right")
# 4 Boxplot by grade (moderation)
fig4 = plt.figure()
if not grade_stats.empty:
order = grade_stats["Grade"].tolist()
data = [valid.loc[d["_grade"] == g, "_marks"].dropna().values for g in order]
plt.boxplot(data, tick_labels=[str(g) for g in order], vert=True)
plt.title("Marks spread by Grade (Boxplot)")
plt.xlabel("Grade")
plt.ylabel("Marks")
plt.xticks(rotation=45, ha="right")
else:
plt.title("Marks spread by Grade (Boxplot)")
# 5 Section comparison (optional)
fig5 = plt.figure()
section_table = pd.DataFrame()
if section_col and section_col in d.columns and n:
sec = valid.groupby(d[section_col].astype(str).fillna("NA"))["_marks"].agg(["count", "mean"]).reset_index()
sec["pass_rate_%"] = (valid.groupby(d[section_col].astype(str).fillna("NA"))["_marks"].apply(lambda x: (x >= pass_mark).mean() * 100)).values
sec = sec.rename(columns={section_col: "Section"})
section_table = sec.sort_values("pass_rate_%", ascending=False)
plt.bar(section_table["Section"].astype(str), section_table["pass_rate_%"].values)
plt.title("Section-wise Pass Rate (%)")
plt.xlabel("Section")
plt.ylabel("Pass rate (%)")
plt.xticks(rotation=45, ha="right")
else:
plt.title("Section-wise Pass Rate (%) — not available (no section column)")
plt.axis("off")
return kpi, pct_df, grade_dist, grade_stats, heaping, section_table, insight, fig1, fig2, fig3, fig4, fig5
# =============================
# UI
# =============================
with gr.Blocks(title="HoD Result Dashboard") as demo:
gr.Markdown("## 📊 HoD Result Dashboard — Insights & Patterns (No Student Tables, No PDF)")
file_bytes_state = gr.State(None) # bytes
sheet_state = gr.State(None) # sheet name string
with gr.Row():
upload = gr.File(label="Upload Excel (.xlsx)", file_types=[".xlsx"])
sheet_dd = gr.Dropdown(label="Sheet", choices=[], interactive=False)
with gr.Row():
marks_col = gr.Dropdown(label="Marks column", choices=[], interactive=False)
grade_col = gr.Dropdown(label="Grade column", choices=[], interactive=False)
pass_mark = gr.Number(label="Pass mark", value=50, precision=0)
with gr.Row():
course_col = gr.Dropdown(label="Course column (optional)", choices=[], interactive=False, visible=False)
section_col = gr.Dropdown(label="Section column (optional)", choices=[], interactive=False, visible=False)
with gr.Row():
course_filter = gr.Dropdown(label="Course filter", choices=["(all)"], value="(all)", interactive=False, visible=False)
section_filter = gr.Dropdown(label="Section filter", choices=["(all)"], value="(all)", interactive=False, visible=False)
analyze_btn = gr.Button("🔍 Refresh HoD Dashboard")
insight_md = gr.Markdown("")
with gr.Tab("Tables"):
with gr.Row():
kpi_table = gr.Dataframe(label="KPI Summary", interactive=False, wrap=True)
pct_table = gr.Dataframe(label="Percentiles", interactive=False, wrap=True)
with gr.Row():
grade_dist_table = gr.Dataframe(label="Grade Distribution", interactive=False, wrap=True)
heaping_table = gr.Dataframe(label="Mark Heaping (Top repeated marks)", interactive=False, wrap=True)
grade_stats_table = gr.Dataframe(label="Grade ↔ Marks (Moderation evidence)", interactive=False, wrap=True)
section_table = gr.Dataframe(label="Section Comparison (if available)", interactive=False, wrap=True)
with gr.Tab("Charts"):
with gr.Row():
hist_plot = gr.Plot(label="Histogram")
cdf_plot = gr.Plot(label="CDF")
with gr.Row():
grade_plot = gr.Plot(label="Grade distribution")
grade_box = gr.Plot(label="Boxplot by grade")
section_plot = gr.Plot(label="Section-wise pass rate")
# -------- callbacks
def on_upload(file_obj):
b = _read_file_bytes(file_obj)
xls = pd.ExcelFile(io.BytesIO(b), engine="openpyxl")
sheets = xls.sheet_names or []
if not sheets:
raise gr.Error("No sheets found in workbook.")
sheet0 = sheets[0]
df0 = _drop_useless(pd.read_excel(io.BytesIO(b), sheet_name=sheet0, engine="openpyxl"))
cols = list(df0.columns)
m_guess = _guess_marks_col(df0)
g_guess = _guess_grade_col(cols)
c_guess, s_guess = _guess_optional(cols)
# Optional filters
course_col_upd = gr.update(choices=cols, value=(c_guess or cols[0]), visible=bool(c_guess), interactive=bool(c_guess))
section_col_upd = gr.update(choices=cols, value=(s_guess or cols[0]), visible=bool(s_guess), interactive=bool(s_guess))
course_filter_upd = gr.update(choices=["(all)"], value="(all)", visible=False, interactive=False)
section_filter_upd = gr.update(choices=["(all)"], value="(all)", visible=False, interactive=False)
if c_guess and c_guess in df0.columns:
vals = ["(all)"] + sorted(df0[c_guess].astype(str).fillna("NA").unique().tolist())
course_filter_upd = gr.update(choices=vals, value="(all)", visible=True, interactive=True)
if s_guess and s_guess in df0.columns:
vals = ["(all)"] + sorted(df0[s_guess].astype(str).fillna("NA").unique().tolist())
section_filter_upd = gr.update(choices=vals, value="(all)", visible=True, interactive=True)
return (
gr.update(choices=sheets, value=sheet0, interactive=True), # sheet_dd
gr.update(choices=cols, value=m_guess, interactive=True), # marks_col
gr.update(choices=cols, value=g_guess, interactive=True), # grade_col
course_col_upd,
section_col_upd,
course_filter_upd,
section_filter_upd,
b, # file_bytes_state
sheet0, # sheet_state
)
upload.change(
fn=on_upload,
inputs=[upload],
outputs=[sheet_dd, marks_col, grade_col, course_col, section_col, course_filter, section_filter, file_bytes_state, sheet_state],
)
def on_sheet_change(sheet_name, file_bytes, course_col_val, section_col_val):
if not file_bytes:
raise gr.Error("Upload Excel first.")
df = _drop_useless(pd.read_excel(io.BytesIO(file_bytes), sheet_name=sheet_name, engine="openpyxl"))
cf = gr.update(choices=["(all)"], value="(all)", visible=False, interactive=False)
sf = gr.update(choices=["(all)"], value="(all)", visible=False, interactive=False)
if course_col_val and course_col_val in df.columns:
vals = ["(all)"] + sorted(df[course_col_val].astype(str).fillna("NA").unique().tolist())
cf = gr.update(choices=vals, value="(all)", visible=True, interactive=True)
if section_col_val and section_col_val in df.columns:
vals = ["(all)"] + sorted(df[section_col_val].astype(str).fillna("NA").unique().tolist())
sf = gr.update(choices=vals, value="(all)", visible=True, interactive=True)
return cf, sf, sheet_name
sheet_dd.change(
fn=on_sheet_change,
inputs=[sheet_dd, file_bytes_state, course_col, section_col],
outputs=[course_filter, section_filter, sheet_state],
)
def on_refresh(file_bytes, sheet_name, m_col, g_col, pmark, c_col, s_col, c_filter, s_filter):
if not file_bytes:
raise gr.Error("Upload Excel first.")
if not sheet_name:
raise gr.Error("Select a sheet.")
df = _drop_useless(pd.read_excel(io.BytesIO(file_bytes), sheet_name=sheet_name, engine="openpyxl"))
kpi, pct, gdist, gstats, heap, sec_tbl, insight, f1, f2, f3, f4, f5 = compute_hod_insights(
df, m_col, g_col, int(pmark), c_col, s_col, c_filter, s_filter
)
return (
"### HoD Insight\n\n" + insight,
kpi, pct, gdist, heap, gstats, sec_tbl,
f1, f2, f3, f4, f5
)
analyze_btn.click(
fn=on_refresh,
inputs=[file_bytes_state, sheet_state, marks_col, grade_col, pass_mark, course_col, section_col, course_filter, section_filter],
outputs=[insight_md, kpi_table, pct_table, grade_dist_table, heaping_table, grade_stats_table, section_table,
hist_plot, cdf_plot, grade_plot, grade_box, section_plot],
)
demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)