Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,6 +1,4 @@
|
|
| 1 |
import io
|
| 2 |
-
from datetime import datetime
|
| 3 |
-
|
| 4 |
import numpy as np
|
| 5 |
import pandas as pd
|
| 6 |
import gradio as gr
|
|
@@ -23,11 +21,7 @@ def _read_file_bytes(file_obj):
|
|
| 23 |
return b
|
| 24 |
|
| 25 |
|
| 26 |
-
def
|
| 27 |
-
return pd.to_numeric(series, errors="coerce")
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
def _drop_useless_cols(df: pd.DataFrame) -> pd.DataFrame:
|
| 31 |
df = df.dropna(axis=1, how="all").dropna(axis=0, how="all")
|
| 32 |
unnamed = [c for c in df.columns if str(c).strip().lower().startswith("unnamed")]
|
| 33 |
if unnamed:
|
|
@@ -35,26 +29,31 @@ def _drop_useless_cols(df: pd.DataFrame) -> pd.DataFrame:
|
|
| 35 |
return df
|
| 36 |
|
| 37 |
|
| 38 |
-
def
|
| 39 |
-
|
| 40 |
-
|
| 41 |
|
| 42 |
-
|
| 43 |
-
|
|
|
|
| 44 |
for c in cols:
|
| 45 |
-
|
| 46 |
-
score = s.notna().mean()
|
| 47 |
if score > best_score:
|
| 48 |
best_score = score
|
| 49 |
-
|
|
|
|
|
|
|
| 50 |
|
| 51 |
-
|
|
|
|
|
|
|
| 52 |
|
| 53 |
-
# optional columns
|
| 54 |
-
course_guess = next((c for c in cols if any(k in lower[c] for k in ["course", "module", "subject"])), None)
|
| 55 |
-
section_guess = next((c for c in cols if any(k in lower[c] for k in ["section", "group", "batch", "class"])), None)
|
| 56 |
|
| 57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
|
| 59 |
|
| 60 |
def apply_filters(df, course_col, section_col, course_filter, section_filter):
|
|
@@ -67,9 +66,9 @@ def apply_filters(df, course_col, section_col, course_filter, section_filter):
|
|
| 67 |
|
| 68 |
|
| 69 |
# =============================
|
| 70 |
-
#
|
| 71 |
# =============================
|
| 72 |
-
def
|
| 73 |
if df is None or df.empty:
|
| 74 |
raise gr.Error("Sheet is empty.")
|
| 75 |
|
|
@@ -83,30 +82,47 @@ def compute_insights(df, marks_col, grade_col, pass_mark, course_col, section_co
|
|
| 83 |
n = int(len(valid))
|
| 84 |
missing = int(d["_marks"].isna().sum())
|
| 85 |
|
|
|
|
| 86 |
mean = float(valid["_marks"].mean()) if n else 0.0
|
| 87 |
std = float(valid["_marks"].std(ddof=0)) if n else 0.0
|
| 88 |
minv = float(valid["_marks"].min()) if n else 0.0
|
| 89 |
maxv = float(valid["_marks"].max()) if n else 0.0
|
| 90 |
|
| 91 |
-
pass_mark = int(pass_mark)
|
| 92 |
pass_count = int((valid["_marks"] >= pass_mark).sum()) if n else 0
|
| 93 |
pass_rate = (pass_count / n * 100.0) if n else 0.0
|
| 94 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
skew = float(valid["_marks"].skew()) if n else 0.0
|
| 96 |
kurt = float(valid["_marks"].kurt()) if n else 0.0
|
| 97 |
|
| 98 |
-
#
|
| 99 |
-
|
|
|
|
| 100 |
if n:
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
|
| 105 |
# Grade distribution
|
| 106 |
grade_dist = d["_grade"].value_counts(dropna=False).rename("count").to_frame().reset_index()
|
| 107 |
grade_dist.columns = [grade_col, "count"]
|
|
|
|
| 108 |
|
| 109 |
-
# Grade
|
| 110 |
grade_stats = (
|
| 111 |
valid.groupby(d["_grade"])["_marks"]
|
| 112 |
.agg(["count", "mean", "std", "min", "median", "max"])
|
|
@@ -116,25 +132,14 @@ def compute_insights(df, marks_col, grade_col, pass_mark, course_col, section_co
|
|
| 116 |
)
|
| 117 |
|
| 118 |
# Mark heaping (repeated marks)
|
| 119 |
-
|
| 120 |
valid["_marks"].round(0).astype(int)
|
| 121 |
.value_counts().head(12)
|
| 122 |
.rename("count").reset_index()
|
| 123 |
.rename(columns={"index": "Mark"})
|
| 124 |
)
|
| 125 |
|
| 126 |
-
#
|
| 127 |
-
outlier_count = 0
|
| 128 |
-
low_thr = high_thr = 0.0
|
| 129 |
-
if n:
|
| 130 |
-
q1 = float(np.percentile(valid["_marks"], 25))
|
| 131 |
-
q3 = float(np.percentile(valid["_marks"], 75))
|
| 132 |
-
iqr = q3 - q1
|
| 133 |
-
low_thr = q1 - 1.5 * iqr
|
| 134 |
-
high_thr = q3 + 1.5 * iqr
|
| 135 |
-
outlier_count = int(((valid["_marks"] < low_thr) | (valid["_marks"] > high_thr)).sum())
|
| 136 |
-
|
| 137 |
-
# Status
|
| 138 |
if pass_rate >= 80:
|
| 139 |
status = "GREEN"
|
| 140 |
elif pass_rate >= 60:
|
|
@@ -142,26 +147,34 @@ def compute_insights(df, marks_col, grade_col, pass_mark, course_col, section_co
|
|
| 142 |
else:
|
| 143 |
status = "RED"
|
| 144 |
|
| 145 |
-
#
|
| 146 |
flags = []
|
| 147 |
if missing > 0:
|
| 148 |
-
flags.append(f"{missing} missing mark(s)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
if abs(skew) > 0.7:
|
| 150 |
-
flags.append("Skewed distribution
|
| 151 |
-
if len(heaping_df) and heaping_df["count"].iloc[0] >= max(10, 0.06 * n):
|
| 152 |
-
flags.append("Heaping β many students share same mark (rounding/marking pattern).")
|
| 153 |
if outlier_count > 0:
|
| 154 |
-
flags.append(f"{outlier_count} outlier(s) by IQR
|
|
|
|
|
|
|
|
|
|
| 155 |
flags_text = " | ".join(flags) if flags else "No major warning patterns detected."
|
| 156 |
|
| 157 |
-
|
| 158 |
-
f"Status: {status}
|
| 159 |
-
f"
|
| 160 |
-
f"
|
| 161 |
-
f"
|
|
|
|
|
|
|
| 162 |
)
|
| 163 |
|
| 164 |
-
|
|
|
|
| 165 |
[
|
| 166 |
("Total rows (filtered)", total),
|
| 167 |
("Students with numeric marks", n),
|
|
@@ -169,6 +182,8 @@ def compute_insights(df, marks_col, grade_col, pass_mark, course_col, section_co
|
|
| 169 |
("Pass mark", pass_mark),
|
| 170 |
("Pass count", pass_count),
|
| 171 |
("Pass rate (%)", round(pass_rate, 2)),
|
|
|
|
|
|
|
| 172 |
("Average", round(mean, 2)),
|
| 173 |
("Std deviation", round(std, 2)),
|
| 174 |
("Minimum", round(minv, 2)),
|
|
@@ -178,29 +193,32 @@ def compute_insights(df, marks_col, grade_col, pass_mark, course_col, section_co
|
|
| 178 |
("Outlier low threshold (IQR)", round(low_thr, 2)),
|
| 179 |
("Outlier high threshold (IQR)", round(high_thr, 2)),
|
| 180 |
("Outlier count (IQR)", outlier_count),
|
|
|
|
| 181 |
],
|
| 182 |
columns=["Metric", "Value"],
|
| 183 |
)
|
| 184 |
|
| 185 |
# Charts
|
| 186 |
-
# 1
|
| 187 |
fig1 = plt.figure()
|
| 188 |
plt.hist(valid["_marks"].dropna(), bins=12)
|
|
|
|
| 189 |
plt.title("Marks distribution (Histogram)")
|
| 190 |
plt.xlabel("Marks")
|
| 191 |
plt.ylabel("Students")
|
| 192 |
|
| 193 |
-
# 2
|
| 194 |
fig2 = plt.figure()
|
| 195 |
xs = np.sort(valid["_marks"].dropna().values) if n else np.array([])
|
| 196 |
ys = np.arange(1, len(xs) + 1) / len(xs) if len(xs) else np.array([])
|
| 197 |
if len(xs):
|
| 198 |
plt.plot(xs, ys)
|
|
|
|
| 199 |
plt.title("CDF (Proportion of students β€ mark)")
|
| 200 |
plt.xlabel("Marks")
|
| 201 |
plt.ylabel("Proportion")
|
| 202 |
|
| 203 |
-
# 3
|
| 204 |
fig3 = plt.figure()
|
| 205 |
gd = grade_dist.set_index(grade_col)["count"]
|
| 206 |
plt.bar(gd.index.astype(str), gd.values)
|
|
@@ -209,12 +227,12 @@ def compute_insights(df, marks_col, grade_col, pass_mark, course_col, section_co
|
|
| 209 |
plt.ylabel("Count")
|
| 210 |
plt.xticks(rotation=45, ha="right")
|
| 211 |
|
| 212 |
-
# 4
|
| 213 |
fig4 = plt.figure()
|
| 214 |
if not grade_stats.empty:
|
| 215 |
-
order = grade_stats[
|
| 216 |
data = [valid.loc[d["_grade"] == g, "_marks"].dropna().values for g in order]
|
| 217 |
-
plt.boxplot(data, tick_labels=[str(g) for g in order], vert=True)
|
| 218 |
plt.title("Marks spread by Grade (Boxplot)")
|
| 219 |
plt.xlabel("Grade")
|
| 220 |
plt.ylabel("Marks")
|
|
@@ -222,17 +240,35 @@ def compute_insights(df, marks_col, grade_col, pass_mark, course_col, section_co
|
|
| 222 |
else:
|
| 223 |
plt.title("Marks spread by Grade (Boxplot)")
|
| 224 |
|
| 225 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 226 |
|
| 227 |
|
| 228 |
# =============================
|
| 229 |
# UI
|
| 230 |
# =============================
|
| 231 |
with gr.Blocks(title="HoD Result Dashboard") as demo:
|
| 232 |
-
gr.Markdown("## π HoD Result Dashboard β
|
| 233 |
|
| 234 |
-
|
| 235 |
-
sheet_state = gr.State(None)
|
| 236 |
|
| 237 |
with gr.Row():
|
| 238 |
upload = gr.File(label="Upload Excel (.xlsx)", file_types=[".xlsx"])
|
|
@@ -251,18 +287,19 @@ with gr.Blocks(title="HoD Result Dashboard") as demo:
|
|
| 251 |
course_filter = gr.Dropdown(label="Course filter", choices=["(all)"], value="(all)", interactive=False, visible=False)
|
| 252 |
section_filter = gr.Dropdown(label="Section filter", choices=["(all)"], value="(all)", interactive=False, visible=False)
|
| 253 |
|
| 254 |
-
analyze_btn = gr.Button("π Refresh Dashboard")
|
| 255 |
|
| 256 |
insight_md = gr.Markdown("")
|
| 257 |
|
| 258 |
with gr.Tab("Tables"):
|
| 259 |
with gr.Row():
|
| 260 |
kpi_table = gr.Dataframe(label="KPI Summary", interactive=False, wrap=True)
|
| 261 |
-
|
| 262 |
with gr.Row():
|
| 263 |
-
grade_dist_table = gr.Dataframe(label="Grade
|
| 264 |
-
heaping_table = gr.Dataframe(label="Mark
|
| 265 |
-
grade_stats_table = gr.Dataframe(label="Grade
|
|
|
|
| 266 |
|
| 267 |
with gr.Tab("Charts"):
|
| 268 |
with gr.Row():
|
|
@@ -271,61 +308,62 @@ with gr.Blocks(title="HoD Result Dashboard") as demo:
|
|
| 271 |
with gr.Row():
|
| 272 |
grade_plot = gr.Plot(label="Grade distribution")
|
| 273 |
grade_box = gr.Plot(label="Boxplot by grade")
|
|
|
|
| 274 |
|
| 275 |
-
# ---- callbacks
|
| 276 |
def on_upload(file_obj):
|
| 277 |
-
|
| 278 |
-
xls = pd.ExcelFile(io.BytesIO(
|
| 279 |
sheets = xls.sheet_names or []
|
| 280 |
if not sheets:
|
| 281 |
-
raise gr.Error("No sheets found.")
|
| 282 |
|
| 283 |
sheet0 = sheets[0]
|
| 284 |
-
df0 = pd.read_excel(io.BytesIO(
|
| 285 |
-
df0 = _drop_useless_cols(df0)
|
| 286 |
|
| 287 |
-
m_guess, g_guess, c_guess, s_guess = _guess_cols(df0)
|
| 288 |
cols = list(df0.columns)
|
|
|
|
|
|
|
|
|
|
| 289 |
|
| 290 |
-
#
|
| 291 |
-
|
| 292 |
-
|
| 293 |
|
| 294 |
-
|
| 295 |
-
|
| 296 |
|
| 297 |
if c_guess and c_guess in df0.columns:
|
| 298 |
vals = ["(all)"] + sorted(df0[c_guess].astype(str).fillna("NA").unique().tolist())
|
| 299 |
-
|
| 300 |
|
| 301 |
if s_guess and s_guess in df0.columns:
|
| 302 |
vals = ["(all)"] + sorted(df0[s_guess].astype(str).fillna("NA").unique().tolist())
|
| 303 |
-
|
| 304 |
|
| 305 |
return (
|
| 306 |
gr.update(choices=sheets, value=sheet0, interactive=True), # sheet_dd
|
| 307 |
gr.update(choices=cols, value=m_guess, interactive=True), # marks_col
|
| 308 |
gr.update(choices=cols, value=g_guess, interactive=True), # grade_col
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
sheet0,
|
| 315 |
)
|
| 316 |
|
| 317 |
upload.change(
|
| 318 |
fn=on_upload,
|
| 319 |
inputs=[upload],
|
| 320 |
-
outputs=[sheet_dd, marks_col, grade_col, course_col, section_col, course_filter, section_filter,
|
| 321 |
)
|
| 322 |
|
| 323 |
def on_sheet_change(sheet_name, file_bytes, course_col_val, section_col_val):
|
| 324 |
if not file_bytes:
|
| 325 |
raise gr.Error("Upload Excel first.")
|
| 326 |
|
| 327 |
-
df = pd.read_excel(io.BytesIO(file_bytes), sheet_name=sheet_name, engine="openpyxl")
|
| 328 |
-
df = _drop_useless_cols(df)
|
| 329 |
|
| 330 |
cf = gr.update(choices=["(all)"], value="(all)", visible=False, interactive=False)
|
| 331 |
sf = gr.update(choices=["(all)"], value="(all)", visible=False, interactive=False)
|
|
@@ -342,7 +380,7 @@ with gr.Blocks(title="HoD Result Dashboard") as demo:
|
|
| 342 |
|
| 343 |
sheet_dd.change(
|
| 344 |
fn=on_sheet_change,
|
| 345 |
-
inputs=[sheet_dd,
|
| 346 |
outputs=[course_filter, section_filter, sheet_state],
|
| 347 |
)
|
| 348 |
|
|
@@ -352,27 +390,23 @@ with gr.Blocks(title="HoD Result Dashboard") as demo:
|
|
| 352 |
if not sheet_name:
|
| 353 |
raise gr.Error("Select a sheet.")
|
| 354 |
|
| 355 |
-
df = pd.read_excel(io.BytesIO(file_bytes), sheet_name=sheet_name, engine="openpyxl")
|
| 356 |
-
df = _drop_useless_cols(df)
|
| 357 |
|
| 358 |
-
|
| 359 |
df, m_col, g_col, int(pmark), c_col, s_col, c_filter, s_filter
|
| 360 |
)
|
| 361 |
|
| 362 |
return (
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
gdist_df,
|
| 367 |
-
heap_df,
|
| 368 |
-
gstats_df,
|
| 369 |
-
f1, f2, f3, f4
|
| 370 |
)
|
| 371 |
|
| 372 |
analyze_btn.click(
|
| 373 |
fn=on_refresh,
|
| 374 |
-
inputs=[
|
| 375 |
-
outputs=[insight_md, kpi_table,
|
|
|
|
| 376 |
)
|
| 377 |
|
| 378 |
demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)
|
|
|
|
| 1 |
import io
|
|
|
|
|
|
|
| 2 |
import numpy as np
|
| 3 |
import pandas as pd
|
| 4 |
import gradio as gr
|
|
|
|
| 21 |
return b
|
| 22 |
|
| 23 |
|
| 24 |
+
def _drop_useless(df: pd.DataFrame) -> pd.DataFrame:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
df = df.dropna(axis=1, how="all").dropna(axis=0, how="all")
|
| 26 |
unnamed = [c for c in df.columns if str(c).strip().lower().startswith("unnamed")]
|
| 27 |
if unnamed:
|
|
|
|
| 29 |
return df
|
| 30 |
|
| 31 |
|
| 32 |
+
def _safe_numeric(s):
    """Convert *s* to numbers, turning unparseable entries into NaN.

    Thin wrapper around ``pd.to_numeric(s, errors="coerce")`` so that
    non-numeric cells become NaN instead of raising.
    """
    return pd.to_numeric(s, errors="coerce")


def _guess_marks_col(df: pd.DataFrame):
    """Guess which column of *df* holds the marks.

    Every column is scored by the fraction of its cells that parse as a
    number; the first column with the highest score is returned.

    Args:
        df: The sheet to inspect.

    Returns:
        The label of the best-scoring column, or ``None`` when *df* has no
        columns at all.
    """
    cols = list(df.columns)
    if not cols:
        # Indexing cols[0] on a frame without columns raises IndexError,
        # so report "no guess" instead.
        return None

    best, best_score = cols[0], -1
    for c in cols:
        score = _safe_numeric(df[c]).notna().mean()
        # Strict ">" keeps the earliest column on ties; a NaN score (frame
        # with zero rows) never compares greater, so cols[0] stays selected.
        if score > best_score:
            best, best_score = c, score
    return best
|
| 45 |
+
|
| 46 |
|
| 47 |
+
def _guess_grade_col(cols):
    """Guess the grade column from a sequence of column labels.

    Args:
        cols: Column labels (any objects; they are compared via ``str``).

    Returns:
        The first label whose stripped, lower-cased text contains "grade"
        (or the misspelling "grde"); otherwise the first label; ``None``
        when *cols* is empty.
    """
    cols = list(cols)
    if not cols:
        # The fallback cols[0] would raise IndexError on an empty list.
        return None

    for c in cols:
        name = str(c).strip().lower()
        if "grade" in name or "grde" in name:
            return c
    return cols[0]
|
| 50 |
|
|
|
|
|
|
|
|
|
|
| 51 |
|
| 52 |
+
def _guess_optional(cols):
    """Guess the optional course and section columns from column labels.

    Labels are matched on their stripped, lower-cased text. The first label
    containing any course keyword is the course guess; the first label
    containing any section keyword is the section guess. The two guesses are
    independent, so one label can satisfy both.

    Returns:
        ``(course, section)``; either element is ``None`` when nothing matched.
    """
    labelled = [(col, str(col).strip().lower()) for col in cols]

    def first_with(keywords):
        # Earliest column whose normalized label contains any keyword.
        for col, label in labelled:
            if any(word in label for word in keywords):
                return col
        return None

    course = first_with(["course", "module", "subject"])
    section = first_with(["section", "group", "batch", "class"])
    return course, section
|
| 57 |
|
| 58 |
|
| 59 |
def apply_filters(df, course_col, section_col, course_filter, section_filter):
|
|
|
|
| 66 |
|
| 67 |
|
| 68 |
# =============================
|
| 69 |
+
# Core HoD Insights (no student tables)
|
| 70 |
# =============================
|
| 71 |
+
def compute_hod_insights(df, marks_col, grade_col, pass_mark, course_col, section_col, course_filter, section_filter):
|
| 72 |
if df is None or df.empty:
|
| 73 |
raise gr.Error("Sheet is empty.")
|
| 74 |
|
|
|
|
| 82 |
n = int(len(valid))
|
| 83 |
missing = int(d["_marks"].isna().sum())
|
| 84 |
|
| 85 |
+
pass_mark = int(pass_mark)
|
| 86 |
mean = float(valid["_marks"].mean()) if n else 0.0
|
| 87 |
std = float(valid["_marks"].std(ddof=0)) if n else 0.0
|
| 88 |
minv = float(valid["_marks"].min()) if n else 0.0
|
| 89 |
maxv = float(valid["_marks"].max()) if n else 0.0
|
| 90 |
|
|
|
|
| 91 |
pass_count = int((valid["_marks"] >= pass_mark).sum()) if n else 0
|
| 92 |
pass_rate = (pass_count / n * 100.0) if n else 0.0
|
| 93 |
|
| 94 |
+
# Borderline (pass to pass+5) and just-below (pass-5 to pass-1)
|
| 95 |
+
borderline_pass = int(((valid["_marks"] >= pass_mark) & (valid["_marks"] < pass_mark + 5)).sum()) if n else 0
|
| 96 |
+
borderline_fail = int(((valid["_marks"] < pass_mark) & (valid["_marks"] >= pass_mark - 5)).sum()) if n else 0
|
| 97 |
+
|
| 98 |
+
# Distribution shape
|
| 99 |
skew = float(valid["_marks"].skew()) if n else 0.0
|
| 100 |
kurt = float(valid["_marks"].kurt()) if n else 0.0
|
| 101 |
|
| 102 |
+
# Outliers by IQR
|
| 103 |
+
outlier_count = 0
|
| 104 |
+
low_thr = high_thr = 0.0
|
| 105 |
if n:
|
| 106 |
+
q1 = float(np.percentile(valid["_marks"], 25))
|
| 107 |
+
q3 = float(np.percentile(valid["_marks"], 75))
|
| 108 |
+
iqr = q3 - q1
|
| 109 |
+
low_thr = q1 - 1.5 * iqr
|
| 110 |
+
high_thr = q3 + 1.5 * iqr
|
| 111 |
+
outlier_count = int(((valid["_marks"] < low_thr) | (valid["_marks"] > high_thr)).sum())
|
| 112 |
+
|
| 113 |
+
# Percentiles
|
| 114 |
+
pct_df = pd.DataFrame(
|
| 115 |
+
[(f"P{p}", round(float(np.percentile(valid["_marks"], p)), 2)) for p in [10, 25, 50, 75, 90]]
|
| 116 |
+
if n else [],
|
| 117 |
+
columns=["Percentile", "Marks"]
|
| 118 |
+
)
|
| 119 |
|
| 120 |
# Grade distribution
|
| 121 |
grade_dist = d["_grade"].value_counts(dropna=False).rename("count").to_frame().reset_index()
|
| 122 |
grade_dist.columns = [grade_col, "count"]
|
| 123 |
+
grade_dist["%"] = (grade_dist["count"] / grade_dist["count"].sum() * 100).round(2) if len(grade_dist) else 0
|
| 124 |
|
| 125 |
+
# Grade β marks mapping (moderation evidence)
|
| 126 |
grade_stats = (
|
| 127 |
valid.groupby(d["_grade"])["_marks"]
|
| 128 |
.agg(["count", "mean", "std", "min", "median", "max"])
|
|
|
|
| 132 |
)
|
| 133 |
|
| 134 |
# Mark heaping (repeated marks)
|
| 135 |
+
heaping = (
|
| 136 |
valid["_marks"].round(0).astype(int)
|
| 137 |
.value_counts().head(12)
|
| 138 |
.rename("count").reset_index()
|
| 139 |
.rename(columns={"index": "Mark"})
|
| 140 |
)
|
| 141 |
|
| 142 |
+
# Course status
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 143 |
if pass_rate >= 80:
|
| 144 |
status = "GREEN"
|
| 145 |
elif pass_rate >= 60:
|
|
|
|
| 147 |
else:
|
| 148 |
status = "RED"
|
| 149 |
|
| 150 |
+
# Flags
|
| 151 |
flags = []
|
| 152 |
if missing > 0:
|
| 153 |
+
flags.append(f"{missing} missing mark(s) β verify completeness.")
|
| 154 |
+
if borderline_fail > max(5, 0.03 * n):
|
| 155 |
+
flags.append("Many students just below pass β consider targeted support / moderation review.")
|
| 156 |
+
if borderline_pass > max(5, 0.03 * n):
|
| 157 |
+
flags.append("Many students just above pass β borderline attainment cluster.")
|
| 158 |
if abs(skew) > 0.7:
|
| 159 |
+
flags.append("Skewed distribution β check assessment balance and marking consistency.")
|
|
|
|
|
|
|
| 160 |
if outlier_count > 0:
|
| 161 |
+
flags.append(f"{outlier_count} outlier(s) by IQR β spot-check extremes.")
|
| 162 |
+
if len(heaping) and heaping["count"].iloc[0] >= max(10, 0.06 * n):
|
| 163 |
+
flags.append("Heaping detected β many students share identical marks (rounding/marking pattern).")
|
| 164 |
+
|
| 165 |
flags_text = " | ".join(flags) if flags else "No major warning patterns detected."
|
| 166 |
|
| 167 |
+
insight = (
|
| 168 |
+
f"**Status:** {status} \n"
|
| 169 |
+
f"**Pass rate:** {pass_rate:.1f}% (Pass mark = {pass_mark}) \n"
|
| 170 |
+
f"**Avg:** {mean:.1f} | **Std:** {std:.1f} | **Min/Max:** {minv:.1f}/{maxv:.1f} \n"
|
| 171 |
+
f"**Borderline (just below pass):** {borderline_fail} | **Borderline (just above pass):** {borderline_pass} \n"
|
| 172 |
+
f"**Skew:** {skew:.2f} | **Kurtosis:** {kurt:.2f} | **Outliers:** {outlier_count} | **Missing:** {missing} \n"
|
| 173 |
+
f"**Flags:** {flags_text}"
|
| 174 |
)
|
| 175 |
|
| 176 |
+
# KPI table
|
| 177 |
+
kpi = pd.DataFrame(
|
| 178 |
[
|
| 179 |
("Total rows (filtered)", total),
|
| 180 |
("Students with numeric marks", n),
|
|
|
|
| 182 |
("Pass mark", pass_mark),
|
| 183 |
("Pass count", pass_count),
|
| 184 |
("Pass rate (%)", round(pass_rate, 2)),
|
| 185 |
+
("Borderline just below pass", borderline_fail),
|
| 186 |
+
("Borderline just above pass", borderline_pass),
|
| 187 |
("Average", round(mean, 2)),
|
| 188 |
("Std deviation", round(std, 2)),
|
| 189 |
("Minimum", round(minv, 2)),
|
|
|
|
| 193 |
("Outlier low threshold (IQR)", round(low_thr, 2)),
|
| 194 |
("Outlier high threshold (IQR)", round(high_thr, 2)),
|
| 195 |
("Outlier count (IQR)", outlier_count),
|
| 196 |
+
("Status", status),
|
| 197 |
],
|
| 198 |
columns=["Metric", "Value"],
|
| 199 |
)
|
| 200 |
|
| 201 |
# Charts
|
| 202 |
+
# 1 Histogram
|
| 203 |
fig1 = plt.figure()
|
| 204 |
plt.hist(valid["_marks"].dropna(), bins=12)
|
| 205 |
+
plt.axvline(pass_mark, linestyle="--")
|
| 206 |
plt.title("Marks distribution (Histogram)")
|
| 207 |
plt.xlabel("Marks")
|
| 208 |
plt.ylabel("Students")
|
| 209 |
|
| 210 |
+
# 2 CDF
|
| 211 |
fig2 = plt.figure()
|
| 212 |
xs = np.sort(valid["_marks"].dropna().values) if n else np.array([])
|
| 213 |
ys = np.arange(1, len(xs) + 1) / len(xs) if len(xs) else np.array([])
|
| 214 |
if len(xs):
|
| 215 |
plt.plot(xs, ys)
|
| 216 |
+
plt.axvline(pass_mark, linestyle="--")
|
| 217 |
plt.title("CDF (Proportion of students β€ mark)")
|
| 218 |
plt.xlabel("Marks")
|
| 219 |
plt.ylabel("Proportion")
|
| 220 |
|
| 221 |
+
# 3 Grade distribution
|
| 222 |
fig3 = plt.figure()
|
| 223 |
gd = grade_dist.set_index(grade_col)["count"]
|
| 224 |
plt.bar(gd.index.astype(str), gd.values)
|
|
|
|
| 227 |
plt.ylabel("Count")
|
| 228 |
plt.xticks(rotation=45, ha="right")
|
| 229 |
|
| 230 |
+
# 4 Boxplot by grade (moderation)
|
| 231 |
fig4 = plt.figure()
|
| 232 |
if not grade_stats.empty:
|
| 233 |
+
order = grade_stats["Grade"].tolist()
|
| 234 |
data = [valid.loc[d["_grade"] == g, "_marks"].dropna().values for g in order]
|
| 235 |
+
plt.boxplot(data, tick_labels=[str(g) for g in order], vert=True)
|
| 236 |
plt.title("Marks spread by Grade (Boxplot)")
|
| 237 |
plt.xlabel("Grade")
|
| 238 |
plt.ylabel("Marks")
|
|
|
|
| 240 |
else:
|
| 241 |
plt.title("Marks spread by Grade (Boxplot)")
|
| 242 |
|
| 243 |
+
# 5 Section comparison (optional)
|
| 244 |
+
fig5 = plt.figure()
|
| 245 |
+
section_table = pd.DataFrame()
|
| 246 |
+
if section_col and section_col in d.columns and n:
|
| 247 |
+
sec = valid.groupby(d[section_col].astype(str).fillna("NA"))["_marks"].agg(["count", "mean"]).reset_index()
|
| 248 |
+
sec["pass_rate_%"] = (valid.groupby(d[section_col].astype(str).fillna("NA"))["_marks"].apply(lambda x: (x >= pass_mark).mean() * 100)).values
|
| 249 |
+
sec = sec.rename(columns={section_col: "Section"})
|
| 250 |
+
section_table = sec.sort_values("pass_rate_%", ascending=False)
|
| 251 |
+
|
| 252 |
+
plt.bar(section_table["Section"].astype(str), section_table["pass_rate_%"].values)
|
| 253 |
+
plt.title("Section-wise Pass Rate (%)")
|
| 254 |
+
plt.xlabel("Section")
|
| 255 |
+
plt.ylabel("Pass rate (%)")
|
| 256 |
+
plt.xticks(rotation=45, ha="right")
|
| 257 |
+
else:
|
| 258 |
+
plt.title("Section-wise Pass Rate (%) β not available (no section column)")
|
| 259 |
+
plt.axis("off")
|
| 260 |
+
|
| 261 |
+
return kpi, pct_df, grade_dist, grade_stats, heaping, section_table, insight, fig1, fig2, fig3, fig4, fig5
|
| 262 |
|
| 263 |
|
| 264 |
# =============================
|
| 265 |
# UI
|
| 266 |
# =============================
|
| 267 |
with gr.Blocks(title="HoD Result Dashboard") as demo:
|
| 268 |
+
gr.Markdown("## π HoD Result Dashboard β Insights & Patterns (No Student Tables, No PDF)")
|
| 269 |
|
| 270 |
+
file_bytes_state = gr.State(None) # bytes
|
| 271 |
+
sheet_state = gr.State(None) # sheet name string
|
| 272 |
|
| 273 |
with gr.Row():
|
| 274 |
upload = gr.File(label="Upload Excel (.xlsx)", file_types=[".xlsx"])
|
|
|
|
| 287 |
course_filter = gr.Dropdown(label="Course filter", choices=["(all)"], value="(all)", interactive=False, visible=False)
|
| 288 |
section_filter = gr.Dropdown(label="Section filter", choices=["(all)"], value="(all)", interactive=False, visible=False)
|
| 289 |
|
| 290 |
+
analyze_btn = gr.Button("π Refresh HoD Dashboard")
|
| 291 |
|
| 292 |
insight_md = gr.Markdown("")
|
| 293 |
|
| 294 |
with gr.Tab("Tables"):
|
| 295 |
with gr.Row():
|
| 296 |
kpi_table = gr.Dataframe(label="KPI Summary", interactive=False, wrap=True)
|
| 297 |
+
pct_table = gr.Dataframe(label="Percentiles", interactive=False, wrap=True)
|
| 298 |
with gr.Row():
|
| 299 |
+
grade_dist_table = gr.Dataframe(label="Grade Distribution", interactive=False, wrap=True)
|
| 300 |
+
heaping_table = gr.Dataframe(label="Mark Heaping (Top repeated marks)", interactive=False, wrap=True)
|
| 301 |
+
grade_stats_table = gr.Dataframe(label="Grade β Marks (Moderation evidence)", interactive=False, wrap=True)
|
| 302 |
+
section_table = gr.Dataframe(label="Section Comparison (if available)", interactive=False, wrap=True)
|
| 303 |
|
| 304 |
with gr.Tab("Charts"):
|
| 305 |
with gr.Row():
|
|
|
|
| 308 |
with gr.Row():
|
| 309 |
grade_plot = gr.Plot(label="Grade distribution")
|
| 310 |
grade_box = gr.Plot(label="Boxplot by grade")
|
| 311 |
+
section_plot = gr.Plot(label="Section-wise pass rate")
|
| 312 |
|
| 313 |
+
# -------- callbacks
|
| 314 |
def on_upload(file_obj):
    """Handle a newly uploaded workbook and prime the selectors from its first sheet.

    Returns:
        A 9-tuple in the order of the ``upload.change`` outputs: updates for
        sheet_dd, marks_col, grade_col, course_col, section_col,
        course_filter and section_filter, followed by the raw file bytes and
        the name of the first sheet.

    Raises:
        gr.Error: If the workbook contains no sheets.
    """
    b = _read_file_bytes(file_obj)

    # Open the workbook once, read the first sheet from the same handle, and
    # let the context manager close it (previously it was parsed twice from
    # the bytes and the ExcelFile was never closed).
    with pd.ExcelFile(io.BytesIO(b), engine="openpyxl") as xls:
        sheets = xls.sheet_names or []
        if not sheets:
            raise gr.Error("No sheets found in workbook.")
        sheet0 = sheets[0]
        df0 = _drop_useless(xls.parse(sheet_name=sheet0))

    cols = list(df0.columns)
    # A first sheet with no usable columns would crash on cols[0]; fall back
    # to "no selection" so the sheet dropdown still gets populated.
    first_col = cols[0] if cols else None
    m_guess = _guess_marks_col(df0) if cols else None
    g_guess = _guess_grade_col(cols) if cols else None
    c_guess, s_guess = _guess_optional(cols)

    def _col_update(guess):
        # Optional column selectors are shown only when a column was guessed.
        found = bool(guess)
        return gr.update(choices=cols, value=(guess or first_col), visible=found, interactive=found)

    def _filter_update(guess):
        # Value filter for an optional column; hidden when no column was guessed.
        if guess and guess in df0.columns:
            # NOTE(review): astype(str) may already stringify NaN (making
            # fillna("NA") a no-op, depending on pandas version); left as-is
            # because apply_filters may match on these exact strings - confirm.
            vals = ["(all)"] + sorted(df0[guess].astype(str).fillna("NA").unique().tolist())
            return gr.update(choices=vals, value="(all)", visible=True, interactive=True)
        return gr.update(choices=["(all)"], value="(all)", visible=False, interactive=False)

    return (
        gr.update(choices=sheets, value=sheet0, interactive=True),  # sheet_dd
        gr.update(choices=cols, value=m_guess, interactive=True),  # marks_col
        gr.update(choices=cols, value=g_guess, interactive=True),  # grade_col
        _col_update(c_guess),  # course_col
        _col_update(s_guess),  # section_col
        _filter_update(c_guess),  # course_filter
        _filter_update(s_guess),  # section_filter
        b,  # file_bytes_state
        sheet0,  # sheet_state
    )
|
| 355 |
|
| 356 |
upload.change(
|
| 357 |
fn=on_upload,
|
| 358 |
inputs=[upload],
|
| 359 |
+
outputs=[sheet_dd, marks_col, grade_col, course_col, section_col, course_filter, section_filter, file_bytes_state, sheet_state],
|
| 360 |
)
|
| 361 |
|
| 362 |
def on_sheet_change(sheet_name, file_bytes, course_col_val, section_col_val):
|
| 363 |
if not file_bytes:
|
| 364 |
raise gr.Error("Upload Excel first.")
|
| 365 |
|
| 366 |
+
df = _drop_useless(pd.read_excel(io.BytesIO(file_bytes), sheet_name=sheet_name, engine="openpyxl"))
|
|
|
|
| 367 |
|
| 368 |
cf = gr.update(choices=["(all)"], value="(all)", visible=False, interactive=False)
|
| 369 |
sf = gr.update(choices=["(all)"], value="(all)", visible=False, interactive=False)
|
|
|
|
| 380 |
|
| 381 |
sheet_dd.change(
|
| 382 |
fn=on_sheet_change,
|
| 383 |
+
inputs=[sheet_dd, file_bytes_state, course_col, section_col],
|
| 384 |
outputs=[course_filter, section_filter, sheet_state],
|
| 385 |
)
|
| 386 |
|
|
|
|
| 390 |
if not sheet_name:
|
| 391 |
raise gr.Error("Select a sheet.")
|
| 392 |
|
| 393 |
+
df = _drop_useless(pd.read_excel(io.BytesIO(file_bytes), sheet_name=sheet_name, engine="openpyxl"))
|
|
|
|
| 394 |
|
| 395 |
+
kpi, pct, gdist, gstats, heap, sec_tbl, insight, f1, f2, f3, f4, f5 = compute_hod_insights(
|
| 396 |
df, m_col, g_col, int(pmark), c_col, s_col, c_filter, s_filter
|
| 397 |
)
|
| 398 |
|
| 399 |
return (
|
| 400 |
+
"### HoD Insight\n\n" + insight,
|
| 401 |
+
kpi, pct, gdist, heap, gstats, sec_tbl,
|
| 402 |
+
f1, f2, f3, f4, f5
|
|
|
|
|
|
|
|
|
|
|
|
|
| 403 |
)
|
| 404 |
|
| 405 |
analyze_btn.click(
|
| 406 |
fn=on_refresh,
|
| 407 |
+
inputs=[file_bytes_state, sheet_state, marks_col, grade_col, pass_mark, course_col, section_col, course_filter, section_filter],
|
| 408 |
+
outputs=[insight_md, kpi_table, pct_table, grade_dist_table, heaping_table, grade_stats_table, section_table,
|
| 409 |
+
hist_plot, cdf_plot, grade_plot, grade_box, section_plot],
|
| 410 |
)
|
| 411 |
|
| 412 |
demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)
|