Deevyankar committed on
Commit
75845be
Β·
verified Β·
1 Parent(s): eae0add

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +230 -124
app.py CHANGED
@@ -1,16 +1,20 @@
1
- # app.py
2
  import streamlit as st
3
  import pandas as pd
4
  import numpy as np
5
  import plotly.express as px
 
6
 
7
  st.set_page_config(page_title="Excel β†’ Management Insights (Power BI style)", layout="wide")
8
 
9
  st.title("πŸ“Š Excel β†’ Interactive Management Dashboard (Power BI style)")
10
- st.caption("Grade-based decision rule: **PASS if Grade β‰₯ C (including C, C+, B-, etc.)** and **FAIL if below C (C-, D, F, etc.)**. Marks thresholds are not used.")
 
 
 
11
 
12
  # -----------------------------
13
- # Grade logic (FINAL as per you)
14
  # -----------------------------
15
  def grade_pass_fail(g):
16
  if pd.isna(g):
@@ -33,48 +37,146 @@ def grade_pass_fail(g):
33
 
34
  return "Unknown"
35
 
36
- def pick_grade_column(df: pd.DataFrame) -> str:
37
- # User confirmed "Grade is last column" β€” we still try to be robust.
38
- candidates = [c for c in df.columns if "grade" in str(c).lower()]
39
- if candidates:
40
- return candidates[-1]
41
- return df.columns[-1]
42
 
43
  def normalize_headers(df: pd.DataFrame) -> pd.DataFrame:
44
- # Clean common trailing spaces
45
  df = df.copy()
46
  df.columns = [str(c).strip() for c in df.columns]
47
  return df
48
 
 
 
 
 
 
 
 
49
  def coerce_numeric(df: pd.DataFrame, cols):
50
  for c in cols:
51
  if c in df.columns:
52
  df[c] = pd.to_numeric(df[c], errors="coerce")
53
  return df
54
 
55
- def describe_fail_reason(row, components):
56
- # Human-readable reason (simple, management-friendly)
57
- if row.get("PassFail") != "Fail":
58
- return ""
59
- hints = []
60
- for c in components:
61
- v = row.get(c)
62
- if pd.notna(v):
63
- # rough, non-controversial hinting β€” not using thresholds for pass/fail
64
- if c.lower().find("final") >= 0 and v < np.nanpercentile(components_df[c].dropna(), 25):
65
- hints.append("Final exam is in the lower quartile")
66
- if c.lower().find("lab") >= 0 and v < np.nanpercentile(components_df[c].dropna(), 25):
67
- hints.append("Lab total is in the lower quartile")
68
- if c.lower().find("mid") >= 0 and v < np.nanpercentile(components_df[c].dropna(), 25):
69
- hints.append("Mid exam is in the lower quartile")
70
- if c.lower().find("test") >= 0 and v < np.nanpercentile(components_df[c].dropna(), 25):
71
- hints.append("Test score is in the lower quartile")
72
- if not hints:
73
- return "Grade below C (check component performance & attendance/assessment issues)."
74
- return " | ".join(hints)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
 
76
  # -----------------------------
77
- # Upload + read
78
  # -----------------------------
79
  uploaded = st.file_uploader("Upload Excel (.xlsx)", type=["xlsx"])
80
 
@@ -82,65 +184,35 @@ if uploaded is None:
82
  st.info("Upload an Excel file to begin.")
83
  st.stop()
84
 
85
- xls = pd.ExcelFile(uploaded)
86
- sheet = st.selectbox("Select sheet", xls.sheet_names, index=0)
87
- raw = pd.read_excel(uploaded, sheet_name=sheet)
88
- raw = normalize_headers(raw)
89
 
90
- # Try to remove non-student rows (robust: keep rows with any numeric marks OR any grade-like text)
91
- grade_col_name = pick_grade_column(raw)
92
- tmp_grade = raw[grade_col_name].astype(str).str.strip()
93
- grade_like = tmp_grade.str.match(r"^[A-Fa-f][\+\-]?$", na=False)
94
 
95
- numeric_cols_guess = [c for c in raw.columns if c != grade_col_name]
96
- numeric_signal = raw[numeric_cols_guess].apply(pd.to_numeric, errors="coerce").notna().sum(axis=1) > 0
97
 
98
- df = raw[grade_like | numeric_signal].copy()
99
 
100
- # Add Sno if exists, else create row id
101
- sno_col = None
102
- for c in df.columns:
103
- if str(c).strip().lower() in ["sno", "sno.", "sr", "sr.", "id", "studentid", "student id"]:
104
- sno_col = c
105
- break
106
- if sno_col is None:
107
- df.insert(0, "Sno", range(1, len(df) + 1))
108
- sno_col = "Sno"
109
 
110
- # Grade column
111
  df["Grade"] = df[grade_col_name].astype(str).str.strip().str.upper()
112
  df["PassFail"] = df["Grade"].apply(grade_pass_fail)
113
  df["Pass"] = df["PassFail"].eq("Pass")
114
  df["Fail"] = df["PassFail"].eq("Fail")
 
115
 
116
- # Identify likely mark columns (common names; if not found, pick numeric ones)
117
- common_components = ["Test -1", "Test-1", "Test 1", "Mid Exam", "Mid", "Lab Total", "Final Exam", "Total"]
118
- component_cols = [c for c in df.columns if c in common_components]
119
- if not component_cols:
120
- # fallback: all numeric columns except Sno
121
- num_cols = df.columns[df.apply(lambda s: pd.to_numeric(s, errors="coerce").notna().mean() > 0.4)]
122
- component_cols = [c for c in num_cols if c != sno_col]
123
-
124
- # Coerce numerics (if present)
125
  df = coerce_numeric(df, component_cols)
126
 
127
- # Consistency score (std across available components)
128
- if len(component_cols) >= 2:
129
- df["Consistency_SD"] = df[component_cols].std(axis=1, skipna=True)
130
- else:
131
- df["Consistency_SD"] = np.nan
132
-
133
- # Global for hinting
134
- components_df = df.copy()
135
-
136
- # Optional β€œFail reason” (for drilldown / risk view)
137
- if component_cols:
138
- df["FailReasonHint"] = df.apply(lambda r: describe_fail_reason(r, component_cols), axis=1)
139
- else:
140
- df["FailReasonHint"] = np.where(df["Fail"], "Grade below C.", "")
141
 
142
  # -----------------------------
143
- # Sidebar: β€œPower BI pages”
144
  # -----------------------------
145
  st.sidebar.header("Perspective")
146
  view = st.sidebar.radio(
@@ -150,7 +222,9 @@ view = st.sidebar.radio(
150
  )
151
 
152
  st.sidebar.header("Filters")
153
- pf = st.sidebar.multiselect("Pass/Fail", ["Pass", "Fail", "Unknown"], default=["Pass", "Fail", "Unknown"])
 
 
154
  grade_unique = sorted([g for g in df["Grade"].dropna().unique()])
155
  sel_grades = st.sidebar.multiselect("Grades", grade_unique, default=grade_unique)
156
 
@@ -161,9 +235,12 @@ filtered = filtered[filtered["Grade"].isin(sel_grades)]
161
  # KPI Row
162
  # -----------------------------
163
  k1, k2, k3, k4, k5 = st.columns(5)
164
- with k1: st.metric("Students", int(filtered.shape[0]))
165
- with k2: st.metric("Pass", int(filtered["Pass"].sum()))
166
- with k3: st.metric("Fail", int(filtered["Fail"].sum()))
 
 
 
167
  with k4:
168
  pr = (filtered["Pass"].mean() * 100) if filtered.shape[0] else 0
169
  st.metric("Pass Rate", f"{pr:.1f}%")
@@ -178,7 +255,7 @@ st.divider()
178
  # -----------------------------
179
  # Views
180
  # -----------------------------
181
- def executive_view(d):
182
  left, right = st.columns([1, 1])
183
 
184
  with left:
@@ -198,9 +275,10 @@ def executive_view(d):
198
  st.subheader("Hidden Patterns (Quick Signals)")
199
  c1, c2, c3 = st.columns(3)
200
 
201
- # Pattern: Strong Lab but Fail (if lab exists)
202
- if any("Lab" in c for c in component_cols):
203
- lab_col = [c for c in component_cols if "Lab" in c][0]
 
204
  strong_lab_fail = d[(d["Fail"]) & (d[lab_col].notna()) & (d[lab_col] >= d[lab_col].quantile(0.75))]
205
  with c1:
206
  st.metric("Fail with Strong Lab", int(strong_lab_fail.shape[0]))
@@ -208,7 +286,7 @@ def executive_view(d):
208
  with c1:
209
  st.metric("Fail with Strong Lab", "β€”")
210
 
211
- # Pattern: Inconsistent high SD
212
  if "Consistency_SD" in d.columns and d["Consistency_SD"].notna().any():
213
  top_incons = d["Consistency_SD"].quantile(0.90)
214
  with c2:
@@ -217,7 +295,7 @@ def executive_view(d):
217
  with c2:
218
  st.metric("High Inconsistency (Top 10%)", "β€”")
219
 
220
- # Pattern: Fail with good Total (if Total exists)
221
  if "Total" in d.columns and pd.api.types.is_numeric_dtype(d["Total"]) and d["Total"].notna().any():
222
  good_total_fail = d[(d["Fail"]) & (d["Total"] >= d["Total"].quantile(0.75))]
223
  with c3:
@@ -226,97 +304,115 @@ def executive_view(d):
226
  with c3:
227
  st.metric("Fail with High Total", "β€”")
228
 
229
- if component_cols and "Total" in d.columns and pd.api.types.is_numeric_dtype(d["Total"]):
 
 
230
  st.subheader("What Drives Total? (Correlation)")
231
- corr_cols = [c for c in component_cols if c in d.columns] + ["Total"]
232
  corr = d[corr_cols].corr(numeric_only=True)
233
  fig = px.imshow(corr, text_auto=True, aspect="auto")
234
  st.plotly_chart(fig, use_container_width=True)
235
 
236
- def risk_view(d):
237
- st.subheader("Fail List (Grade < C)")
 
238
  fails = d[d["Fail"]].copy()
239
 
240
- # Bucket: C- vs D/F etc.
 
 
 
241
  fails["FailType"] = np.where(fails["Grade"].str.startswith("C-"), "C- (Borderline Fail)", "Below C")
 
242
  bucket = fails["FailType"].value_counts().reset_index()
243
  bucket.columns = ["Fail Type", "Count"]
 
244
  c1, c2 = st.columns([1, 2])
245
  with c1:
246
  fig = px.bar(bucket, x="Fail Type", y="Count")
247
  st.plotly_chart(fig, use_container_width=True)
 
248
  with c2:
249
  show_cols = [sno_col, "Grade", "PassFail"]
250
  for c in ["Total"] + component_cols:
251
  if c in fails.columns and c not in show_cols:
252
  show_cols.append(c)
253
- show_cols += ["FailReasonHint"]
254
- st.dataframe(fails[show_cols].sort_values(by=["Grade", sno_col]), use_container_width=True, height=420)
 
 
 
 
 
255
 
256
  st.subheader("Intervention Suggestions (Management-friendly)")
257
  st.markdown(
258
  """
259
- - **Many C- failures** β†’ run targeted revision + re-assessment readiness support (borderline group).
260
- - **Failures concentrated with low Final** β†’ strengthen exam preparation (mock exams + feedback).
261
- - **Failures with strong Lab** β†’ review exam alignment, study strategy, and assessment balance.
262
  """
263
  )
264
 
265
- def assessment_quality_view(d):
 
266
  st.subheader("Assessment Component Overview")
267
- if not component_cols:
268
- st.warning("No numeric component columns detected. Add columns like Test/Mid/Lab/Final/Total for deeper assessment analysis.")
 
 
269
  return
270
 
271
- # Component distributions
272
- comp = st.selectbox("Choose component", component_cols, index=min(0, len(component_cols)-1))
273
  fig = px.histogram(d, x=comp, nbins=20)
274
  st.plotly_chart(fig, use_container_width=True)
275
 
276
- # Component vs Grade
277
  st.subheader("Component vs Grade (Boxplot)")
278
  fig = px.box(d, x="Grade", y=comp)
279
  st.plotly_chart(fig, use_container_width=True)
280
 
281
- # Zero / missing checks
282
  st.subheader("Data Quality Flags")
283
  flags = []
284
- for c in component_cols:
285
  series = d[c]
286
- if pd.api.types.is_numeric_dtype(series):
287
- missing = int(series.isna().sum())
288
- zeros = int((series == 0).sum())
289
- flags.append({"Component": c, "Missing": missing, "Zeros": zeros})
290
  st.dataframe(pd.DataFrame(flags), use_container_width=True)
291
 
292
- # If Total exists: correlation heatmap
293
  if "Total" in d.columns and pd.api.types.is_numeric_dtype(d["Total"]):
294
  st.subheader("Correlation Heatmap")
295
- corr_cols = [c for c in component_cols if c in d.columns] + ["Total"]
296
  corr = d[corr_cols].corr(numeric_only=True)
297
  fig = px.imshow(corr, text_auto=True, aspect="auto")
298
  st.plotly_chart(fig, use_container_width=True)
299
 
300
- def student_drilldown_view(d):
 
301
  st.subheader("Student Drill-down")
302
- st.caption("Pick a student to view component breakdown and the grade-based decision.")
 
303
  sid = st.selectbox("Select student (Sno)", sorted(d[sno_col].unique()))
304
  row = d[d[sno_col] == sid].iloc[0]
305
 
306
  c1, c2, c3 = st.columns(3)
307
- with c1: st.metric("Grade", str(row.get("Grade", "β€”")))
308
- with c2: st.metric("Status", str(row.get("PassFail", "β€”")))
 
 
309
  with c3:
310
  if "Total" in d.columns and pd.notna(row.get("Total", np.nan)):
311
  st.metric("Total", f"{row['Total']:.2f}")
312
  else:
313
  st.metric("Total", "β€”")
314
 
315
- st.write("**Reason (simple hint):**", row.get("FailReasonHint", ""))
 
 
316
 
317
- # Component bar
318
- if component_cols:
319
- comp_vals = {c: row.get(c) for c in component_cols if c in d.columns}
320
  comp_df = pd.DataFrame({"Component": list(comp_vals.keys()), "Score": list(comp_vals.values())})
321
  fig = px.bar(comp_df, x="Component", y="Score")
322
  st.plotly_chart(fig, use_container_width=True)
@@ -324,19 +420,28 @@ def student_drilldown_view(d):
324
  st.subheader("Raw record")
325
  st.dataframe(pd.DataFrame(row).T, use_container_width=True)
326
 
327
- def export_view(d):
 
328
  st.subheader("Export for Power BI")
329
- st.caption("Download cleaned data with the computed PassFail fields. Load into Power BI (Get Data β†’ Text/CSV).")
330
 
331
  clean_csv = d.to_csv(index=False).encode("utf-8")
332
- st.download_button("⬇️ Download Cleaned Data (CSV)", clean_csv, file_name="cleaned_marks_with_passfail.csv", mime="text/csv")
 
 
 
 
 
333
 
334
  st.subheader("Recommended Power BI Measures (DAX)")
335
- st.code(r"""
 
336
  Pass Count = CALCULATE(COUNTROWS(cleaned_marks), cleaned_marks[PassFail] = "Pass")
337
  Fail Count = CALCULATE(COUNTROWS(cleaned_marks), cleaned_marks[PassFail] = "Fail")
338
  Pass Rate % = DIVIDE([Pass Count], COUNTROWS(cleaned_marks))
339
- """, language="text")
 
 
340
 
341
  st.subheader("Summary Tables")
342
  grade_summary = d["Grade"].value_counts(dropna=False).reset_index()
@@ -347,7 +452,8 @@ Pass Rate % = DIVIDE([Pass Count], COUNTROWS(cleaned_marks))
347
  pf_summary.columns = ["PassFail", "Count"]
348
  st.dataframe(pf_summary, use_container_width=True)
349
 
350
- # Render selected view
 
351
  if view == "Executive (Management)":
352
  executive_view(filtered)
353
  elif view == "Risk & Intervention":
 
1
+ # app.py (FULL REPLACEMENT - HF/Streamlit safe upload + grade>=C pass logic)
2
  import streamlit as st
3
  import pandas as pd
4
  import numpy as np
5
  import plotly.express as px
6
+ import io
7
 
8
  st.set_page_config(page_title="Excel β†’ Management Insights (Power BI style)", layout="wide")
9
 
10
  st.title("πŸ“Š Excel β†’ Interactive Management Dashboard (Power BI style)")
11
+ st.caption(
12
+ "Decision rule: **PASS if Grade β‰₯ C (C, C+, B-, etc.)** and **FAIL if below C (C-, D, F, etc.)**. "
13
+ "This dashboard uses the **Grade column only** for pass/fail."
14
+ )
15
 
16
  # -----------------------------
17
+ # Grade logic (FINAL as per user)
18
  # -----------------------------
19
  def grade_pass_fail(g):
20
  if pd.isna(g):
 
37
 
38
  return "Unknown"
39
 
 
 
 
 
 
 
40
 
41
def normalize_headers(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* whose column labels are stripped of surrounding whitespace."""
    cleaned = df.copy()
    stripped = []
    for col in cleaned.columns:
        stripped.append(str(col).strip())
    cleaned.columns = stripped
    return cleaned
45
 
46
+
47
def pick_grade_column(df: pd.DataFrame) -> str:
    """Pick the column that holds letter grades.

    Chooses the right-most column whose name contains "grade"
    (case-insensitive); if none exists, falls back to the last column.
    """
    chosen = df.columns[-1]
    for col in df.columns:
        if "grade" in str(col).lower():
            chosen = col
    return chosen
51
+
52
+
53
def coerce_numeric(df: pd.DataFrame, cols):
    """Return a copy of *df* with each column in *cols* converted to numeric.

    Unparseable values become NaN (``errors="coerce"``); names in *cols* that
    are not present in the frame are skipped.  Works on a copy so the caller's
    frame is never mutated — consistent with ``normalize_headers``.
    """
    df = df.copy()  # fix: previously mutated the caller's DataFrame in place
    for c in cols:
        if c in df.columns:
            df[c] = pd.to_numeric(df[c], errors="coerce")
    return df
58
 
59
+
60
@st.cache_data(show_spinner=False)
def list_sheets(file_bytes: bytes):
    """Return the sheet names of an uploaded .xlsx given its raw bytes.

    Takes bytes (not the upload widget object) so Streamlit can cache the
    result across reruns.
    """
    workbook = pd.ExcelFile(io.BytesIO(file_bytes))
    return workbook.sheet_names
65
+
66
+
67
@st.cache_data(show_spinner=False)
def read_excel_sheet(file_bytes: bytes, sheet_name: str):
    """Read one sheet of an uploaded .xlsx (raw bytes) into a DataFrame.

    Bytes-based signature keeps the call cacheable by Streamlit.
    """
    return pd.read_excel(io.BytesIO(file_bytes), sheet_name=sheet_name)
72
+
73
+
74
def detect_student_rows(df: pd.DataFrame, grade_col: str) -> pd.DataFrame:
    """Drop rows that do not look like student records.

    A row is kept when its grade cell matches a letter grade (A–F with an
    optional + or -) OR when any other column holds a parseable number.
    This filters out header/summary rows robustly.
    """
    grades = df[grade_col].astype(str).str.strip()
    looks_like_grade = grades.str.match(r"^[A-Fa-f][\+\-]?$", na=False)

    rest = df.drop(columns=[grade_col])
    has_numbers = rest.apply(pd.to_numeric, errors="coerce").notna().any(axis=1)

    return df[looks_like_grade | has_numbers].copy()
87
+
88
+
89
def ensure_sno(df: pd.DataFrame) -> tuple[pd.DataFrame, str]:
    """Locate a serial-number/ID column; if none exists, prepend a 1-based "Sno".

    Returns the frame (a copy only when a column had to be added) together
    with the name of the ID column.
    """
    known_ids = {"sno", "sno.", "sr", "sr.", "id", "studentid", "student id"}
    for col in df.columns:
        if str(col).strip().lower() in known_ids:
            return df, col
    out = df.copy()
    out.insert(0, "Sno", range(1, len(out) + 1))
    return out, "Sno"
100
+
101
+
102
def infer_component_cols(df: pd.DataFrame, grade_col: str, sno_col: str) -> list[str]:
    """Guess which columns hold assessment components.

    First looks for well-known component names (Test/Mid/Lab/Final/Total);
    when none are present, falls back to columns that are more than 40%
    numeric.  The id and grade columns are always excluded.  The result is
    ordered with the well-known names first, then any extras in frame order.
    """
    known_names = [
        "Test -1", "Test-1", "Test 1", "Test",
        "Mid Exam", "Mid", "Midterm",
        "Lab Total", "Lab",
        "Final Exam", "Final",
        "Total",
    ]
    excluded = (grade_col, sno_col)

    found = [c for c in df.columns if c in known_names and c not in excluded]
    if not found:
        # Fallback: mostly-numeric columns other than the id and grade columns.
        found = [
            c for c in df.columns
            if c not in excluded
            and pd.to_numeric(df[c], errors="coerce").notna().mean() > 0.4
        ]

    # Friendly ordering: known names first, then everything else as seen.
    ordered = [c for c in known_names if c in found]
    ordered.extend(c for c in found if c not in ordered)
    return ordered
130
+
131
+
132
def add_consistency(df: pd.DataFrame, component_cols: list[str]) -> pd.DataFrame:
    """Return a copy of *df* with a "Consistency_SD" column.

    The value is the per-row sample standard deviation across component
    scores (any "Total" column is excluded).  With fewer than two usable
    components the column is all-NaN.
    """
    out = df.copy()
    usable = [c for c in component_cols if c.lower() != "total"]
    if len(usable) < 2:
        out["Consistency_SD"] = np.nan
    else:
        out["Consistency_SD"] = out[usable].std(axis=1, skipna=True)
    return out
140
+
141
+
142
def make_fail_reason_hints(df: pd.DataFrame, component_cols: list[str]) -> pd.DataFrame:
    """Attach a "FailReasonHint" column with simple, management-friendly hints.

    Hints flag components sitting in the lower quartile for failing students.
    They are descriptive only and never affect the pass/fail decision.
    """
    out = df.copy()
    comps = [
        c for c in component_cols
        if c.lower() != "total" and pd.api.types.is_numeric_dtype(out[c])
    ]
    if not comps:
        # No numeric components: generic hint for fails, empty string otherwise.
        out["FailReasonHint"] = np.where(out["PassFail"] == "Fail", "Grade below C.", "")
        return out

    # Lower-quartile cutoffs, NaN when a component has no data at all.
    cutoffs = {}
    for c in comps:
        values = out[c].dropna()
        cutoffs[c] = values.quantile(0.25) if values.shape[0] else np.nan

    # First matching keyword wins — mirrors the priority final > lab > mid > test.
    keyword_messages = (
        ("final", "Final exam is in the lower quartile"),
        ("lab", "Lab total is in the lower quartile"),
        ("mid", "Mid exam is in the lower quartile"),
        ("test", "Test score is in the lower quartile"),
    )

    def describe(row):
        # One hint string per student; empty for non-failing rows.
        if row.get("PassFail") != "Fail":
            return ""
        hints = []
        for c in comps:
            value = row.get(c)
            if pd.isna(value) or pd.isna(cutoffs[c]) or value >= cutoffs[c]:
                continue
            for keyword, message in keyword_messages:
                if keyword in c.lower():
                    hints.append(message)
                    break
            else:
                hints.append(f"{c} is in the lower quartile")
        return " | ".join(hints) if hints else "Grade below C (review support plan)."

    out["FailReasonHint"] = out.apply(describe, axis=1)
    return out
176
+
177
 
178
  # -----------------------------
179
+ # Upload + Read (HF SAFE)
180
  # -----------------------------
181
  uploaded = st.file_uploader("Upload Excel (.xlsx)", type=["xlsx"])
182
 
 
184
  st.info("Upload an Excel file to begin.")
185
  st.stop()
186
 
187
+ file_bytes = uploaded.getvalue()
 
 
 
188
 
189
+ sheet_names = list_sheets(file_bytes)
190
+ sheet = st.selectbox("Select sheet", sheet_names, index=0)
 
 
191
 
192
+ raw = read_excel_sheet(file_bytes, sheet)
193
+ raw = normalize_headers(raw)
194
 
195
+ grade_col_name = pick_grade_column(raw)
196
 
197
+ df = detect_student_rows(raw, grade_col_name)
198
+ df, sno_col = ensure_sno(df)
 
 
 
 
 
 
 
199
 
200
+ # Grade column from chosen grade column name (fallback to last column already handled)
201
  df["Grade"] = df[grade_col_name].astype(str).str.strip().str.upper()
202
  df["PassFail"] = df["Grade"].apply(grade_pass_fail)
203
  df["Pass"] = df["PassFail"].eq("Pass")
204
  df["Fail"] = df["PassFail"].eq("Fail")
205
+ df["At_Risk"] = df["Fail"]
206
 
207
+ # Components (optional for insights)
208
+ component_cols = infer_component_cols(df, grade_col_name, sno_col)
 
 
 
 
 
 
 
209
  df = coerce_numeric(df, component_cols)
210
 
211
+ df = add_consistency(df, component_cols)
212
+ df = make_fail_reason_hints(df, component_cols)
 
 
 
 
 
 
 
 
 
 
 
 
213
 
214
  # -----------------------------
215
+ # Sidebar: "Power BI pages"
216
  # -----------------------------
217
  st.sidebar.header("Perspective")
218
  view = st.sidebar.radio(
 
222
  )
223
 
224
  st.sidebar.header("Filters")
225
+ pf_choices = ["Pass", "Fail", "Unknown"]
226
+ pf = st.sidebar.multiselect("Pass/Fail", pf_choices, default=pf_choices)
227
+
228
  grade_unique = sorted([g for g in df["Grade"].dropna().unique()])
229
  sel_grades = st.sidebar.multiselect("Grades", grade_unique, default=grade_unique)
230
 
 
235
  # KPI Row
236
  # -----------------------------
237
  k1, k2, k3, k4, k5 = st.columns(5)
238
+ with k1:
239
+ st.metric("Students", int(filtered.shape[0]))
240
+ with k2:
241
+ st.metric("Pass", int(filtered["Pass"].sum()))
242
+ with k3:
243
+ st.metric("Fail", int(filtered["Fail"].sum()))
244
  with k4:
245
  pr = (filtered["Pass"].mean() * 100) if filtered.shape[0] else 0
246
  st.metric("Pass Rate", f"{pr:.1f}%")
 
255
  # -----------------------------
256
  # Views
257
  # -----------------------------
258
+ def executive_view(d: pd.DataFrame):
259
  left, right = st.columns([1, 1])
260
 
261
  with left:
 
275
  st.subheader("Hidden Patterns (Quick Signals)")
276
  c1, c2, c3 = st.columns(3)
277
 
278
+ # Strong Lab but Fail (if any lab-like col exists)
279
+ lab_candidates = [c for c in component_cols if "lab" in c.lower() and c in d.columns and pd.api.types.is_numeric_dtype(d[c])]
280
+ if lab_candidates:
281
+ lab_col = lab_candidates[0]
282
  strong_lab_fail = d[(d["Fail"]) & (d[lab_col].notna()) & (d[lab_col] >= d[lab_col].quantile(0.75))]
283
  with c1:
284
  st.metric("Fail with Strong Lab", int(strong_lab_fail.shape[0]))
 
286
  with c1:
287
  st.metric("Fail with Strong Lab", "β€”")
288
 
289
+ # High inconsistency
290
  if "Consistency_SD" in d.columns and d["Consistency_SD"].notna().any():
291
  top_incons = d["Consistency_SD"].quantile(0.90)
292
  with c2:
 
295
  with c2:
296
  st.metric("High Inconsistency (Top 10%)", "β€”")
297
 
298
+ # Fail with high Total (if Total exists)
299
  if "Total" in d.columns and pd.api.types.is_numeric_dtype(d["Total"]) and d["Total"].notna().any():
300
  good_total_fail = d[(d["Fail"]) & (d["Total"] >= d["Total"].quantile(0.75))]
301
  with c3:
 
304
  with c3:
305
  st.metric("Fail with High Total", "β€”")
306
 
307
+ # Correlation only if Total + numeric components exist
308
+ numeric_comps = [c for c in component_cols if c in d.columns and pd.api.types.is_numeric_dtype(d[c])]
309
+ if "Total" in d.columns and pd.api.types.is_numeric_dtype(d["Total"]) and numeric_comps:
310
  st.subheader("What Drives Total? (Correlation)")
311
+ corr_cols = numeric_comps + ["Total"]
312
  corr = d[corr_cols].corr(numeric_only=True)
313
  fig = px.imshow(corr, text_auto=True, aspect="auto")
314
  st.plotly_chart(fig, use_container_width=True)
315
 
316
+
317
def risk_view(d: pd.DataFrame):
    """Render the "Risk & Intervention" page.

    Shows the list of failing students (Grade below C), a fail-type breakdown
    (borderline C- vs deeper fails), a detail table with component scores and
    reason hints, and static intervention suggestions.

    Relies on module-level ``sno_col`` and ``component_cols`` computed at load
    time, and on the "Fail"/"Grade"/"FailReasonHint" columns added earlier.
    """
    st.subheader("Fail List (Grade below C)")
    # "Fail" was derived from the Grade column via grade_pass_fail().
    fails = d[d["Fail"]].copy()

    if fails.empty:
        # Nothing to show under the current sidebar filters — exit early.
        st.success("No failing students in the current filter.")
        return

    # Split borderline fails (C-) from everything else for the bar chart.
    fails["FailType"] = np.where(fails["Grade"].str.startswith("C-"), "C- (Borderline Fail)", "Below C")

    bucket = fails["FailType"].value_counts().reset_index()
    bucket.columns = ["Fail Type", "Count"]

    c1, c2 = st.columns([1, 2])
    with c1:
        # Fail-type distribution.
        fig = px.bar(bucket, x="Fail Type", y="Count")
        st.plotly_chart(fig, use_container_width=True)

    with c2:
        # Detail table: id, grade, status, then Total and components if present.
        show_cols = [sno_col, "Grade", "PassFail"]
        for c in ["Total"] + component_cols:
            if c in fails.columns and c not in show_cols:
                show_cols.append(c)
        show_cols.append("FailReasonHint")

        st.dataframe(
            fails[show_cols].sort_values(by=["Grade", sno_col]),
            use_container_width=True,
            height=420
        )

    st.subheader("Intervention Suggestions (Management-friendly)")
    st.markdown(
        """
- **Many C- failures** → target borderline support (revision plan + short formative checks).
- **Failures linked with low Final** → structured exam-prep support (mock tests + feedback).
- **Failures with strong Lab** → review exam alignment + study strategy support.
"""
    )
356
 
357
+
358
def assessment_quality_view(d: pd.DataFrame):
    """Render the assessment-quality page.

    Shows a histogram for a chosen component, a component-vs-grade boxplot,
    missing/zero data-quality flags, and (when a numeric "Total" column
    exists) a correlation heatmap.

    Relies on module-level ``component_cols`` computed at load time.
    """
    st.subheader("Assessment Component Overview")

    # Only numeric component columns (excluding "Total") are analyzable here.
    numeric_comps = [c for c in component_cols if c in d.columns and pd.api.types.is_numeric_dtype(d[c]) and c.lower() != "total"]
    if not numeric_comps:
        st.warning("No numeric component columns detected for assessment analysis. Add Test/Mid/Lab/Final columns for deeper analysis.")
        return

    # Distribution of the selected component.
    comp = st.selectbox("Choose component", numeric_comps, index=0)
    fig = px.histogram(d, x=comp, nbins=20)
    st.plotly_chart(fig, use_container_width=True)

    st.subheader("Component vs Grade (Boxplot)")
    fig = px.box(d, x="Grade", y=comp)
    st.plotly_chart(fig, use_container_width=True)

    # Count missing and zero entries per component as simple quality flags.
    st.subheader("Data Quality Flags")
    flags = []
    for c in numeric_comps:
        series = d[c]
        missing = int(series.isna().sum())
        zeros = int((series == 0).sum())
        flags.append({"Component": c, "Missing": missing, "Zeros": zeros})
    st.dataframe(pd.DataFrame(flags), use_container_width=True)

    # Correlation heatmap only when a numeric Total column is available.
    if "Total" in d.columns and pd.api.types.is_numeric_dtype(d["Total"]):
        st.subheader("Correlation Heatmap")
        corr_cols = numeric_comps + ["Total"]
        corr = d[corr_cols].corr(numeric_only=True)
        fig = px.imshow(corr, text_auto=True, aspect="auto")
        st.plotly_chart(fig, use_container_width=True)
389
 
390
+
391
+ def student_drilldown_view(d: pd.DataFrame):
392
  st.subheader("Student Drill-down")
393
+ st.caption("Pick a student to view component breakdown and the grade-based decision (Grade β‰₯ C pass).")
394
+
395
  sid = st.selectbox("Select student (Sno)", sorted(d[sno_col].unique()))
396
  row = d[d[sno_col] == sid].iloc[0]
397
 
398
  c1, c2, c3 = st.columns(3)
399
+ with c1:
400
+ st.metric("Grade", str(row.get("Grade", "β€”")))
401
+ with c2:
402
+ st.metric("Status", str(row.get("PassFail", "β€”")))
403
  with c3:
404
  if "Total" in d.columns and pd.notna(row.get("Total", np.nan)):
405
  st.metric("Total", f"{row['Total']:.2f}")
406
  else:
407
  st.metric("Total", "β€”")
408
 
409
+ hint = row.get("FailReasonHint", "")
410
+ if hint:
411
+ st.write("**Reason hint:**", hint)
412
 
413
+ numeric_comps = [c for c in component_cols if c in d.columns and pd.api.types.is_numeric_dtype(d[c]) and c.lower() != "total"]
414
+ if numeric_comps:
415
+ comp_vals = {c: row.get(c) for c in numeric_comps}
416
  comp_df = pd.DataFrame({"Component": list(comp_vals.keys()), "Score": list(comp_vals.values())})
417
  fig = px.bar(comp_df, x="Component", y="Score")
418
  st.plotly_chart(fig, use_container_width=True)
 
420
  st.subheader("Raw record")
421
  st.dataframe(pd.DataFrame(row).T, use_container_width=True)
422
 
423
+
424
+ def export_view(d: pd.DataFrame):
425
  st.subheader("Export for Power BI")
426
+ st.caption("Download cleaned data with computed PassFail fields. Load into Power BI (Get Data β†’ Text/CSV).")
427
 
428
  clean_csv = d.to_csv(index=False).encode("utf-8")
429
+ st.download_button(
430
+ "⬇️ Download Cleaned Data (CSV)",
431
+ clean_csv,
432
+ file_name="cleaned_marks_with_passfail.csv",
433
+ mime="text/csv"
434
+ )
435
 
436
  st.subheader("Recommended Power BI Measures (DAX)")
437
+ st.code(
438
+ r"""
439
  Pass Count = CALCULATE(COUNTROWS(cleaned_marks), cleaned_marks[PassFail] = "Pass")
440
  Fail Count = CALCULATE(COUNTROWS(cleaned_marks), cleaned_marks[PassFail] = "Fail")
441
  Pass Rate % = DIVIDE([Pass Count], COUNTROWS(cleaned_marks))
442
+ """,
443
+ language="text",
444
+ )
445
 
446
  st.subheader("Summary Tables")
447
  grade_summary = d["Grade"].value_counts(dropna=False).reset_index()
 
452
  pf_summary.columns = ["PassFail", "Count"]
453
  st.dataframe(pf_summary, use_container_width=True)
454
 
455
+
456
+ # Render view
457
  if view == "Executive (Management)":
458
  executive_view(filtered)
459
  elif view == "Risk & Intervention":