Deevyankar committed on
Commit
6e3c696
·
verified ·
1 Parent(s): 75845be

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -73
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # app.py (FULL REPLACEMENT - HF/Streamlit safe upload + grade>=C pass logic)
2
  import streamlit as st
3
  import pandas as pd
4
  import numpy as np
@@ -45,7 +45,6 @@ def normalize_headers(df: pd.DataFrame) -> pd.DataFrame:
45
 
46
 
47
  def pick_grade_column(df: pd.DataFrame) -> str:
48
- # Prefer columns containing "grade"; otherwise last column
49
  candidates = [c for c in df.columns if "grade" in str(c).lower()]
50
  return candidates[-1] if candidates else df.columns[-1]
51
 
@@ -57,33 +56,14 @@ def coerce_numeric(df: pd.DataFrame, cols):
57
  return df
58
 
59
 
60
- @st.cache_data(show_spinner=False)
61
- def list_sheets(file_bytes: bytes):
62
- bio = io.BytesIO(file_bytes)
63
- xls = pd.ExcelFile(bio)
64
- return xls.sheet_names
65
-
66
-
67
- @st.cache_data(show_spinner=False)
68
- def read_excel_sheet(file_bytes: bytes, sheet_name: str):
69
- bio = io.BytesIO(file_bytes)
70
- df = pd.read_excel(bio, sheet_name=sheet_name)
71
- return df
72
-
73
-
74
  def detect_student_rows(df: pd.DataFrame, grade_col: str) -> pd.DataFrame:
75
- """
76
- Removes non-student rows robustly:
77
- Keeps rows that look like grade entries (A, B+, C-, etc.) OR have numeric marks in other cols.
78
- """
79
  tmp_grade = df[grade_col].astype(str).str.strip()
80
  grade_like = tmp_grade.str.match(r"^[A-Fa-f][\+\-]?$", na=False)
81
 
82
  other_cols = [c for c in df.columns if c != grade_col]
83
  numeric_signal = df[other_cols].apply(pd.to_numeric, errors="coerce").notna().sum(axis=1) > 0
84
 
85
- cleaned = df[grade_like | numeric_signal].copy()
86
- return cleaned
87
 
88
 
89
  def ensure_sno(df: pd.DataFrame) -> tuple[pd.DataFrame, str]:
@@ -110,7 +90,6 @@ def infer_component_cols(df: pd.DataFrame, grade_col: str, sno_col: str) -> list
110
  component_cols = [c for c in df.columns if c in common and c not in [grade_col, sno_col]]
111
 
112
  if not component_cols:
113
- # fallback: numeric columns other than sno and grade
114
  numeric_cols = []
115
  for c in df.columns:
116
  if c in [grade_col, sno_col]:
@@ -120,9 +99,8 @@ def infer_component_cols(df: pd.DataFrame, grade_col: str, sno_col: str) -> list
120
  numeric_cols.append(c)
121
  component_cols = numeric_cols
122
 
123
- # Keep ordering friendly
124
- preferred_order = ["Test -1", "Test-1", "Test 1", "Test", "Mid Exam", "Mid", "Midterm", "Lab Total", "Lab", "Final Exam", "Final", "Total"]
125
- ordered = [c for c in preferred_order if c in component_cols]
126
  for c in component_cols:
127
  if c not in ordered:
128
  ordered.append(c)
@@ -131,7 +109,7 @@ def infer_component_cols(df: pd.DataFrame, grade_col: str, sno_col: str) -> list
131
 
132
  def add_consistency(df: pd.DataFrame, component_cols: list[str]) -> pd.DataFrame:
133
  df = df.copy()
134
- cols_for_sd = [c for c in component_cols if c.lower() != "total"]
135
  if len(cols_for_sd) >= 2:
136
  df["Consistency_SD"] = df[cols_for_sd].std(axis=1, skipna=True)
137
  else:
@@ -140,16 +118,13 @@ def add_consistency(df: pd.DataFrame, component_cols: list[str]) -> pd.DataFrame
140
 
141
 
142
  def make_fail_reason_hints(df: pd.DataFrame, component_cols: list[str]) -> pd.DataFrame:
143
- """
144
- Simple, management-friendly hints (NOT used for pass/fail).
145
- """
146
  df = df.copy()
147
- comps = [c for c in component_cols if c.lower() != "total" and pd.api.types.is_numeric_dtype(df[c])]
 
148
  if not comps:
149
  df["FailReasonHint"] = np.where(df["PassFail"] == "Fail", "Grade below C.", "")
150
  return df
151
 
152
- # Precompute quartiles safely
153
  q25 = {c: df[c].dropna().quantile(0.25) if df[c].dropna().shape[0] else np.nan for c in comps}
154
 
155
  def reason(row):
@@ -159,13 +134,14 @@ def make_fail_reason_hints(df: pd.DataFrame, component_cols: list[str]) -> pd.Da
159
  for c in comps:
160
  v = row.get(c)
161
  if pd.notna(v) and pd.notna(q25[c]) and v < q25[c]:
162
- if "final" in c.lower():
 
163
  hints.append("Final exam is in the lower quartile")
164
- elif "lab" in c.lower():
165
  hints.append("Lab total is in the lower quartile")
166
- elif "mid" in c.lower():
167
  hints.append("Mid exam is in the lower quartile")
168
- elif "test" in c.lower():
169
  hints.append("Test score is in the lower quartile")
170
  else:
171
  hints.append(f"{c} is in the lower quartile")
@@ -176,20 +152,32 @@ def make_fail_reason_hints(df: pd.DataFrame, component_cols: list[str]) -> pd.Da
176
 
177
 
178
  # -----------------------------
179
- # Upload + Read (HF SAFE)
180
  # -----------------------------
181
- uploaded = st.file_uploader("Upload Excel (.xlsx)", type=["xlsx"])
182
 
183
  if uploaded is None:
184
  st.info("Upload an Excel file to begin.")
185
  st.stop()
186
 
187
- file_bytes = uploaded.getvalue()
 
 
 
 
 
 
 
 
 
 
 
188
 
189
- sheet_names = list_sheets(file_bytes)
190
- sheet = st.selectbox("Select sheet", sheet_names, index=0)
191
 
192
- raw = read_excel_sheet(file_bytes, sheet)
 
 
193
  raw = normalize_headers(raw)
194
 
195
  grade_col_name = pick_grade_column(raw)
@@ -197,17 +185,14 @@ grade_col_name = pick_grade_column(raw)
197
  df = detect_student_rows(raw, grade_col_name)
198
  df, sno_col = ensure_sno(df)
199
 
200
- # Grade column from chosen grade column name (fallback to last column already handled)
201
  df["Grade"] = df[grade_col_name].astype(str).str.strip().str.upper()
202
  df["PassFail"] = df["Grade"].apply(grade_pass_fail)
203
  df["Pass"] = df["PassFail"].eq("Pass")
204
  df["Fail"] = df["PassFail"].eq("Fail")
205
  df["At_Risk"] = df["Fail"]
206
 
207
- # Components (optional for insights)
208
  component_cols = infer_component_cols(df, grade_col_name, sno_col)
209
  df = coerce_numeric(df, component_cols)
210
-
211
  df = add_consistency(df, component_cols)
212
  df = make_fail_reason_hints(df, component_cols)
213
 
@@ -275,7 +260,6 @@ def executive_view(d: pd.DataFrame):
275
  st.subheader("Hidden Patterns (Quick Signals)")
276
  c1, c2, c3 = st.columns(3)
277
 
278
- # Strong Lab but Fail (if any lab-like col exists)
279
  lab_candidates = [c for c in component_cols if "lab" in c.lower() and c in d.columns and pd.api.types.is_numeric_dtype(d[c])]
280
  if lab_candidates:
281
  lab_col = lab_candidates[0]
@@ -286,7 +270,6 @@ def executive_view(d: pd.DataFrame):
286
  with c1:
287
  st.metric("Fail with Strong Lab", "β€”")
288
 
289
- # High inconsistency
290
  if "Consistency_SD" in d.columns and d["Consistency_SD"].notna().any():
291
  top_incons = d["Consistency_SD"].quantile(0.90)
292
  with c2:
@@ -295,7 +278,6 @@ def executive_view(d: pd.DataFrame):
295
  with c2:
296
  st.metric("High Inconsistency (Top 10%)", "β€”")
297
 
298
- # Fail with high Total (if Total exists)
299
  if "Total" in d.columns and pd.api.types.is_numeric_dtype(d["Total"]) and d["Total"].notna().any():
300
  good_total_fail = d[(d["Fail"]) & (d["Total"] >= d["Total"].quantile(0.75))]
301
  with c3:
@@ -304,7 +286,6 @@ def executive_view(d: pd.DataFrame):
304
  with c3:
305
  st.metric("Fail with High Total", "β€”")
306
 
307
- # Correlation only if Total + numeric components exist
308
  numeric_comps = [c for c in component_cols if c in d.columns and pd.api.types.is_numeric_dtype(d[c])]
309
  if "Total" in d.columns and pd.api.types.is_numeric_dtype(d["Total"]) and numeric_comps:
310
  st.subheader("What Drives Total? (Correlation)")
@@ -345,11 +326,11 @@ def risk_view(d: pd.DataFrame):
345
  height=420
346
  )
347
 
348
- st.subheader("Intervention Suggestions (Management-friendly)")
349
  st.markdown(
350
  """
351
  - **Many C- failures** β†’ target borderline support (revision plan + short formative checks).
352
- - **Failures linked with low Final** β†’ structured exam-prep support (mock tests + feedback).
353
  - **Failures with strong Lab** β†’ review exam alignment + study strategy support.
354
  """
355
  )
@@ -360,7 +341,7 @@ def assessment_quality_view(d: pd.DataFrame):
360
 
361
  numeric_comps = [c for c in component_cols if c in d.columns and pd.api.types.is_numeric_dtype(d[c]) and c.lower() != "total"]
362
  if not numeric_comps:
363
- st.warning("No numeric component columns detected for assessment analysis. Add Test/Mid/Lab/Final columns for deeper analysis.")
364
  return
365
 
366
  comp = st.selectbox("Choose component", numeric_comps, index=0)
@@ -390,7 +371,6 @@ def assessment_quality_view(d: pd.DataFrame):
390
 
391
  def student_drilldown_view(d: pd.DataFrame):
392
  st.subheader("Student Drill-down")
393
- st.caption("Pick a student to view component breakdown and the grade-based decision (Grade β‰₯ C pass).")
394
 
395
  sid = st.selectbox("Select student (Sno)", sorted(d[sno_col].unique()))
396
  row = d[d[sno_col] == sid].iloc[0]
@@ -423,7 +403,6 @@ def student_drilldown_view(d: pd.DataFrame):
423
 
424
  def export_view(d: pd.DataFrame):
425
  st.subheader("Export for Power BI")
426
- st.caption("Download cleaned data with computed PassFail fields. Load into Power BI (Get Data β†’ Text/CSV).")
427
 
428
  clean_csv = d.to_csv(index=False).encode("utf-8")
429
  st.download_button(
@@ -433,25 +412,6 @@ def export_view(d: pd.DataFrame):
433
  mime="text/csv"
434
  )
435
 
436
- st.subheader("Recommended Power BI Measures (DAX)")
437
- st.code(
438
- r"""
439
- Pass Count = CALCULATE(COUNTROWS(cleaned_marks), cleaned_marks[PassFail] = "Pass")
440
- Fail Count = CALCULATE(COUNTROWS(cleaned_marks), cleaned_marks[PassFail] = "Fail")
441
- Pass Rate % = DIVIDE([Pass Count], COUNTROWS(cleaned_marks))
442
- """,
443
- language="text",
444
- )
445
-
446
- st.subheader("Summary Tables")
447
- grade_summary = d["Grade"].value_counts(dropna=False).reset_index()
448
- grade_summary.columns = ["Grade", "Count"]
449
- st.dataframe(grade_summary, use_container_width=True)
450
-
451
- pf_summary = d["PassFail"].value_counts(dropna=False).reset_index()
452
- pf_summary.columns = ["PassFail", "Count"]
453
- st.dataframe(pf_summary, use_container_width=True)
454
-
455
 
456
  # Render view
457
  if view == "Executive (Management)":
 
1
+ # app.py (FULL REPLACEMENT - fixes NoneType getvalue + grade>=C pass logic)
2
  import streamlit as st
3
  import pandas as pd
4
  import numpy as np
 
45
 
46
 
47
  def pick_grade_column(df: pd.DataFrame) -> str:
 
48
  candidates = [c for c in df.columns if "grade" in str(c).lower()]
49
  return candidates[-1] if candidates else df.columns[-1]
50
 
 
56
  return df
57
 
58
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  def detect_student_rows(df: pd.DataFrame, grade_col: str) -> pd.DataFrame:
 
 
 
 
60
  tmp_grade = df[grade_col].astype(str).str.strip()
61
  grade_like = tmp_grade.str.match(r"^[A-Fa-f][\+\-]?$", na=False)
62
 
63
  other_cols = [c for c in df.columns if c != grade_col]
64
  numeric_signal = df[other_cols].apply(pd.to_numeric, errors="coerce").notna().sum(axis=1) > 0
65
 
66
+ return df[grade_like | numeric_signal].copy()
 
67
 
68
 
69
  def ensure_sno(df: pd.DataFrame) -> tuple[pd.DataFrame, str]:
 
90
  component_cols = [c for c in df.columns if c in common and c not in [grade_col, sno_col]]
91
 
92
  if not component_cols:
 
93
  numeric_cols = []
94
  for c in df.columns:
95
  if c in [grade_col, sno_col]:
 
99
  numeric_cols.append(c)
100
  component_cols = numeric_cols
101
 
102
+ preferred = ["Test -1", "Test-1", "Test 1", "Test", "Mid Exam", "Mid", "Midterm", "Lab Total", "Lab", "Final Exam", "Final", "Total"]
103
+ ordered = [c for c in preferred if c in component_cols]
 
104
  for c in component_cols:
105
  if c not in ordered:
106
  ordered.append(c)
 
109
 
110
  def add_consistency(df: pd.DataFrame, component_cols: list[str]) -> pd.DataFrame:
111
  df = df.copy()
112
+ cols_for_sd = [c for c in component_cols if c.lower() != "total" and pd.api.types.is_numeric_dtype(df.get(c, pd.Series(dtype=float)))]
113
  if len(cols_for_sd) >= 2:
114
  df["Consistency_SD"] = df[cols_for_sd].std(axis=1, skipna=True)
115
  else:
 
118
 
119
 
120
  def make_fail_reason_hints(df: pd.DataFrame, component_cols: list[str]) -> pd.DataFrame:
 
 
 
121
  df = df.copy()
122
+ comps = [c for c in component_cols if c.lower() != "total" and c in df.columns and pd.api.types.is_numeric_dtype(df[c])]
123
+
124
  if not comps:
125
  df["FailReasonHint"] = np.where(df["PassFail"] == "Fail", "Grade below C.", "")
126
  return df
127
 
 
128
  q25 = {c: df[c].dropna().quantile(0.25) if df[c].dropna().shape[0] else np.nan for c in comps}
129
 
130
  def reason(row):
 
134
  for c in comps:
135
  v = row.get(c)
136
  if pd.notna(v) and pd.notna(q25[c]) and v < q25[c]:
137
+ cl = c.lower()
138
+ if "final" in cl:
139
  hints.append("Final exam is in the lower quartile")
140
+ elif "lab" in cl:
141
  hints.append("Lab total is in the lower quartile")
142
+ elif "mid" in cl:
143
  hints.append("Mid exam is in the lower quartile")
144
+ elif "test" in cl:
145
  hints.append("Test score is in the lower quartile")
146
  else:
147
  hints.append(f"{c} is in the lower quartile")
 
152
 
153
 
154
  # -----------------------------
155
+ # Upload (HF SAFE) — no getvalue until uploaded is confirmed
156
  # -----------------------------
157
+ uploaded = st.file_uploader("Upload Excel (.xlsx)", type=["xlsx"], key="uploader")
158
 
159
  if uploaded is None:
160
  st.info("Upload an Excel file to begin.")
161
  st.stop()
162
 
163
+ # ONLY HERE we access bytes
164
+ file_bytes = uploaded.read() # more robust than getvalue() on HF
165
+ if not file_bytes:
166
+ st.warning("Uploaded file appears empty. Please re-upload the Excel file.")
167
+ st.stop()
168
+
169
+ bio = io.BytesIO(file_bytes)
170
+ try:
171
+ xls = pd.ExcelFile(bio)
172
+ except Exception as e:
173
+ st.error(f"Could not read Excel file. Error: {e}")
174
+ st.stop()
175
 
176
+ sheet = st.selectbox("Select sheet", xls.sheet_names, index=0)
 
177
 
178
+ # Rewind and read sheet
179
+ bio.seek(0)
180
+ raw = pd.read_excel(bio, sheet_name=sheet)
181
  raw = normalize_headers(raw)
182
 
183
  grade_col_name = pick_grade_column(raw)
 
185
  df = detect_student_rows(raw, grade_col_name)
186
  df, sno_col = ensure_sno(df)
187
 
 
188
  df["Grade"] = df[grade_col_name].astype(str).str.strip().str.upper()
189
  df["PassFail"] = df["Grade"].apply(grade_pass_fail)
190
  df["Pass"] = df["PassFail"].eq("Pass")
191
  df["Fail"] = df["PassFail"].eq("Fail")
192
  df["At_Risk"] = df["Fail"]
193
 
 
194
  component_cols = infer_component_cols(df, grade_col_name, sno_col)
195
  df = coerce_numeric(df, component_cols)
 
196
  df = add_consistency(df, component_cols)
197
  df = make_fail_reason_hints(df, component_cols)
198
 
 
260
  st.subheader("Hidden Patterns (Quick Signals)")
261
  c1, c2, c3 = st.columns(3)
262
 
 
263
  lab_candidates = [c for c in component_cols if "lab" in c.lower() and c in d.columns and pd.api.types.is_numeric_dtype(d[c])]
264
  if lab_candidates:
265
  lab_col = lab_candidates[0]
 
270
  with c1:
271
  st.metric("Fail with Strong Lab", "β€”")
272
 
 
273
  if "Consistency_SD" in d.columns and d["Consistency_SD"].notna().any():
274
  top_incons = d["Consistency_SD"].quantile(0.90)
275
  with c2:
 
278
  with c2:
279
  st.metric("High Inconsistency (Top 10%)", "β€”")
280
 
 
281
  if "Total" in d.columns and pd.api.types.is_numeric_dtype(d["Total"]) and d["Total"].notna().any():
282
  good_total_fail = d[(d["Fail"]) & (d["Total"] >= d["Total"].quantile(0.75))]
283
  with c3:
 
286
  with c3:
287
  st.metric("Fail with High Total", "β€”")
288
 
 
289
  numeric_comps = [c for c in component_cols if c in d.columns and pd.api.types.is_numeric_dtype(d[c])]
290
  if "Total" in d.columns and pd.api.types.is_numeric_dtype(d["Total"]) and numeric_comps:
291
  st.subheader("What Drives Total? (Correlation)")
 
326
  height=420
327
  )
328
 
329
+ st.subheader("Intervention Suggestions")
330
  st.markdown(
331
  """
332
  - **Many C- failures** β†’ target borderline support (revision plan + short formative checks).
333
+ - **Failures with low Final** β†’ structured exam-prep support (mock tests + feedback).
334
  - **Failures with strong Lab** β†’ review exam alignment + study strategy support.
335
  """
336
  )
 
341
 
342
  numeric_comps = [c for c in component_cols if c in d.columns and pd.api.types.is_numeric_dtype(d[c]) and c.lower() != "total"]
343
  if not numeric_comps:
344
+ st.warning("No numeric component columns detected for assessment analysis.")
345
  return
346
 
347
  comp = st.selectbox("Choose component", numeric_comps, index=0)
 
371
 
372
  def student_drilldown_view(d: pd.DataFrame):
373
  st.subheader("Student Drill-down")
 
374
 
375
  sid = st.selectbox("Select student (Sno)", sorted(d[sno_col].unique()))
376
  row = d[d[sno_col] == sid].iloc[0]
 
403
 
404
  def export_view(d: pd.DataFrame):
405
  st.subheader("Export for Power BI")
 
406
 
407
  clean_csv = d.to_csv(index=False).encode("utf-8")
408
  st.download_button(
 
412
  mime="text/csv"
413
  )
414
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
415
 
416
  # Render view
417
  if view == "Executive (Management)":