Spaces:

Deevyankar
/

OSN

Runtime error

App Files Files Community

Deevyankar committed on Dec 29, 2025

Commit

6e3c696

verified ·

1 Parent(s): 75845be

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -73

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# app.py (FULL REPLACEMENT - HF/Streamlit safe upload + grade>=C pass logic)
 import streamlit as st
 import pandas as pd
 import numpy as np
@@ -45,7 +45,6 @@ def normalize_headers(df: pd.DataFrame) -> pd.DataFrame:
 def pick_grade_column(df: pd.DataFrame) -> str:
-    # Prefer columns containing "grade"; otherwise last column
     candidates = [c for c in df.columns if "grade" in str(c).lower()]
     return candidates[-1] if candidates else df.columns[-1]
@@ -57,33 +56,14 @@ def coerce_numeric(df: pd.DataFrame, cols):
     return df
-@st.cache_data(show_spinner=False)
-def list_sheets(file_bytes: bytes):
-    bio = io.BytesIO(file_bytes)
-    xls = pd.ExcelFile(bio)
-    return xls.sheet_names
-@st.cache_data(show_spinner=False)
-def read_excel_sheet(file_bytes: bytes, sheet_name: str):
-    bio = io.BytesIO(file_bytes)
-    df = pd.read_excel(bio, sheet_name=sheet_name)
-    return df
 def detect_student_rows(df: pd.DataFrame, grade_col: str) -> pd.DataFrame:
-    """
-    Removes non-student rows robustly:
-    Keeps rows that look like grade entries (A, B+, C-, etc.) OR have numeric marks in other cols.
-    """
     tmp_grade = df[grade_col].astype(str).str.strip()
     grade_like = tmp_grade.str.match(r"^[A-Fa-f][\+\-]?$", na=False)
     other_cols = [c for c in df.columns if c != grade_col]
     numeric_signal = df[other_cols].apply(pd.to_numeric, errors="coerce").notna().sum(axis=1) > 0
-    cleaned = df[grade_like | numeric_signal].copy()
-    return cleaned
 def ensure_sno(df: pd.DataFrame) -> tuple[pd.DataFrame, str]:
@@ -110,7 +90,6 @@ def infer_component_cols(df: pd.DataFrame, grade_col: str, sno_col: str) -> list
     component_cols = [c for c in df.columns if c in common and c not in [grade_col, sno_col]]
     if not component_cols:
-        # fallback: numeric columns other than sno and grade
         numeric_cols = []
         for c in df.columns:
             if c in [grade_col, sno_col]:
@@ -120,9 +99,8 @@ def infer_component_cols(df: pd.DataFrame, grade_col: str, sno_col: str) -> list
                 numeric_cols.append(c)
         component_cols = numeric_cols
-    # Keep ordering friendly
-    preferred_order = ["Test -1", "Test-1", "Test 1", "Test", "Mid Exam", "Mid", "Midterm", "Lab Total", "Lab", "Final Exam", "Final", "Total"]
-    ordered = [c for c in preferred_order if c in component_cols]
     for c in component_cols:
         if c not in ordered:
             ordered.append(c)
@@ -131,7 +109,7 @@ def infer_component_cols(df: pd.DataFrame, grade_col: str, sno_col: str) -> list
 def add_consistency(df: pd.DataFrame, component_cols: list[str]) -> pd.DataFrame:
     df = df.copy()
-    cols_for_sd = [c for c in component_cols if c.lower() != "total"]
     if len(cols_for_sd) >= 2:
         df["Consistency_SD"] = df[cols_for_sd].std(axis=1, skipna=True)
     else:
@@ -140,16 +118,13 @@ def add_consistency(df: pd.DataFrame, component_cols: list[str]) -> pd.DataFrame
 def make_fail_reason_hints(df: pd.DataFrame, component_cols: list[str]) -> pd.DataFrame:
-    """
-    Simple, management-friendly hints (NOT used for pass/fail).
-    """
     df = df.copy()
-    comps = [c for c in component_cols if c.lower() != "total" and pd.api.types.is_numeric_dtype(df[c])]
     if not comps:
         df["FailReasonHint"] = np.where(df["PassFail"] == "Fail", "Grade below C.", "")
         return df
-    # Precompute quartiles safely
     q25 = {c: df[c].dropna().quantile(0.25) if df[c].dropna().shape[0] else np.nan for c in comps}
     def reason(row):
@@ -159,13 +134,14 @@ def make_fail_reason_hints(df: pd.DataFrame, component_cols: list[str]) -> pd.Da
         for c in comps:
             v = row.get(c)
             if pd.notna(v) and pd.notna(q25[c]) and v < q25[c]:
-                if "final" in c.lower():
                     hints.append("Final exam is in the lower quartile")
-                elif "lab" in c.lower():
                     hints.append("Lab total is in the lower quartile")
-                elif "mid" in c.lower():
                     hints.append("Mid exam is in the lower quartile")
-                elif "test" in c.lower():
                     hints.append("Test score is in the lower quartile")
                 else:
                     hints.append(f"{c} is in the lower quartile")
@@ -176,20 +152,32 @@ def make_fail_reason_hints(df: pd.DataFrame, component_cols: list[str]) -> pd.Da
 # -----------------------------
-# Upload + Read (HF SAFE)
 # -----------------------------
-uploaded = st.file_uploader("Upload Excel (.xlsx)", type=["xlsx"])
 if uploaded is None:
     st.info("Upload an Excel file to begin.")
     st.stop()
-file_bytes = uploaded.getvalue()
-sheet_names = list_sheets(file_bytes)
-sheet = st.selectbox("Select sheet", sheet_names, index=0)
-raw = read_excel_sheet(file_bytes, sheet)
 raw = normalize_headers(raw)
 grade_col_name = pick_grade_column(raw)
@@ -197,17 +185,14 @@ grade_col_name = pick_grade_column(raw)
 df = detect_student_rows(raw, grade_col_name)
 df, sno_col = ensure_sno(df)
-# Grade column from chosen grade column name (fallback to last column already handled)
 df["Grade"] = df[grade_col_name].astype(str).str.strip().str.upper()
 df["PassFail"] = df["Grade"].apply(grade_pass_fail)
 df["Pass"] = df["PassFail"].eq("Pass")
 df["Fail"] = df["PassFail"].eq("Fail")
 df["At_Risk"] = df["Fail"]
-# Components (optional for insights)
 component_cols = infer_component_cols(df, grade_col_name, sno_col)
 df = coerce_numeric(df, component_cols)
 df = add_consistency(df, component_cols)
 df = make_fail_reason_hints(df, component_cols)
@@ -275,7 +260,6 @@ def executive_view(d: pd.DataFrame):
     st.subheader("Hidden Patterns (Quick Signals)")
     c1, c2, c3 = st.columns(3)
-    # Strong Lab but Fail (if any lab-like col exists)
     lab_candidates = [c for c in component_cols if "lab" in c.lower() and c in d.columns and pd.api.types.is_numeric_dtype(d[c])]
     if lab_candidates:
         lab_col = lab_candidates[0]
@@ -286,7 +270,6 @@ def executive_view(d: pd.DataFrame):
         with c1:
             st.metric("Fail with Strong Lab", "—")
-    # High inconsistency
     if "Consistency_SD" in d.columns and d["Consistency_SD"].notna().any():
         top_incons = d["Consistency_SD"].quantile(0.90)
         with c2:
@@ -295,7 +278,6 @@ def executive_view(d: pd.DataFrame):
         with c2:
             st.metric("High Inconsistency (Top 10%)", "—")
-    # Fail with high Total (if Total exists)
     if "Total" in d.columns and pd.api.types.is_numeric_dtype(d["Total"]) and d["Total"].notna().any():
         good_total_fail = d[(d["Fail"]) & (d["Total"] >= d["Total"].quantile(0.75))]
         with c3:
@@ -304,7 +286,6 @@ def executive_view(d: pd.DataFrame):
         with c3:
             st.metric("Fail with High Total", "—")
-    # Correlation only if Total + numeric components exist
     numeric_comps = [c for c in component_cols if c in d.columns and pd.api.types.is_numeric_dtype(d[c])]
     if "Total" in d.columns and pd.api.types.is_numeric_dtype(d["Total"]) and numeric_comps:
         st.subheader("What Drives Total? (Correlation)")
@@ -345,11 +326,11 @@ def risk_view(d: pd.DataFrame):
             height=420
         )
-    st.subheader("Intervention Suggestions (Management-friendly)")
     st.markdown(
         """
 - **Many C- failures** → target borderline support (revision plan + short formative checks).
-- **Failures linked with low Final** → structured exam-prep support (mock tests + feedback).
 - **Failures with strong Lab** → review exam alignment + study strategy support.
 """
     )
@@ -360,7 +341,7 @@ def assessment_quality_view(d: pd.DataFrame):
     numeric_comps = [c for c in component_cols if c in d.columns and pd.api.types.is_numeric_dtype(d[c]) and c.lower() != "total"]
     if not numeric_comps:
-        st.warning("No numeric component columns detected for assessment analysis. Add Test/Mid/Lab/Final columns for deeper analysis.")
         return
     comp = st.selectbox("Choose component", numeric_comps, index=0)
@@ -390,7 +371,6 @@ def assessment_quality_view(d: pd.DataFrame):
 def student_drilldown_view(d: pd.DataFrame):
     st.subheader("Student Drill-down")
-    st.caption("Pick a student to view component breakdown and the grade-based decision (Grade ≥ C pass).")
     sid = st.selectbox("Select student (Sno)", sorted(d[sno_col].unique()))
     row = d[d[sno_col] == sid].iloc[0]
@@ -423,7 +403,6 @@ def student_drilldown_view(d: pd.DataFrame):
 def export_view(d: pd.DataFrame):
     st.subheader("Export for Power BI")
-    st.caption("Download cleaned data with computed PassFail fields. Load into Power BI (Get Data → Text/CSV).")
     clean_csv = d.to_csv(index=False).encode("utf-8")
     st.download_button(
@@ -433,25 +412,6 @@ def export_view(d: pd.DataFrame):
         mime="text/csv"
     )
-    st.subheader("Recommended Power BI Measures (DAX)")
-    st.code(
-        r"""
-Pass Count = CALCULATE(COUNTROWS(cleaned_marks), cleaned_marks[PassFail] = "Pass")
-Fail Count = CALCULATE(COUNTROWS(cleaned_marks), cleaned_marks[PassFail] = "Fail")
-Pass Rate % = DIVIDE([Pass Count], COUNTROWS(cleaned_marks))
-""",
-        language="text",
-    )
-    st.subheader("Summary Tables")
-    grade_summary = d["Grade"].value_counts(dropna=False).reset_index()
-    grade_summary.columns = ["Grade", "Count"]
-    st.dataframe(grade_summary, use_container_width=True)
-    pf_summary = d["PassFail"].value_counts(dropna=False).reset_index()
-    pf_summary.columns = ["PassFail", "Count"]
-    st.dataframe(pf_summary, use_container_width=True)
 # Render view
 if view == "Executive (Management)":

+# app.py (FULL REPLACEMENT - fixes NoneType getvalue + grade>=C pass logic)
 import streamlit as st
 import pandas as pd
 import numpy as np
 def pick_grade_column(df: pd.DataFrame) -> str:
     candidates = [c for c in df.columns if "grade" in str(c).lower()]
     return candidates[-1] if candidates else df.columns[-1]
     return df
 def detect_student_rows(df: pd.DataFrame, grade_col: str) -> pd.DataFrame:
     tmp_grade = df[grade_col].astype(str).str.strip()
     grade_like = tmp_grade.str.match(r"^[A-Fa-f][\+\-]?$", na=False)
     other_cols = [c for c in df.columns if c != grade_col]
     numeric_signal = df[other_cols].apply(pd.to_numeric, errors="coerce").notna().sum(axis=1) > 0
+    return df[grade_like | numeric_signal].copy()
 def ensure_sno(df: pd.DataFrame) -> tuple[pd.DataFrame, str]:
     component_cols = [c for c in df.columns if c in common and c not in [grade_col, sno_col]]
     if not component_cols:
         numeric_cols = []
         for c in df.columns:
             if c in [grade_col, sno_col]:
                 numeric_cols.append(c)
         component_cols = numeric_cols
+    preferred = ["Test -1", "Test-1", "Test 1", "Test", "Mid Exam", "Mid", "Midterm", "Lab Total", "Lab", "Final Exam", "Final", "Total"]
+    ordered = [c for c in preferred if c in component_cols]
     for c in component_cols:
         if c not in ordered:
             ordered.append(c)
 def add_consistency(df: pd.DataFrame, component_cols: list[str]) -> pd.DataFrame:
     df = df.copy()
+    cols_for_sd = [c for c in component_cols if c.lower() != "total" and pd.api.types.is_numeric_dtype(df.get(c, pd.Series(dtype=float)))]
     if len(cols_for_sd) >= 2:
         df["Consistency_SD"] = df[cols_for_sd].std(axis=1, skipna=True)
     else:
 def make_fail_reason_hints(df: pd.DataFrame, component_cols: list[str]) -> pd.DataFrame:
     df = df.copy()
+    comps = [c for c in component_cols if c.lower() != "total" and c in df.columns and pd.api.types.is_numeric_dtype(df[c])]
     if not comps:
         df["FailReasonHint"] = np.where(df["PassFail"] == "Fail", "Grade below C.", "")
         return df
     q25 = {c: df[c].dropna().quantile(0.25) if df[c].dropna().shape[0] else np.nan for c in comps}
     def reason(row):
         for c in comps:
             v = row.get(c)
             if pd.notna(v) and pd.notna(q25[c]) and v < q25[c]:
+                cl = c.lower()
+                if "final" in cl:
                     hints.append("Final exam is in the lower quartile")
+                elif "lab" in cl:
                     hints.append("Lab total is in the lower quartile")
+                elif "mid" in cl:
                     hints.append("Mid exam is in the lower quartile")
+                elif "test" in cl:
                     hints.append("Test score is in the lower quartile")
                 else:
                     hints.append(f"{c} is in the lower quartile")
 # -----------------------------
+# Upload (HF SAFE) — no getvalue until uploaded is confirmed
 # -----------------------------
+uploaded = st.file_uploader("Upload Excel (.xlsx)", type=["xlsx"], key="uploader")
 if uploaded is None:
     st.info("Upload an Excel file to begin.")
     st.stop()
+# ONLY HERE we access bytes
+file_bytes = uploaded.read()  # more robust than getvalue() on HF
+if not file_bytes:
+    st.warning("Uploaded file appears empty. Please re-upload the Excel file.")
+    st.stop()
+bio = io.BytesIO(file_bytes)
+try:
+    xls = pd.ExcelFile(bio)
+except Exception as e:
+    st.error(f"Could not read Excel file. Error: {e}")
+    st.stop()
+sheet = st.selectbox("Select sheet", xls.sheet_names, index=0)
+# Rewind and read sheet
+bio.seek(0)
+raw = pd.read_excel(bio, sheet_name=sheet)
 raw = normalize_headers(raw)
 grade_col_name = pick_grade_column(raw)
 df = detect_student_rows(raw, grade_col_name)
 df, sno_col = ensure_sno(df)
 df["Grade"] = df[grade_col_name].astype(str).str.strip().str.upper()
 df["PassFail"] = df["Grade"].apply(grade_pass_fail)
 df["Pass"] = df["PassFail"].eq("Pass")
 df["Fail"] = df["PassFail"].eq("Fail")
 df["At_Risk"] = df["Fail"]
 component_cols = infer_component_cols(df, grade_col_name, sno_col)
 df = coerce_numeric(df, component_cols)
 df = add_consistency(df, component_cols)
 df = make_fail_reason_hints(df, component_cols)
     st.subheader("Hidden Patterns (Quick Signals)")
     c1, c2, c3 = st.columns(3)
     lab_candidates = [c for c in component_cols if "lab" in c.lower() and c in d.columns and pd.api.types.is_numeric_dtype(d[c])]
     if lab_candidates:
         lab_col = lab_candidates[0]
         with c1:
             st.metric("Fail with Strong Lab", "—")
     if "Consistency_SD" in d.columns and d["Consistency_SD"].notna().any():
         top_incons = d["Consistency_SD"].quantile(0.90)
         with c2:
         with c2:
             st.metric("High Inconsistency (Top 10%)", "—")
     if "Total" in d.columns and pd.api.types.is_numeric_dtype(d["Total"]) and d["Total"].notna().any():
         good_total_fail = d[(d["Fail"]) & (d["Total"] >= d["Total"].quantile(0.75))]
         with c3:
         with c3:
             st.metric("Fail with High Total", "—")
     numeric_comps = [c for c in component_cols if c in d.columns and pd.api.types.is_numeric_dtype(d[c])]
     if "Total" in d.columns and pd.api.types.is_numeric_dtype(d["Total"]) and numeric_comps:
         st.subheader("What Drives Total? (Correlation)")
             height=420
         )
+    st.subheader("Intervention Suggestions")
     st.markdown(
         """
 - **Many C- failures** → target borderline support (revision plan + short formative checks).
+- **Failures with low Final** → structured exam-prep support (mock tests + feedback).
 - **Failures with strong Lab** → review exam alignment + study strategy support.
 """
     )
     numeric_comps = [c for c in component_cols if c in d.columns and pd.api.types.is_numeric_dtype(d[c]) and c.lower() != "total"]
     if not numeric_comps:
+        st.warning("No numeric component columns detected for assessment analysis.")
         return
     comp = st.selectbox("Choose component", numeric_comps, index=0)
 def student_drilldown_view(d: pd.DataFrame):
     st.subheader("Student Drill-down")
     sid = st.selectbox("Select student (Sno)", sorted(d[sno_col].unique()))
     row = d[d[sno_col] == sid].iloc[0]
 def export_view(d: pd.DataFrame):
     st.subheader("Export for Power BI")
     clean_csv = d.to_csv(index=False).encode("utf-8")
     st.download_button(
         mime="text/csv"
     )
 # Render view
 if view == "Executive (Management)":