Spaces:

Deevyankar
/

OSN

Runtime error

App Files Files Community

Deevyankar committed on Dec 29, 2025

Commit

a146115

verified ·

1 Parent(s): 9472d3b

Update app.py

Browse files

Files changed (1) hide show

app.py +54 -286

app.py CHANGED Viewed

@@ -1,20 +1,16 @@
-# app.py (FINAL FULL REPLACEMENT - no NoneType crashes + openpyxl forced + Grade>=C pass)
 import streamlit as st
 import pandas as pd
 import numpy as np
 import plotly.express as px
 import io
-st.set_page_config(page_title="Excel → Management Insights (Power BI style)", layout="wide")
 st.title("📊 Excel → Interactive Management Dashboard (Power BI style)")
-st.caption(
-    "Decision rule: **PASS if Grade ≥ C (C, C+, B-, etc.)** and **FAIL if below C (C-, D, F, etc.)**. "
-    "Pass/Fail uses **Grade only**."
-)
 # -----------------------------
-# Grade logic (as specified)
 # -----------------------------
 def grade_pass_fail(g):
     if pd.isna(g):
@@ -25,9 +21,7 @@ def grade_pass_fail(g):
         return "Fail"
     if g.startswith("C"):
-        if g == "C-" or g.startswith("C-"):
-            return "Fail"
-        return "Pass"
     if g.startswith(("A", "B")):
         return "Pass"
@@ -35,35 +29,18 @@ def grade_pass_fail(g):
     return "Unknown"
-def normalize_headers(df: pd.DataFrame) -> pd.DataFrame:
     df = df.copy()
     df.columns = [str(c).strip() for c in df.columns]
     return df
-def pick_grade_column(df: pd.DataFrame) -> str:
     candidates = [c for c in df.columns if "grade" in str(c).lower()]
     return candidates[-1] if candidates else df.columns[-1]
-def coerce_numeric(df: pd.DataFrame, cols):
-    for c in cols:
-        if c in df.columns:
-            df[c] = pd.to_numeric(df[c], errors="coerce")
-    return df
-def detect_student_rows(df: pd.DataFrame, grade_col: str) -> pd.DataFrame:
-    tmp_grade = df[grade_col].astype(str).str.strip()
-    grade_like = tmp_grade.str.match(r"^[A-Fa-f][\+\-]?$", na=False)
-    other_cols = [c for c in df.columns if c != grade_col]
-    numeric_signal = df[other_cols].apply(pd.to_numeric, errors="coerce").notna().sum(axis=1) > 0
-    return df[grade_like | numeric_signal].copy()
-def ensure_sno(df: pd.DataFrame) -> tuple[pd.DataFrame, str]:
     sno_col = None
     for c in df.columns:
         if str(c).strip().lower() in ["sno", "sno.", "sr", "sr.", "id", "studentid", "student id"]:
@@ -76,329 +53,120 @@ def ensure_sno(df: pd.DataFrame) -> tuple[pd.DataFrame, str]:
     return df, sno_col
-def infer_component_cols(df: pd.DataFrame, grade_col: str, sno_col: str) -> list[str]:
-    common = [
-        "Test -1", "Test-1", "Test 1", "Test",
-        "Mid Exam", "Mid", "Midterm",
-        "Lab Total", "Lab",
-        "Final Exam", "Final",
-        "Total"
-    ]
-    component_cols = [c for c in df.columns if c in common and c not in [grade_col, sno_col]]
-    if not component_cols:
-        numeric_cols = []
-        for c in df.columns:
-            if c in [grade_col, sno_col]:
-                continue
-            s = pd.to_numeric(df[c], errors="coerce")
-            if s.notna().mean() > 0.4:
-                numeric_cols.append(c)
-        component_cols = numeric_cols
-    preferred = ["Test -1", "Test-1", "Test 1", "Test", "Mid Exam", "Mid", "Midterm", "Lab Total", "Lab", "Final Exam", "Final", "Total"]
-    ordered = [c for c in preferred if c in component_cols]
-    for c in component_cols:
-        if c not in ordered:
-            ordered.append(c)
-    return ordered
-def add_consistency(df: pd.DataFrame, component_cols: list[str]) -> pd.DataFrame:
-    df = df.copy()
-    cols_for_sd = [
-        c for c in component_cols
-        if c.lower() != "total" and c in df.columns and pd.api.types.is_numeric_dtype(df[c])
-    ]
-    df["Consistency_SD"] = df[cols_for_sd].std(axis=1, skipna=True) if len(cols_for_sd) >= 2 else np.nan
-    return df
-def make_fail_reason_hints(df: pd.DataFrame, component_cols: list[str]) -> pd.DataFrame:
-    df = df.copy()
-    comps = [
-        c for c in component_cols
-        if c.lower() != "total" and c in df.columns and pd.api.types.is_numeric_dtype(df[c])
-    ]
-    if not comps:
-        df["FailReasonHint"] = np.where(df["PassFail"] == "Fail", "Grade below C.", "")
-        return df
-    q25 = {c: df[c].dropna().quantile(0.25) if df[c].dropna().shape[0] else np.nan for c in comps}
-    def reason(row):
-        if row.get("PassFail") != "Fail":
-            return ""
-        hints = []
-        for c in comps:
-            v = row.get(c)
-            if pd.notna(v) and pd.notna(q25[c]) and v < q25[c]:
-                cl = c.lower()
-                if "final" in cl:
-                    hints.append("Final exam is in the lower quartile")
-                elif "lab" in cl:
-                    hints.append("Lab total is in the lower quartile")
-                elif "mid" in cl:
-                    hints.append("Mid exam is in the lower quartile")
-                elif "test" in cl:
-                    hints.append("Test score is in the lower quartile")
-                else:
-                    hints.append(f"{c} is in the lower quartile")
-        return " | ".join(hints) if hints else "Grade below C (review support plan)."
-    df["FailReasonHint"] = df.apply(reason, axis=1)
-    return df
-# -----------------------------
-# Session state init
-# -----------------------------
-if "file_bytes" not in st.session_state:
-    st.session_state["file_bytes"] = None
-if "file_name" not in st.session_state:
-    st.session_state["file_name"] = None
-if "sheet_names" not in st.session_state:
-    st.session_state["sheet_names"] = None
-# Reset button (helps a lot on HF reruns)
-topc1, topc2 = st.columns([1, 3])
-with topc1:
-    if st.button("🔄 Reset upload"):
-        st.session_state["file_bytes"] = None
-        st.session_state["file_name"] = None
-        st.session_state["sheet_names"] = None
-        st.rerun()
-with topc2:
-    if st.session_state["file_name"]:
-        st.info(f"Current file loaded: {st.session_state['file_name']}")
 # -----------------------------
-# Upload
 # -----------------------------
-uploaded = st.file_uploader("Upload Excel (.xlsx)", type=["xlsx"], key="uploader")
-# On upload, store bytes
-if uploaded is not None:
-    fb = uploaded.getvalue()
-    # fb can *rarely* be None/empty on buggy reruns; guard it
-    if fb:
-        st.session_state["file_bytes"] = fb
-        st.session_state["file_name"] = uploaded.name
-        st.session_state["sheet_names"] = None
-# Re-check bytes RIGHT BEFORE use
-file_bytes = st.session_state.get("file_bytes", None)
-if file_bytes is None or not isinstance(file_bytes, (bytes, bytearray)) or len(file_bytes) == 0:
     st.info("Upload an Excel file to begin.")
     st.stop()
-# Signature check for XLSX (zip => PK)
-if len(file_bytes) < 2 or file_bytes[:2] != b"PK":
-    st.error("This does not look like a valid .xlsx file. Please Save As → Excel Workbook (.xlsx) and upload again.")
     st.stop()
-# Load sheet names (FORCE openpyxl)
-if st.session_state["sheet_names"] is None:
-    try:
-        bio = io.BytesIO(file_bytes)
-        xls_local = pd.ExcelFile(bio, engine="openpyxl")
-        st.session_state["sheet_names"] = xls_local.sheet_names
-    except Exception as e:
-        st.error(f"Could not read Excel file (openpyxl). Error: {e}")
-        st.stop()
-sheet = st.selectbox("Select sheet", st.session_state["sheet_names"], index=0)
-# Read selected sheet (FORCE openpyxl)
 try:
     bio = io.BytesIO(file_bytes)
     raw = pd.read_excel(bio, sheet_name=sheet, engine="openpyxl")
 except Exception as e:
-    st.error(f"Could not read the selected sheet (openpyxl). Error: {e}")
     st.stop()
 raw = normalize_headers(raw)
-# -----------------------------
-# Build dataframe
-# -----------------------------
-grade_col_name = pick_grade_column(raw)
-df = detect_student_rows(raw, grade_col_name)
 df, sno_col = ensure_sno(df)
-df["Grade"] = df[grade_col_name].astype(str).str.strip().str.upper()
 df["PassFail"] = df["Grade"].apply(grade_pass_fail)
 df["Pass"] = df["PassFail"].eq("Pass")
 df["Fail"] = df["PassFail"].eq("Fail")
-df["At_Risk"] = df["Fail"]
-component_cols = infer_component_cols(df, grade_col_name, sno_col)
-df = coerce_numeric(df, component_cols)
-df = add_consistency(df, component_cols)
-df = make_fail_reason_hints(df, component_cols)
 # -----------------------------
-# Sidebar: Views + Filters
 # -----------------------------
 st.sidebar.header("Perspective")
 view = st.sidebar.radio(
     "Choose a view",
-    ["Executive (Management)", "Risk & Intervention", "Assessment Quality", "Student Drill-down", "Export for Power BI"],
     index=0
 )
 st.sidebar.header("Filters")
-pf_choices = ["Pass", "Fail", "Unknown"]
-pf = st.sidebar.multiselect("Pass/Fail", pf_choices, default=pf_choices)
-grade_unique = sorted([g for g in df["Grade"].dropna().unique()])
-sel_grades = st.sidebar.multiselect("Grades", grade_unique, default=grade_unique)
 filtered = df[df["PassFail"].isin(pf)]
 filtered = filtered[filtered["Grade"].isin(sel_grades)]
 # -----------------------------
-# KPI Row
 # -----------------------------
-k1, k2, k3, k4, k5 = st.columns(5)
-with k1:
     st.metric("Students", int(filtered.shape[0]))
-with k2:
     st.metric("Pass", int(filtered["Pass"].sum()))
-with k3:
     st.metric("Fail", int(filtered["Fail"].sum()))
-with k4:
     pr = (filtered["Pass"].mean() * 100) if filtered.shape[0] else 0
     st.metric("Pass Rate", f"{pr:.1f}%")
-with k5:
-    if "Total" in filtered.columns and pd.api.types.is_numeric_dtype(filtered["Total"]):
-        st.metric("Average Total", f"{filtered['Total'].mean():.2f}")
-    else:
-        st.metric("Average Total", "—")
 st.divider()
 # -----------------------------
 # Views
 # -----------------------------
-def executive_view(d: pd.DataFrame):
-    left, right = st.columns([1, 1])
     with left:
         st.subheader("Grade Distribution")
-        gc = d["Grade"].value_counts(dropna=False).reset_index()
         gc.columns = ["Grade", "Count"]
         st.plotly_chart(px.bar(gc, x="Grade", y="Count"), use_container_width=True)
     with right:
         st.subheader("Pass/Fail Distribution")
-        pc = d["PassFail"].value_counts(dropna=False).reset_index()
         pc.columns = ["Status", "Count"]
         st.plotly_chart(px.pie(pc, names="Status", values="Count"), use_container_width=True)
-    st.subheader("Hidden Patterns (Quick Signals)")
-    c1, c2, c3 = st.columns(3)
-    lab_candidates = [c for c in component_cols if "lab" in c.lower() and c in d.columns and pd.api.types.is_numeric_dtype(d[c])]
-    if lab_candidates:
-        lab_col = lab_candidates[0]
-        strong_lab_fail = d[(d["Fail"]) & (d[lab_col].notna()) & (d[lab_col] >= d[lab_col].quantile(0.75))]
-        with c1:
-            st.metric("Fail with Strong Lab", int(strong_lab_fail.shape[0]))
-    else:
-        with c1:
-            st.metric("Fail with Strong Lab", "—")
-    if "Consistency_SD" in d.columns and d["Consistency_SD"].notna().any():
-        top_incons = d["Consistency_SD"].quantile(0.90)
-        with c2:
-            st.metric("High Inconsistency (Top 10%)", int((d["Consistency_SD"] >= top_incons).sum()))
-    else:
-        with c2:
-            st.metric("High Inconsistency (Top 10%)", "—")
-    if "Total" in d.columns and pd.api.types.is_numeric_dtype(d["Total"]) and d["Total"].notna().any():
-        good_total_fail = d[(d["Fail"]) & (d["Total"] >= d["Total"].quantile(0.75))]
-        with c3:
-            st.metric("Fail with High Total", int(good_total_fail.shape[0]))
-    else:
-        with c3:
-            st.metric("Fail with High Total", "—")
-def risk_view(d: pd.DataFrame):
     st.subheader("Fail List (Grade below C)")
-    fails = d[d["Fail"]].copy()
     if fails.empty:
         st.success("No failing students in the current filter.")
-        return
-    fails["FailType"] = np.where(fails["Grade"].str.startswith("C-"), "C- (Borderline Fail)", "Below C")
-    bucket = fails["FailType"].value_counts().reset_index()
-    bucket.columns = ["Fail Type", "Count"]
-    c1, c2 = st.columns([1, 2])
-    with c1:
-        st.plotly_chart(px.bar(bucket, x="Fail Type", y="Count"), use_container_width=True)
-    with c2:
         show_cols = [sno_col, "Grade", "PassFail"]
-        for c in ["Total"] + component_cols:
-            if c in fails.columns and c not in show_cols:
-                show_cols.append(c)
-        show_cols.append("FailReasonHint")
-        st.dataframe(fails[show_cols].sort_values(by=["Grade", sno_col]), use_container_width=True, height=420)
-def assessment_quality_view(d: pd.DataFrame):
-    st.subheader("Assessment Component Overview")
-    numeric_comps = [c for c in component_cols if c in d.columns and pd.api.types.is_numeric_dtype(d[c]) and c.lower() != "total"]
-    if not numeric_comps:
-        st.warning("No numeric component columns detected for assessment analysis.")
-        return
-    comp = st.selectbox("Choose component", numeric_comps, index=0)
-    st.plotly_chart(px.histogram(d, x=comp, nbins=20), use_container_width=True)
-    st.subheader("Component vs Grade (Boxplot)")
-    st.plotly_chart(px.box(d, x="Grade", y=comp), use_container_width=True)
-def student_drilldown_view(d: pd.DataFrame):
     st.subheader("Student Drill-down")
-    sid = st.selectbox("Select student (Sno)", sorted(d[sno_col].unique()))
-    row = d[d[sno_col] == sid].iloc[0]
-    c1, c2, c3 = st.columns(3)
-    with c1:
-        st.metric("Grade", str(row.get("Grade", "—")))
-    with c2:
-        st.metric("Status", str(row.get("PassFail", "—")))
-    with c3:
-        st.metric("At Risk", "Yes" if row.get("Fail") else "No")
-    hint = row.get("FailReasonHint", "")
-    if hint:
-        st.write("**Reason hint:**", hint)
-def export_view(d: pd.DataFrame):
-    st.subheader("Export for Power BI")
-    clean_csv = d.to_csv(index=False).encode("utf-8")
-    st.download_button("⬇️ Download Cleaned Data (CSV)", clean_csv, file_name="cleaned_marks_with_passfail.csv", mime="text/csv")
-if view == "Executive (Management)":
-    executive_view(filtered)
-elif view == "Risk & Intervention":
-    risk_view(filtered)
-elif view == "Assessment Quality":
-    assessment_quality_view(filtered)
-elif view == "Student Drill-down":
-    student_drilldown_view(filtered)
 else:
-    export_view(filtered)

 import streamlit as st
 import pandas as pd
 import numpy as np
 import plotly.express as px
 import io
+st.set_page_config(page_title="Excel → Management Insights", layout="wide")
 st.title("📊 Excel → Interactive Management Dashboard (Power BI style)")
+st.caption("Rule: **PASS if Grade ≥ C** (C, C+, B-, etc.). **FAIL if below C** (C-, D, F...).")
 # -----------------------------
+# Grade logic
 # -----------------------------
 def grade_pass_fail(g):
     if pd.isna(g):
         return "Fail"
     if g.startswith("C"):
+        return "Fail" if g.startswith("C-") else "Pass"
     if g.startswith(("A", "B")):
         return "Pass"
     return "Unknown"
+def normalize_headers(df):
     df = df.copy()
     df.columns = [str(c).strip() for c in df.columns]
     return df
+def pick_grade_column(df):
     candidates = [c for c in df.columns if "grade" in str(c).lower()]
     return candidates[-1] if candidates else df.columns[-1]
+def ensure_sno(df):
     sno_col = None
     for c in df.columns:
         if str(c).strip().lower() in ["sno", "sno.", "sr", "sr.", "id", "studentid", "student id"]:
     return df, sno_col
 # -----------------------------
+# Upload (NO session_state)
 # -----------------------------
+uploaded = st.file_uploader("Upload Excel (.xlsx)", type=["xlsx"])
+if uploaded is None:
     st.info("Upload an Excel file to begin.")
     st.stop()
+# Read bytes safely
+file_bytes = uploaded.getvalue()
+if not file_bytes:
+    st.warning("Uploaded file is empty. Please re-upload.")
     st.stop()
+# Force openpyxl
+try:
+    bio = io.BytesIO(file_bytes)
+    xls = pd.ExcelFile(bio, engine="openpyxl")
+except Exception as e:
+    st.error(f"Cannot open this Excel file. Make sure it is a real .xlsx. Error: {e}")
+    st.stop()
+sheet = st.selectbox("Select sheet", xls.sheet_names, index=0)
 try:
     bio = io.BytesIO(file_bytes)
     raw = pd.read_excel(bio, sheet_name=sheet, engine="openpyxl")
 except Exception as e:
+    st.error(f"Cannot read this sheet. Error: {e}")
     st.stop()
 raw = normalize_headers(raw)
+grade_col = pick_grade_column(raw)
+df = raw.copy()
 df, sno_col = ensure_sno(df)
+df["Grade"] = df[grade_col].astype(str).str.strip().str.upper()
 df["PassFail"] = df["Grade"].apply(grade_pass_fail)
 df["Pass"] = df["PassFail"].eq("Pass")
 df["Fail"] = df["PassFail"].eq("Fail")
 # -----------------------------
+# Sidebar filters
 # -----------------------------
 st.sidebar.header("Perspective")
 view = st.sidebar.radio(
     "Choose a view",
+    ["Executive (Management)", "Risk & Intervention", "Student Drill-down", "Export for Power BI"],
     index=0
 )
 st.sidebar.header("Filters")
+pf = st.sidebar.multiselect("Pass/Fail", ["Pass", "Fail", "Unknown"], default=["Pass", "Fail", "Unknown"])
+grades = sorted([g for g in df["Grade"].dropna().unique()])
+sel_grades = st.sidebar.multiselect("Grades", grades, default=grades)
 filtered = df[df["PassFail"].isin(pf)]
 filtered = filtered[filtered["Grade"].isin(sel_grades)]
 # -----------------------------
+# KPIs
 # -----------------------------
+c1, c2, c3, c4 = st.columns(4)
+with c1:
     st.metric("Students", int(filtered.shape[0]))
+with c2:
     st.metric("Pass", int(filtered["Pass"].sum()))
+with c3:
     st.metric("Fail", int(filtered["Fail"].sum()))
+with c4:
     pr = (filtered["Pass"].mean() * 100) if filtered.shape[0] else 0
     st.metric("Pass Rate", f"{pr:.1f}%")
 st.divider()
 # -----------------------------
 # Views
 # -----------------------------
+if view == "Executive (Management)":
+    left, right = st.columns(2)
     with left:
         st.subheader("Grade Distribution")
+        gc = filtered["Grade"].value_counts(dropna=False).reset_index()
         gc.columns = ["Grade", "Count"]
         st.plotly_chart(px.bar(gc, x="Grade", y="Count"), use_container_width=True)
     with right:
         st.subheader("Pass/Fail Distribution")
+        pc = filtered["PassFail"].value_counts(dropna=False).reset_index()
         pc.columns = ["Status", "Count"]
         st.plotly_chart(px.pie(pc, names="Status", values="Count"), use_container_width=True)
+elif view == "Risk & Intervention":
     st.subheader("Fail List (Grade below C)")
+    fails = filtered[filtered["Fail"]].copy()
     if fails.empty:
         st.success("No failing students in the current filter.")
+    else:
         show_cols = [sno_col, "Grade", "PassFail"]
+        st.dataframe(fails[show_cols], use_container_width=True, height=450)
+elif view == "Student Drill-down":
     st.subheader("Student Drill-down")
+    sid = st.selectbox("Select student", sorted(filtered[sno_col].unique()))
+    row = filtered[filtered[sno_col] == sid].iloc[0]
+    st.write("**Grade:**", row["Grade"])
+    st.write("**Status:**", row["PassFail"])
+    st.dataframe(pd.DataFrame(row).T, use_container_width=True)
 else:
+    st.subheader("Export for Power BI")
+    out = filtered.to_csv(index=False).encode("utf-8")
+    st.download_button("⬇️ Download CSV", out, file_name="cleaned_marks_with_passfail.csv", mime="text/csv")