Deevyankar committed on
Commit
75845be
Β·
verified Β·
1 Parent(s): eae0add

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +230 -124
app.py CHANGED
@@ -1,16 +1,20 @@
1
- # app.py
2
  import streamlit as st
3
  import pandas as pd
4
  import numpy as np
5
  import plotly.express as px
 
6
 
7
  st.set_page_config(page_title="Excel β†’ Management Insights (Power BI style)", layout="wide")
8
 
9
  st.title("πŸ“Š Excel β†’ Interactive Management Dashboard (Power BI style)")
10
- st.caption("Grade-based decision rule: **PASS if Grade β‰₯ C (including C, C+, B-, etc.)** and **FAIL if below C (C-, D, F, etc.)**. Marks thresholds are not used.")
 
 
 
11
 
12
  # -----------------------------
13
- # Grade logic (FINAL as per you)
14
  # -----------------------------
15
  def grade_pass_fail(g):
16
  if pd.isna(g):
@@ -33,48 +37,146 @@ def grade_pass_fail(g):
33
 
34
  return "Unknown"
35
 
36
- def pick_grade_column(df: pd.DataFrame) -> str:
37
- # User confirmed "Grade is last column" β€” we still try to be robust.
38
- candidates = [c for c in df.columns if "grade" in str(c).lower()]
39
- if candidates:
40
- return candidates[-1]
41
- return df.columns[-1]
42
 
43
  def normalize_headers(df: pd.DataFrame) -> pd.DataFrame:
44
- # Clean common trailing spaces
45
  df = df.copy()
46
  df.columns = [str(c).strip() for c in df.columns]
47
  return df
48
 
 
 
 
 
 
 
 
49
  def coerce_numeric(df: pd.DataFrame, cols):
50
  for c in cols:
51
  if c in df.columns:
52
  df[c] = pd.to_numeric(df[c], errors="coerce")
53
  return df
54
 
55
- def describe_fail_reason(row, components):
56
- # Human-readable reason (simple, management-friendly)
57
- if row.get("PassFail") != "Fail":
58
- return ""
59
- hints = []
60
- for c in components:
61
- v = row.get(c)
62
- if pd.notna(v):
63
- # rough, non-controversial hinting β€” not using thresholds for pass/fail
64
- if c.lower().find("final") >= 0 and v < np.nanpercentile(components_df[c].dropna(), 25):
65
- hints.append("Final exam is in the lower quartile")
66
- if c.lower().find("lab") >= 0 and v < np.nanpercentile(components_df[c].dropna(), 25):
67
- hints.append("Lab total is in the lower quartile")
68
- if c.lower().find("mid") >= 0 and v < np.nanpercentile(components_df[c].dropna(), 25):
69
- hints.append("Mid exam is in the lower quartile")
70
- if c.lower().find("test") >= 0 and v < np.nanpercentile(components_df[c].dropna(), 25):
71
- hints.append("Test score is in the lower quartile")
72
- if not hints:
73
- return "Grade below C (check component performance & attendance/assessment issues)."
74
- return " | ".join(hints)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
 
76
  # -----------------------------
77
- # Upload + read
78
  # -----------------------------
79
  uploaded = st.file_uploader("Upload Excel (.xlsx)", type=["xlsx"])
80
 
@@ -82,65 +184,35 @@ if uploaded is None:
82
  st.info("Upload an Excel file to begin.")
83
  st.stop()
84
 
85
- xls = pd.ExcelFile(uploaded)
86
- sheet = st.selectbox("Select sheet", xls.sheet_names, index=0)
87
- raw = pd.read_excel(uploaded, sheet_name=sheet)
88
- raw = normalize_headers(raw)
89
 
90
- # Try to remove non-student rows (robust: keep rows with any numeric marks OR any grade-like text)
91
- grade_col_name = pick_grade_column(raw)
92
- tmp_grade = raw[grade_col_name].astype(str).str.strip()
93
- grade_like = tmp_grade.str.match(r"^[A-Fa-f][\+\-]?$", na=False)
94
 
95
- numeric_cols_guess = [c for c in raw.columns if c != grade_col_name]
96
- numeric_signal = raw[numeric_cols_guess].apply(pd.to_numeric, errors="coerce").notna().sum(axis=1) > 0
97
 
98
- df = raw[grade_like | numeric_signal].copy()
99
 
100
- # Add Sno if exists, else create row id
101
- sno_col = None
102
- for c in df.columns:
103
- if str(c).strip().lower() in ["sno", "sno.", "sr", "sr.", "id", "studentid", "student id"]:
104
- sno_col = c
105
- break
106
- if sno_col is None:
107
- df.insert(0, "Sno", range(1, len(df) + 1))
108
- sno_col = "Sno"
109
 
110
- # Grade column
111
  df["Grade"] = df[grade_col_name].astype(str).str.strip().str.upper()
112
  df["PassFail"] = df["Grade"].apply(grade_pass_fail)
113
  df["Pass"] = df["PassFail"].eq("Pass")
114
  df["Fail"] = df["PassFail"].eq("Fail")
 
115
 
116
- # Identify likely mark columns (common names; if not found, pick numeric ones)
117
- common_components = ["Test -1", "Test-1", "Test 1", "Mid Exam", "Mid", "Lab Total", "Final Exam", "Total"]
118
- component_cols = [c for c in df.columns if c in common_components]
119
- if not component_cols:
120
- # fallback: all numeric columns except Sno
121
- num_cols = df.columns[df.apply(lambda s: pd.to_numeric(s, errors="coerce").notna().mean() > 0.4)]
122
- component_cols = [c for c in num_cols if c != sno_col]
123
-
124
- # Coerce numerics (if present)
125
  df = coerce_numeric(df, component_cols)
126
 
127
- # Consistency score (std across available components)
128
- if len(component_cols) >= 2:
129
- df["Consistency_SD"] = df[component_cols].std(axis=1, skipna=True)
130
- else:
131
- df["Consistency_SD"] = np.nan
132
-
133
- # Global for hinting
134
- components_df = df.copy()
135
-
136
- # Optional β€œFail reason” (for drilldown / risk view)
137
- if component_cols:
138
- df["FailReasonHint"] = df.apply(lambda r: describe_fail_reason(r, component_cols), axis=1)
139
- else:
140
- df["FailReasonHint"] = np.where(df["Fail"], "Grade below C.", "")
141
 
142
  # -----------------------------
143
- # Sidebar: β€œPower BI pages”
144
  # -----------------------------
145
  st.sidebar.header("Perspective")
146
  view = st.sidebar.radio(
@@ -150,7 +222,9 @@ view = st.sidebar.radio(
150
  )
151
 
152
  st.sidebar.header("Filters")
153
- pf = st.sidebar.multiselect("Pass/Fail", ["Pass", "Fail", "Unknown"], default=["Pass", "Fail", "Unknown"])
 
 
154
  grade_unique = sorted([g for g in df["Grade"].dropna().unique()])
155
  sel_grades = st.sidebar.multiselect("Grades", grade_unique, default=grade_unique)
156
 
@@ -161,9 +235,12 @@ filtered = filtered[filtered["Grade"].isin(sel_grades)]
161
  # KPI Row
162
  # -----------------------------
163
  k1, k2, k3, k4, k5 = st.columns(5)
164
- with k1: st.metric("Students", int(filtered.shape[0]))
165
- with k2: st.metric("Pass", int(filtered["Pass"].sum()))
166
- with k3: st.metric("Fail", int(filtered["Fail"].sum()))
 
 
 
167
  with k4:
168
  pr = (filtered["Pass"].mean() * 100) if filtered.shape[0] else 0
169
  st.metric("Pass Rate", f"{pr:.1f}%")
@@ -178,7 +255,7 @@ st.divider()
178
  # -----------------------------
179
  # Views
180
  # -----------------------------
181
- def executive_view(d):
182
  left, right = st.columns([1, 1])
183
 
184
  with left:
@@ -198,9 +275,10 @@ def executive_view(d):
198
  st.subheader("Hidden Patterns (Quick Signals)")
199
  c1, c2, c3 = st.columns(3)
200
 
201
- # Pattern: Strong Lab but Fail (if lab exists)
202
- if any("Lab" in c for c in component_cols):
203
- lab_col = [c for c in component_cols if "Lab" in c][0]
 
204
  strong_lab_fail = d[(d["Fail"]) & (d[lab_col].notna()) & (d[lab_col] >= d[lab_col].quantile(0.75))]
205
  with c1:
206
  st.metric("Fail with Strong Lab", int(strong_lab_fail.shape[0]))
@@ -208,7 +286,7 @@ def executive_view(d):
208
  with c1:
209
  st.metric("Fail with Strong Lab", "β€”")
210
 
211
- # Pattern: Inconsistent high SD
212
  if "Consistency_SD" in d.columns and d["Consistency_SD"].notna().any():
213
  top_incons = d["Consistency_SD"].quantile(0.90)
214
  with c2:
@@ -217,7 +295,7 @@ def executive_view(d):
217
  with c2:
218
  st.metric("High Inconsistency (Top 10%)", "β€”")
219
 
220
- # Pattern: Fail with good Total (if Total exists)
221
  if "Total" in d.columns and pd.api.types.is_numeric_dtype(d["Total"]) and d["Total"].notna().any():
222
  good_total_fail = d[(d["Fail"]) & (d["Total"] >= d["Total"].quantile(0.75))]
223
  with c3:
@@ -226,97 +304,115 @@ def executive_view(d):
226
  with c3:
227
  st.metric("Fail with High Total", "β€”")
228
 
229
- if component_cols and "Total" in d.columns and pd.api.types.is_numeric_dtype(d["Total"]):
 
 
230
  st.subheader("What Drives Total? (Correlation)")
231
- corr_cols = [c for c in component_cols if c in d.columns] + ["Total"]
232
  corr = d[corr_cols].corr(numeric_only=True)
233
  fig = px.imshow(corr, text_auto=True, aspect="auto")
234
  st.plotly_chart(fig, use_container_width=True)
235
 
236
- def risk_view(d):
237
- st.subheader("Fail List (Grade < C)")
 
238
  fails = d[d["Fail"]].copy()
239
 
240
- # Bucket: C- vs D/F etc.
 
 
 
241
  fails["FailType"] = np.where(fails["Grade"].str.startswith("C-"), "C- (Borderline Fail)", "Below C")
 
242
  bucket = fails["FailType"].value_counts().reset_index()
243
  bucket.columns = ["Fail Type", "Count"]
 
244
  c1, c2 = st.columns([1, 2])
245
  with c1:
246
  fig = px.bar(bucket, x="Fail Type", y="Count")
247
  st.plotly_chart(fig, use_container_width=True)
 
248
  with c2:
249
  show_cols = [sno_col, "Grade", "PassFail"]
250
  for c in ["Total"] + component_cols:
251
  if c in fails.columns and c not in show_cols:
252
  show_cols.append(c)
253
- show_cols += ["FailReasonHint"]
254
- st.dataframe(fails[show_cols].sort_values(by=["Grade", sno_col]), use_container_width=True, height=420)
 
 
 
 
 
255
 
256
  st.subheader("Intervention Suggestions (Management-friendly)")
257
  st.markdown(
258
  """
259
- - **Many C- failures** β†’ run targeted revision + re-assessment readiness support (borderline group).
260
- - **Failures concentrated with low Final** β†’ strengthen exam preparation (mock exams + feedback).
261
- - **Failures with strong Lab** β†’ review exam alignment, study strategy, and assessment balance.
262
  """
263
  )
264
 
265
- def assessment_quality_view(d):
 
266
  st.subheader("Assessment Component Overview")
267
- if not component_cols:
268
- st.warning("No numeric component columns detected. Add columns like Test/Mid/Lab/Final/Total for deeper assessment analysis.")
 
 
269
  return
270
 
271
- # Component distributions
272
- comp = st.selectbox("Choose component", component_cols, index=min(0, len(component_cols)-1))
273
  fig = px.histogram(d, x=comp, nbins=20)
274
  st.plotly_chart(fig, use_container_width=True)
275
 
276
- # Component vs Grade
277
  st.subheader("Component vs Grade (Boxplot)")
278
  fig = px.box(d, x="Grade", y=comp)
279
  st.plotly_chart(fig, use_container_width=True)
280
 
281
- # Zero / missing checks
282
  st.subheader("Data Quality Flags")
283
  flags = []
284
- for c in component_cols:
285
  series = d[c]
286
- if pd.api.types.is_numeric_dtype(series):
287
- missing = int(series.isna().sum())
288
- zeros = int((series == 0).sum())
289
- flags.append({"Component": c, "Missing": missing, "Zeros": zeros})
290
  st.dataframe(pd.DataFrame(flags), use_container_width=True)
291
 
292
- # If Total exists: correlation heatmap
293
  if "Total" in d.columns and pd.api.types.is_numeric_dtype(d["Total"]):
294
  st.subheader("Correlation Heatmap")
295
- corr_cols = [c for c in component_cols if c in d.columns] + ["Total"]
296
  corr = d[corr_cols].corr(numeric_only=True)
297
  fig = px.imshow(corr, text_auto=True, aspect="auto")
298
  st.plotly_chart(fig, use_container_width=True)
299
 
300
- def student_drilldown_view(d):
 
301
  st.subheader("Student Drill-down")
302
- st.caption("Pick a student to view component breakdown and the grade-based decision.")
 
303
  sid = st.selectbox("Select student (Sno)", sorted(d[sno_col].unique()))
304
  row = d[d[sno_col] == sid].iloc[0]
305
 
306
  c1, c2, c3 = st.columns(3)
307
- with c1: st.metric("Grade", str(row.get("Grade", "β€”")))
308
- with c2: st.metric("Status", str(row.get("PassFail", "β€”")))
 
 
309
  with c3:
310
  if "Total" in d.columns and pd.notna(row.get("Total", np.nan)):
311
  st.metric("Total", f"{row['Total']:.2f}")
312
  else:
313
  st.metric("Total", "β€”")
314
 
315
- st.write("**Reason (simple hint):**", row.get("FailReasonHint", ""))
 
 
316
 
317
- # Component bar
318
- if component_cols:
319
- comp_vals = {c: row.get(c) for c in component_cols if c in d.columns}
320
  comp_df = pd.DataFrame({"Component": list(comp_vals.keys()), "Score": list(comp_vals.values())})
321
  fig = px.bar(comp_df, x="Component", y="Score")
322
  st.plotly_chart(fig, use_container_width=True)
@@ -324,19 +420,28 @@ def student_drilldown_view(d):
324
  st.subheader("Raw record")
325
  st.dataframe(pd.DataFrame(row).T, use_container_width=True)
326
 
327
- def export_view(d):
 
328
  st.subheader("Export for Power BI")
329
- st.caption("Download cleaned data with the computed PassFail fields. Load into Power BI (Get Data β†’ Text/CSV).")
330
 
331
  clean_csv = d.to_csv(index=False).encode("utf-8")
332
- st.download_button("⬇️ Download Cleaned Data (CSV)", clean_csv, file_name="cleaned_marks_with_passfail.csv", mime="text/csv")
 
 
 
 
 
333
 
334
  st.subheader("Recommended Power BI Measures (DAX)")
335
- st.code(r"""
 
336
  Pass Count = CALCULATE(COUNTROWS(cleaned_marks), cleaned_marks[PassFail] = "Pass")
337
  Fail Count = CALCULATE(COUNTROWS(cleaned_marks), cleaned_marks[PassFail] = "Fail")
338
  Pass Rate % = DIVIDE([Pass Count], COUNTROWS(cleaned_marks))
339
- """, language="text")
 
 
340
 
341
  st.subheader("Summary Tables")
342
  grade_summary = d["Grade"].value_counts(dropna=False).reset_index()
@@ -347,7 +452,8 @@ Pass Rate % = DIVIDE([Pass Count], COUNTROWS(cleaned_marks))
347
  pf_summary.columns = ["PassFail", "Count"]
348
  st.dataframe(pf_summary, use_container_width=True)
349
 
350
- # Render selected view
 
351
  if view == "Executive (Management)":
352
  executive_view(filtered)
353
  elif view == "Risk & Intervention":
 
1
+ # app.py (FULL REPLACEMENT - HF/Streamlit safe upload + grade>=C pass logic)
2
  import streamlit as st
3
  import pandas as pd
4
  import numpy as np
5
  import plotly.express as px
6
+ import io
7
 
8
  st.set_page_config(page_title="Excel β†’ Management Insights (Power BI style)", layout="wide")
9
 
10
  st.title("πŸ“Š Excel β†’ Interactive Management Dashboard (Power BI style)")
11
+ st.caption(
12
+ "Decision rule: **PASS if Grade β‰₯ C (C, C+, B-, etc.)** and **FAIL if below C (C-, D, F, etc.)**. "
13
+ "This dashboard uses the **Grade column only** for pass/fail."
14
+ )
15
 
16
  # -----------------------------
17
+ # Grade logic (FINAL as per user)
18
  # -----------------------------
19
  def grade_pass_fail(g):
20
  if pd.isna(g):
 
37
 
38
  return "Unknown"
39
 
 
 
 
 
 
 
40
 
41
def normalize_headers(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* whose column labels are stripped of surrounding whitespace."""
    cleaned = df.copy()
    stripped = []
    for col in cleaned.columns:
        stripped.append(str(col).strip())
    cleaned.columns = stripped
    return cleaned
45
 
46
+
47
def pick_grade_column(df: pd.DataFrame) -> str:
    """Pick the column that holds letter grades.

    Chooses the right-most column whose name contains "grade"
    (case-insensitive); if none exists, falls back to the last column.
    """
    chosen = df.columns[-1]
    for col in df.columns:
        if "grade" in str(col).lower():
            chosen = col
    return chosen
51
+
52
+
53
def coerce_numeric(df: pd.DataFrame, cols):
    """Return a copy of *df* with each column in *cols* converted to numeric.

    Unparseable values become NaN (``errors="coerce"``); names in *cols* that
    are not present in the frame are skipped.  Works on a copy so the caller's
    frame is never mutated — consistent with ``normalize_headers``.
    """
    df = df.copy()  # fix: previously mutated the caller's DataFrame in place
    for c in cols:
        if c in df.columns:
            df[c] = pd.to_numeric(df[c], errors="coerce")
    return df
58
 
59
+
60
@st.cache_data(show_spinner=False)
def list_sheets(file_bytes: bytes):
    """Return the sheet names of an uploaded .xlsx given its raw bytes.

    Takes bytes (not the upload widget object) so Streamlit can cache the
    result across reruns.
    """
    workbook = pd.ExcelFile(io.BytesIO(file_bytes))
    return workbook.sheet_names
65
+
66
+
67
@st.cache_data(show_spinner=False)
def read_excel_sheet(file_bytes: bytes, sheet_name: str):
    """Read one sheet of an uploaded .xlsx (raw bytes) into a DataFrame.

    Bytes-based signature keeps the call cacheable by Streamlit.
    """
    return pd.read_excel(io.BytesIO(file_bytes), sheet_name=sheet_name)
72
+
73
+
74
def detect_student_rows(df: pd.DataFrame, grade_col: str) -> pd.DataFrame:
    """Drop rows that do not look like student records.

    A row is kept when its grade cell matches a letter grade (A–F with an
    optional + or -) OR when any other column holds a parseable number.
    This filters out header/summary rows robustly.
    """
    grades = df[grade_col].astype(str).str.strip()
    looks_like_grade = grades.str.match(r"^[A-Fa-f][\+\-]?$", na=False)

    rest = df.drop(columns=[grade_col])
    has_numbers = rest.apply(pd.to_numeric, errors="coerce").notna().any(axis=1)

    return df[looks_like_grade | has_numbers].copy()
87
+
88
+
89
def ensure_sno(df: pd.DataFrame) -> tuple[pd.DataFrame, str]:
    """Locate a serial-number/ID column; if none exists, prepend a 1-based "Sno".

    Returns the frame (a copy only when a column had to be added) together
    with the name of the ID column.
    """
    known_ids = {"sno", "sno.", "sr", "sr.", "id", "studentid", "student id"}
    for col in df.columns:
        if str(col).strip().lower() in known_ids:
            return df, col
    out = df.copy()
    out.insert(0, "Sno", range(1, len(out) + 1))
    return out, "Sno"
100
+
101
+
102
def infer_component_cols(df: pd.DataFrame, grade_col: str, sno_col: str) -> list[str]:
    """Guess which columns hold assessment components.

    First looks for well-known component names (Test/Mid/Lab/Final/Total);
    when none are present, falls back to columns that are more than 40%
    numeric.  The id and grade columns are always excluded.  The result is
    ordered with the well-known names first, then any extras in frame order.
    """
    known_names = [
        "Test -1", "Test-1", "Test 1", "Test",
        "Mid Exam", "Mid", "Midterm",
        "Lab Total", "Lab",
        "Final Exam", "Final",
        "Total",
    ]
    excluded = (grade_col, sno_col)

    found = [c for c in df.columns if c in known_names and c not in excluded]
    if not found:
        # Fallback: mostly-numeric columns other than the id and grade columns.
        found = [
            c for c in df.columns
            if c not in excluded
            and pd.to_numeric(df[c], errors="coerce").notna().mean() > 0.4
        ]

    # Friendly ordering: known names first, then everything else as seen.
    ordered = [c for c in known_names if c in found]
    ordered.extend(c for c in found if c not in ordered)
    return ordered
130
+
131
+
132
def add_consistency(df: pd.DataFrame, component_cols: list[str]) -> pd.DataFrame:
    """Return a copy of *df* with a "Consistency_SD" column.

    The value is the per-row sample standard deviation across component
    scores (any "Total" column is excluded).  With fewer than two usable
    components the column is all-NaN.
    """
    out = df.copy()
    usable = [c for c in component_cols if c.lower() != "total"]
    if len(usable) < 2:
        out["Consistency_SD"] = np.nan
    else:
        out["Consistency_SD"] = out[usable].std(axis=1, skipna=True)
    return out
140
+
141
+
142
def make_fail_reason_hints(df: pd.DataFrame, component_cols: list[str]) -> pd.DataFrame:
    """Attach a "FailReasonHint" column with simple, management-friendly hints.

    Hints flag components sitting in the lower quartile for failing students.
    They are descriptive only and never affect the pass/fail decision.
    """
    out = df.copy()
    comps = [
        c for c in component_cols
        if c.lower() != "total" and pd.api.types.is_numeric_dtype(out[c])
    ]
    if not comps:
        # No numeric components: generic hint for fails, empty string otherwise.
        out["FailReasonHint"] = np.where(out["PassFail"] == "Fail", "Grade below C.", "")
        return out

    # Lower-quartile cutoffs, NaN when a component has no data at all.
    cutoffs = {}
    for c in comps:
        values = out[c].dropna()
        cutoffs[c] = values.quantile(0.25) if values.shape[0] else np.nan

    # First matching keyword wins — mirrors the priority final > lab > mid > test.
    keyword_messages = (
        ("final", "Final exam is in the lower quartile"),
        ("lab", "Lab total is in the lower quartile"),
        ("mid", "Mid exam is in the lower quartile"),
        ("test", "Test score is in the lower quartile"),
    )

    def describe(row):
        # One hint string per student; empty for non-failing rows.
        if row.get("PassFail") != "Fail":
            return ""
        hints = []
        for c in comps:
            value = row.get(c)
            if pd.isna(value) or pd.isna(cutoffs[c]) or value >= cutoffs[c]:
                continue
            for keyword, message in keyword_messages:
                if keyword in c.lower():
                    hints.append(message)
                    break
            else:
                hints.append(f"{c} is in the lower quartile")
        return " | ".join(hints) if hints else "Grade below C (review support plan)."

    out["FailReasonHint"] = out.apply(describe, axis=1)
    return out
176
+
177
 
178
  # -----------------------------
179
+ # Upload + Read (HF SAFE)
180
  # -----------------------------
181
  uploaded = st.file_uploader("Upload Excel (.xlsx)", type=["xlsx"])
182
 
 
184
  st.info("Upload an Excel file to begin.")
185
  st.stop()
186
 
187
+ file_bytes = uploaded.getvalue()
 
 
 
188
 
189
+ sheet_names = list_sheets(file_bytes)
190
+ sheet = st.selectbox("Select sheet", sheet_names, index=0)
 
 
191
 
192
+ raw = read_excel_sheet(file_bytes, sheet)
193
+ raw = normalize_headers(raw)
194
 
195
+ grade_col_name = pick_grade_column(raw)
196
 
197
+ df = detect_student_rows(raw, grade_col_name)
198
+ df, sno_col = ensure_sno(df)
 
 
 
 
 
 
 
199
 
200
+ # Grade column from chosen grade column name (fallback to last column already handled)
201
  df["Grade"] = df[grade_col_name].astype(str).str.strip().str.upper()
202
  df["PassFail"] = df["Grade"].apply(grade_pass_fail)
203
  df["Pass"] = df["PassFail"].eq("Pass")
204
  df["Fail"] = df["PassFail"].eq("Fail")
205
+ df["At_Risk"] = df["Fail"]
206
 
207
+ # Components (optional for insights)
208
+ component_cols = infer_component_cols(df, grade_col_name, sno_col)
 
 
 
 
 
 
 
209
  df = coerce_numeric(df, component_cols)
210
 
211
+ df = add_consistency(df, component_cols)
212
+ df = make_fail_reason_hints(df, component_cols)
 
 
 
 
 
 
 
 
 
 
 
 
213
 
214
  # -----------------------------
215
+ # Sidebar: "Power BI pages"
216
  # -----------------------------
217
  st.sidebar.header("Perspective")
218
  view = st.sidebar.radio(
 
222
  )
223
 
224
  st.sidebar.header("Filters")
225
+ pf_choices = ["Pass", "Fail", "Unknown"]
226
+ pf = st.sidebar.multiselect("Pass/Fail", pf_choices, default=pf_choices)
227
+
228
  grade_unique = sorted([g for g in df["Grade"].dropna().unique()])
229
  sel_grades = st.sidebar.multiselect("Grades", grade_unique, default=grade_unique)
230
 
 
235
  # KPI Row
236
  # -----------------------------
237
  k1, k2, k3, k4, k5 = st.columns(5)
238
+ with k1:
239
+ st.metric("Students", int(filtered.shape[0]))
240
+ with k2:
241
+ st.metric("Pass", int(filtered["Pass"].sum()))
242
+ with k3:
243
+ st.metric("Fail", int(filtered["Fail"].sum()))
244
  with k4:
245
  pr = (filtered["Pass"].mean() * 100) if filtered.shape[0] else 0
246
  st.metric("Pass Rate", f"{pr:.1f}%")
 
255
  # -----------------------------
256
  # Views
257
  # -----------------------------
258
+ def executive_view(d: pd.DataFrame):
259
  left, right = st.columns([1, 1])
260
 
261
  with left:
 
275
  st.subheader("Hidden Patterns (Quick Signals)")
276
  c1, c2, c3 = st.columns(3)
277
 
278
+ # Strong Lab but Fail (if any lab-like col exists)
279
+ lab_candidates = [c for c in component_cols if "lab" in c.lower() and c in d.columns and pd.api.types.is_numeric_dtype(d[c])]
280
+ if lab_candidates:
281
+ lab_col = lab_candidates[0]
282
  strong_lab_fail = d[(d["Fail"]) & (d[lab_col].notna()) & (d[lab_col] >= d[lab_col].quantile(0.75))]
283
  with c1:
284
  st.metric("Fail with Strong Lab", int(strong_lab_fail.shape[0]))
 
286
  with c1:
287
  st.metric("Fail with Strong Lab", "β€”")
288
 
289
+ # High inconsistency
290
  if "Consistency_SD" in d.columns and d["Consistency_SD"].notna().any():
291
  top_incons = d["Consistency_SD"].quantile(0.90)
292
  with c2:
 
295
  with c2:
296
  st.metric("High Inconsistency (Top 10%)", "β€”")
297
 
298
+ # Fail with high Total (if Total exists)
299
  if "Total" in d.columns and pd.api.types.is_numeric_dtype(d["Total"]) and d["Total"].notna().any():
300
  good_total_fail = d[(d["Fail"]) & (d["Total"] >= d["Total"].quantile(0.75))]
301
  with c3:
 
304
  with c3:
305
  st.metric("Fail with High Total", "β€”")
306
 
307
+ # Correlation only if Total + numeric components exist
308
+ numeric_comps = [c for c in component_cols if c in d.columns and pd.api.types.is_numeric_dtype(d[c])]
309
+ if "Total" in d.columns and pd.api.types.is_numeric_dtype(d["Total"]) and numeric_comps:
310
  st.subheader("What Drives Total? (Correlation)")
311
+ corr_cols = numeric_comps + ["Total"]
312
  corr = d[corr_cols].corr(numeric_only=True)
313
  fig = px.imshow(corr, text_auto=True, aspect="auto")
314
  st.plotly_chart(fig, use_container_width=True)
315
 
316
+
317
def risk_view(d: pd.DataFrame):
    """Render the "Risk & Intervention" page.

    Shows the list of failing students (Grade below C), a fail-type breakdown
    (borderline C- vs deeper fails), a detail table with component scores and
    reason hints, and static intervention suggestions.

    Relies on module-level ``sno_col`` and ``component_cols`` computed at load
    time, and on the "Fail"/"Grade"/"FailReasonHint" columns added earlier.
    """
    st.subheader("Fail List (Grade below C)")
    # "Fail" was derived from the Grade column via grade_pass_fail().
    fails = d[d["Fail"]].copy()

    if fails.empty:
        # Nothing to show under the current sidebar filters — exit early.
        st.success("No failing students in the current filter.")
        return

    # Split borderline fails (C-) from everything else for the bar chart.
    fails["FailType"] = np.where(fails["Grade"].str.startswith("C-"), "C- (Borderline Fail)", "Below C")

    bucket = fails["FailType"].value_counts().reset_index()
    bucket.columns = ["Fail Type", "Count"]

    c1, c2 = st.columns([1, 2])
    with c1:
        # Fail-type distribution.
        fig = px.bar(bucket, x="Fail Type", y="Count")
        st.plotly_chart(fig, use_container_width=True)

    with c2:
        # Detail table: id, grade, status, then Total and components if present.
        show_cols = [sno_col, "Grade", "PassFail"]
        for c in ["Total"] + component_cols:
            if c in fails.columns and c not in show_cols:
                show_cols.append(c)
        show_cols.append("FailReasonHint")

        st.dataframe(
            fails[show_cols].sort_values(by=["Grade", sno_col]),
            use_container_width=True,
            height=420
        )

    st.subheader("Intervention Suggestions (Management-friendly)")
    st.markdown(
        """
- **Many C- failures** → target borderline support (revision plan + short formative checks).
- **Failures linked with low Final** → structured exam-prep support (mock tests + feedback).
- **Failures with strong Lab** → review exam alignment + study strategy support.
"""
    )
356
 
357
+
358
def assessment_quality_view(d: pd.DataFrame):
    """Render the assessment-quality page.

    Shows a histogram for a chosen component, a component-vs-grade boxplot,
    missing/zero data-quality flags, and (when a numeric "Total" column
    exists) a correlation heatmap.

    Relies on module-level ``component_cols`` computed at load time.
    """
    st.subheader("Assessment Component Overview")

    # Only numeric component columns (excluding "Total") are analyzable here.
    numeric_comps = [c for c in component_cols if c in d.columns and pd.api.types.is_numeric_dtype(d[c]) and c.lower() != "total"]
    if not numeric_comps:
        st.warning("No numeric component columns detected for assessment analysis. Add Test/Mid/Lab/Final columns for deeper analysis.")
        return

    # Distribution of the selected component.
    comp = st.selectbox("Choose component", numeric_comps, index=0)
    fig = px.histogram(d, x=comp, nbins=20)
    st.plotly_chart(fig, use_container_width=True)

    st.subheader("Component vs Grade (Boxplot)")
    fig = px.box(d, x="Grade", y=comp)
    st.plotly_chart(fig, use_container_width=True)

    # Count missing and zero entries per component as simple quality flags.
    st.subheader("Data Quality Flags")
    flags = []
    for c in numeric_comps:
        series = d[c]
        missing = int(series.isna().sum())
        zeros = int((series == 0).sum())
        flags.append({"Component": c, "Missing": missing, "Zeros": zeros})
    st.dataframe(pd.DataFrame(flags), use_container_width=True)

    # Correlation heatmap only when a numeric Total column is available.
    if "Total" in d.columns and pd.api.types.is_numeric_dtype(d["Total"]):
        st.subheader("Correlation Heatmap")
        corr_cols = numeric_comps + ["Total"]
        corr = d[corr_cols].corr(numeric_only=True)
        fig = px.imshow(corr, text_auto=True, aspect="auto")
        st.plotly_chart(fig, use_container_width=True)
389
 
390
+
391
+ def student_drilldown_view(d: pd.DataFrame):
392
  st.subheader("Student Drill-down")
393
+ st.caption("Pick a student to view component breakdown and the grade-based decision (Grade β‰₯ C pass).")
394
+
395
  sid = st.selectbox("Select student (Sno)", sorted(d[sno_col].unique()))
396
  row = d[d[sno_col] == sid].iloc[0]
397
 
398
  c1, c2, c3 = st.columns(3)
399
+ with c1:
400
+ st.metric("Grade", str(row.get("Grade", "β€”")))
401
+ with c2:
402
+ st.metric("Status", str(row.get("PassFail", "β€”")))
403
  with c3:
404
  if "Total" in d.columns and pd.notna(row.get("Total", np.nan)):
405
  st.metric("Total", f"{row['Total']:.2f}")
406
  else:
407
  st.metric("Total", "β€”")
408
 
409
+ hint = row.get("FailReasonHint", "")
410
+ if hint:
411
+ st.write("**Reason hint:**", hint)
412
 
413
+ numeric_comps = [c for c in component_cols if c in d.columns and pd.api.types.is_numeric_dtype(d[c]) and c.lower() != "total"]
414
+ if numeric_comps:
415
+ comp_vals = {c: row.get(c) for c in numeric_comps}
416
  comp_df = pd.DataFrame({"Component": list(comp_vals.keys()), "Score": list(comp_vals.values())})
417
  fig = px.bar(comp_df, x="Component", y="Score")
418
  st.plotly_chart(fig, use_container_width=True)
 
420
  st.subheader("Raw record")
421
  st.dataframe(pd.DataFrame(row).T, use_container_width=True)
422
 
423
+
424
+ def export_view(d: pd.DataFrame):
425
  st.subheader("Export for Power BI")
426
+ st.caption("Download cleaned data with computed PassFail fields. Load into Power BI (Get Data β†’ Text/CSV).")
427
 
428
  clean_csv = d.to_csv(index=False).encode("utf-8")
429
+ st.download_button(
430
+ "⬇️ Download Cleaned Data (CSV)",
431
+ clean_csv,
432
+ file_name="cleaned_marks_with_passfail.csv",
433
+ mime="text/csv"
434
+ )
435
 
436
  st.subheader("Recommended Power BI Measures (DAX)")
437
+ st.code(
438
+ r"""
439
  Pass Count = CALCULATE(COUNTROWS(cleaned_marks), cleaned_marks[PassFail] = "Pass")
440
  Fail Count = CALCULATE(COUNTROWS(cleaned_marks), cleaned_marks[PassFail] = "Fail")
441
  Pass Rate % = DIVIDE([Pass Count], COUNTROWS(cleaned_marks))
442
+ """,
443
+ language="text",
444
+ )
445
 
446
  st.subheader("Summary Tables")
447
  grade_summary = d["Grade"].value_counts(dropna=False).reset_index()
 
452
  pf_summary.columns = ["PassFail", "Count"]
453
  st.dataframe(pf_summary, use_container_width=True)
454
 
455
+
456
+ # Render view
457
  if view == "Executive (Management)":
458
  executive_view(filtered)
459
  elif view == "Risk & Intervention":