Spaces:

SanthiSastra
/

Feb18

Sleeping

App Files Community

SanthiSastra committed on Feb 18

Commit

ea148ac

verified ·

1 Parent(s): 6e976d9

Update app.py

Browse files

Files changed (1) hide show

app.py +362 -0

app.py CHANGED Viewed

	@@ -0,0 +1,362 @@

+# app.py — Student Mark Analysis (HF Spaces + Gradio)
+# Upload a CSV with columns: RegNo, Name, Subject1, Subject2, ...
+# Outputs: per-student totals/average/rank/remark, top lists, subject stats, charts, and downloadable CSV.
+import io
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+import gradio as gr
+def _safe_numeric(df: pd.DataFrame, cols):
+    """Convert columns to numeric; invalid -> NaN."""
+    out = df.copy()
+    for c in cols:
+        out[c] = pd.to_numeric(out[c], errors="coerce")
+    return out
+def _compute_analysis(
+    df: pd.DataFrame,
+    pass_mark: int,
+    top2_pct: float,
+    top10_pct: float,
+    sort_order: str,
+    regno_search: str,
+    name_search: str,
+    fail_filter: str,
+    selected_subject: str,
+    topk_subject: int,
+):
+    if df is None or df.empty:
+        raise gr.Error("Uploaded file is empty. Please upload a valid CSV.")
+    # Basic required columns
+    required = {"RegNo", "Name"}
+    if not required.issubset(set(df.columns)):
+        raise gr.Error("CSV must contain at least these columns: RegNo, Name")
+    # Detect subject columns: everything except RegNo/Name
+    base_cols = ["RegNo", "Name"]
+    subject_cols = [c for c in df.columns if c not in base_cols]
+    if len(subject_cols) == 0:
+        raise gr.Error("No subject columns found. Add subject mark columns after RegNo and Name.")
+    # Numeric conversion for subject marks
+    df2 = df.copy()
+    df2["RegNo"] = df2["RegNo"].astype(str).str.strip()
+    df2["Name"] = df2["Name"].astype(str).str.strip()
+    df2 = _safe_numeric(df2, subject_cols)
+    # If any subject column is entirely NaN -> likely wrong format
+    all_nan_cols = [c for c in subject_cols if df2[c].isna().all()]
+    if all_nan_cols:
+        raise gr.Error(f"These subject columns have no valid numeric marks: {all_nan_cols}")
+    # Per-student metrics
+    df2["Total"] = df2[subject_cols].sum(axis=1, skipna=False)
+    df2["Average"] = df2[subject_cols].mean(axis=1, skipna=False)
+    # Fail count (arrears)
+    df2["Fail_Count"] = (df2[subject_cols] < pass_mark).sum(axis=1)
+    # Remark
+    def remark(row):
+        fc = int(row["Fail_Count"])
+        if np.isnan(row["Total"]) or np.isnan(row["Average"]):
+            return "Invalid Marks"
+        if fc == 0:
+            return "Pass"
+        return f"Arrear: {fc}"
+    df2["Remark"] = df2.apply(remark, axis=1)
+    # Rank (only for valid totals)
+    valid_mask = df2["Total"].notna()
+    # Higher total => better rank (1 is best)
+    df2.loc[valid_mask, "Rank"] = df2.loc[valid_mask, "Total"].rank(ascending=False, method="min").astype(int)
+    df2.loc[~valid_mask, "Rank"] = np.nan
+    # Class stats
+    n_students = len(df2)
+    pass_count = int((df2["Fail_Count"] == 0).sum())
+    fail_any_count = int((df2["Fail_Count"] > 0).sum())
+    # Top lists (based on Total)
+    df_ranked = df2[valid_mask].sort_values("Total", ascending=False).copy()
+    n_valid = len(df_ranked)
+    def top_n_by_pct(pct: float) -> int:
+        # Always at least 1 if data exists
+        if n_valid == 0:
+            return 0
+        return max(1, int(np.ceil((pct / 100.0) * n_valid)))
+    top2_n = top_n_by_pct(top2_pct)
+    top10_n = top_n_by_pct(top10_pct)
+    top2_df = df_ranked.head(top2_n)[["RegNo", "Name", "Total", "Average", "Rank", "Fail_Count", "Remark"]]
+    top10_df = df_ranked.head(top10_n)[["RegNo", "Name", "Total", "Average", "Rank", "Fail_Count", "Remark"]]
+    # Subject averages
+    subject_avg = df2[subject_cols].mean(axis=0, skipna=True).sort_values(ascending=False)
+    least_subject = subject_avg.idxmin()
+    least_subject_avg = float(subject_avg.min())
+    # Top-K per selected subject
+    if selected_subject not in subject_cols:
+        selected_subject = subject_cols[0]
+    topk_sub_df = (
+        df2[["RegNo", "Name", selected_subject]]
+        .dropna(subset=[selected_subject])
+        .sort_values(selected_subject, ascending=False)
+        .head(int(topk_subject))
+        .rename(columns={selected_subject: "Mark"})
+    )
+    # Filtering
+    filtered = df2.copy()
+    if regno_search.strip():
+        key = regno_search.strip()
+        filtered = filtered[filtered["RegNo"].str.contains(key, case=False, na=False)]
+    if name_search.strip():
+        key = name_search.strip()
+        filtered = filtered[filtered["Name"].str.contains(key, case=False, na=False)]
+    if fail_filter != "All":
+        if fail_filter == "Pass only (Fail_Count = 0)":
+            filtered = filtered[filtered["Fail_Count"] == 0]
+        elif fail_filter == "Arrear only (Fail_Count >= 1)":
+            filtered = filtered[filtered["Fail_Count"] >= 1]
+        else:
+            # "Fail_Count = k"
+            try:
+                k = int(fail_filter.split("=")[-1].strip())
+                filtered = filtered[filtered["Fail_Count"] == k]
+            except Exception:
+                pass
+    # Sorting
+    if sort_order == "Rank (Best first)":
+        filtered = filtered.sort_values(["Rank", "Total"], ascending=[True, False], na_position="last")
+    elif sort_order == "Total (High to Low)":
+        filtered = filtered.sort_values("Total", ascending=False, na_position="last")
+    elif sort_order == "Total (Low to High)":
+        filtered = filtered.sort_values("Total", ascending=True, na_position="last")
+    elif sort_order == "Name (A to Z)":
+        filtered = filtered.sort_values("Name", ascending=True, na_position="last")
+    # Output table columns
+    out_cols = ["RegNo", "Name"] + subject_cols + ["Total", "Average", "Rank", "Fail_Count", "Remark"]
+    filtered_out = filtered[out_cols].copy()
+    # Summary text
+    summary_lines = [
+        f"Students: {n_students} (Valid totals: {n_valid})",
+        f"Pass (Fail_Count=0): {pass_count}",
+        f"Arrear (Fail_Count>=1): {fail_any_count}",
+        f"Pass %: {((pass_count / n_students) * 100.0):.2f}%" if n_students else "Pass %: N/A",
+        f"Top {top2_pct:.1f}% count: {top2_n}",
+        f"Top {top10_pct:.1f}% count: {top10_n}",
+        f"Least average subject: {least_subject} (Avg = {least_subject_avg:.2f})",
+    ]
+    summary = "\n".join(summary_lines)
+    # Charts
+    # 1) Subject average bar chart
+    fig1 = plt.figure()
+    plt.bar(subject_avg.index.astype(str), subject_avg.values)
+    plt.xticks(rotation=45, ha="right")
+    plt.ylabel("Average Mark")
+    plt.title("Subject-wise Average")
+    plt.tight_layout()
+    # 2) Fail count distribution
+    fig2 = plt.figure()
+    vc = df2["Fail_Count"].value_counts().sort_index()
+    plt.bar(vc.index.astype(str), vc.values)
+    plt.xlabel("Fail_Count (No. of subjects below pass mark)")
+    plt.ylabel("Number of students")
+    plt.title("Arrear Distribution")
+    plt.tight_layout()
+    # Downloadable CSV (filtered output)
+    csv_bytes = filtered_out.to_csv(index=False).encode("utf-8")
+    download_file = ("student_mark_analysis.csv", csv_bytes)
+    # Tables: keep concise for display
+    subject_avg_table = pd.DataFrame({"Subject": subject_avg.index, "Average": subject_avg.values})
+    subject_avg_table["Average"] = subject_avg_table["Average"].round(2)
+    return (
+        summary,
+        filtered_out,
+        top2_df,
+        top10_df,
+        subject_avg_table,
+        topk_sub_df,
+        fig1,
+        fig2,
+        download_file,
+        subject_cols,
+    )
+def analyze(
+    file_obj,
+    pass_mark,
+    top2_pct,
+    top10_pct,
+    sort_order,
+    regno_search,
+    name_search,
+    fail_filter,
+    selected_subject,
+    topk_subject,
+):
+    if file_obj is None:
+        raise gr.Error("Please upload a CSV file.")
+    # Read CSV
+    try:
+        df = pd.read_csv(file_obj.name)
+    except Exception:
+        # sometimes HF gives bytes-like
+        file_obj.seek(0)
+        df = pd.read_csv(file_obj)
+    return _compute_analysis(
+        df=df,
+        pass_mark=pass_mark,
+        top2_pct=top2_pct,
+        top10_pct=top10_pct,
+        sort_order=sort_order,
+        regno_search=regno_search,
+        name_search=name_search,
+        fail_filter=fail_filter,
+        selected_subject=selected_subject,
+        topk_subject=topk_subject,
+    )
+def update_subject_dropdown(file_obj):
+    if file_obj is None:
+        return gr.Dropdown(choices=[], value=None)
+    try:
+        df = pd.read_csv(file_obj.name)
+    except Exception:
+        file_obj.seek(0)
+        df = pd.read_csv(file_obj)
+    if not {"RegNo", "Name"}.issubset(set(df.columns)):
+        return gr.Dropdown(choices=[], value=None)
+    subject_cols = [c for c in df.columns if c not in ["RegNo", "Name"]]
+    value = subject_cols[0] if subject_cols else None
+    return gr.Dropdown(choices=subject_cols, value=value)
+with gr.Blocks(title="Student Mark Analysis") as demo:
+    gr.Markdown(
+        """
+# 📊 Student Mark Analysis (CSV → Report)
+**CSV format:** `RegNo, Name, Subject1, Subject2, ...`
+Example: `RegNo,Name,Tamil,English,Maths,Science,Social`
+"""
+    )
+    with gr.Row():
+        file_in = gr.File(label="Upload CSV", file_types=[".csv"])
+        with gr.Column():
+            pass_mark = gr.Slider(0, 100, value=50, step=1, label="Pass mark (per subject)")
+            top2_pct = gr.Slider(0.5, 20, value=2.0, step=0.5, label="Top % (List-1)")
+            top10_pct = gr.Slider(1, 50, value=10.0, step=1, label="Top % (List-2)")
+    with gr.Row():
+        sort_order = gr.Dropdown(
+            choices=["Rank (Best first)", "Total (High to Low)", "Total (Low to High)", "Name (A to Z)"],
+            value="Rank (Best first)",
+            label="Sort result table",
+        )
+        fail_filter = gr.Dropdown(
+            choices=[
+                "All",
+                "Pass only (Fail_Count = 0)",
+                "Arrear only (Fail_Count >= 1)",
+                "Fail_Count = 1",
+                "Fail_Count = 2",
+                "Fail_Count = 3",
+                "Fail_Count = 4",
+                "Fail_Count = 5",
+            ],
+            value="All",
+            label="Filter by arrears",
+        )
+    with gr.Row():
+        regno_search = gr.Textbox(label="Search by RegNo (contains)", placeholder="e.g., 2026")
+        name_search = gr.Textbox(label="Search by Name (contains)", placeholder="e.g., Priya")
+    with gr.Row():
+        selected_subject = gr.Dropdown(choices=[], label="Choose a subject (Top-K in subject)", value=None)
+        topk_subject = gr.Slider(1, 20, value=3, step=1, label="Top-K students in selected subject")
+    analyze_btn = gr.Button("Generate Analysis", variant="primary")
+    summary = gr.Textbox(label="Class Summary", lines=8)
+    gr.Markdown("## ✅ Student-wise Result Table")
+    result_table = gr.Dataframe(interactive=False, wrap=True)
+    with gr.Row():
+        top2_table = gr.Dataframe(interactive=False, label="Top % (List-1)")
+        top10_table = gr.Dataframe(interactive=False, label="Top % (List-2)")
+    with gr.Row():
+        subj_avg_table = gr.Dataframe(interactive=False, label="Subject Averages")
+        topk_sub_table = gr.Dataframe(interactive=False, label="Top-K in Selected Subject")
+    with gr.Row():
+        chart1 = gr.Plot(label="Subject-wise Average (Bar Chart)")
+        chart2 = gr.Plot(label="Arrear Distribution (Fail_Count)")
+    download = gr.File(label="Download filtered report (CSV)")
+    # Update subject dropdown when a file is uploaded
+    file_in.change(fn=update_subject_dropdown, inputs=[file_in], outputs=[selected_subject])
+    analyze_btn.click(
+        fn=analyze,
+        inputs=[
+            file_in,
+            pass_mark,
+            top2_pct,
+            top10_pct,
+            sort_order,
+            regno_search,
+            name_search,
+            fail_filter,
+            selected_subject,
+            topk_subject,
+        ],
+        outputs=[
+            summary,
+            result_table,
+            top2_table,
+            top10_table,
+            subj_avg_table,
+            topk_sub_table,
+            chart1,
+            chart2,
+            download,
+            selected_subject,  # refresh list too
+        ],
+    )
+demo.launch()