# app.py (Fast-build Hugging Face Gradio) # School Mark Analysis: RegNo, Name, Tamil, English, Maths, Science, Social # Features: # - Total, Average, Rank, Remark # - Subject-wise average + bar chart # - Fail-count distribution (failed in 1..5 subjects) # - Top-3 overall, Top-3 in each subject # - Search by RegNo # - Download final result CSV # - Logo display (reliable on Hugging Face) using gr.Image + Pillow import os import tempfile import numpy as np import pandas as pd import gradio as gr import matplotlib.pyplot as plt from PIL import Image SUBJECTS_DEFAULT = ["Tamil", "English", "Maths", "Science", "Social"] ID_COL_DEFAULT = "RegNo" NAME_COL_DEFAULT = "Name" # ----------------------------- # Utilities # ----------------------------- def _clean_columns(df: pd.DataFrame) -> pd.DataFrame: df = df.copy() df.columns = [c.strip() for c in df.columns] return df def _validate_and_prepare(df: pd.DataFrame, id_col: str, name_col: str, subjects: list[str]) -> pd.DataFrame: df = _clean_columns(df) missing = [c for c in [id_col, name_col] + subjects if c not in df.columns] if missing: raise ValueError(f"Missing required columns: {missing}") # Ensure subject columns are numeric for s in subjects: df[s] = pd.to_numeric(df[s], errors="coerce") # Basic sanity if df[subjects].isna().all(axis=None): raise ValueError("All subject columns became NaN after numeric conversion. Check CSV data.") return df def _remark(avg: float, failed_subjects: int) -> str: if failed_subjects > 0: return "Fail" if avg >= 80: return "Distinction" if 60 <= avg <= 79: return "First Class" if 35 <= avg <= 59: return "Second Class" return "Pass" # ----------------------------- # Core computations # ----------------------------- def compute_marks( df: pd.DataFrame, pass_mark: int = 35, id_col: str = ID_COL_DEFAULT, name_col: str = NAME_COL_DEFAULT, subjects: list[str] = SUBJECTS_DEFAULT, ): df = _validate_and_prepare(df, id_col, name_col, subjects) out = df.copy() out["Total"] = out[subjects].sum(axis=1) out["Average"] = out[subjects].mean(axis=1) out["Failed_Subjects"] = (out[subjects] < pass_mark).sum(axis=1) out["Remark"] = out.apply(lambda r: _remark(float(r["Average"]), int(r["Failed_Subjects"])), axis=1) # Rank by Total (descending), ties get same minimum rank out["Rank"] = out["Total"].rank(method="min", ascending=False).astype(int) out = out.sort_values(["Rank", id_col], ascending=[True, True]).reset_index(drop=True) # Subject-wise average subj_avg = pd.DataFrame({ "Subject": subjects, "Class_Average": [float(out[s].mean()) for s in subjects] }) # Fail distribution (1..5) fail_dist = pd.DataFrame({ "Failed_Subjects": [1, 2, 3, 4, 5], "Student_Count": [int((out["Failed_Subjects"] == k).sum()) for k in [1, 2, 3, 4, 5]] }) # Top 3 overall top3_overall_cols = [id_col, name_col, "Total", "Average", "Rank", "Remark"] top3_overall = out.nsmallest(3, "Rank")[top3_overall_cols] # Top 3 per subject rows = [] for s in subjects: t = out.sort_values(s, ascending=False).head(3)[[id_col, name_col, s, "Total", "Average", "Rank", "Remark"]].copy() t.insert(0, "Subject", s) t.rename(columns={s: "Subject_Mark"}, inplace=True) rows.append(t) top3_each_subject = pd.concat(rows, ignore_index=True) if rows else pd.DataFrame() # Summary total_students = out.shape[0] pass_count = int((out["Failed_Subjects"] == 0).sum()) fail_count = total_students - pass_count dist_count = int((out["Remark"] == "Distinction").sum()) first_count = int((out["Remark"] == "First Class").sum()) second_count = int((out["Remark"] == "Second Class").sum()) summary = ( f"Students: {total_students} | Pass: {pass_count} | Fail: {fail_count} | " f"Distinction: {dist_count} | First Class: {first_count} | Second Class: {second_count}" ) return out, subj_avg, fail_dist, top3_overall, top3_each_subject, summary # ----------------------------- # Plots (close figs to avoid memory growth) # ----------------------------- def plot_subject_avg(subj_avg: pd.DataFrame): fig, ax = plt.subplots(figsize=(7, 4)) ax.bar(subj_avg["Subject"], subj_avg["Class_Average"]) ax.set_title("Subject-wise Class Average") ax.set_xlabel("Subject") ax.set_ylabel("Average Marks") ax.set_ylim(0, 100) plt.xticks(rotation=25, ha="right") plt.tight_layout() plt.close(fig) return fig def plot_remark_distribution(result_df: pd.DataFrame): order = ["Distinction", "First Class", "Second Class", "Fail", "Pass"] counts = result_df["Remark"].value_counts() labels = [x for x in order if x in counts.index] values = [int(counts[x]) for x in labels] fig, ax = plt.subplots(figsize=(7, 4)) ax.bar(labels, values) ax.set_title("Remark Distribution") ax.set_xlabel("Remark") ax.set_ylabel("Number of Students") plt.xticks(rotation=20, ha="right") plt.tight_layout() plt.close(fig) return fig # ----------------------------- # Gradio handlers # ----------------------------- def load_csv(file_obj): if file_obj is None: return None, "Please upload a CSV.", None try: df = pd.read_csv(file_obj.name) df = _clean_columns(df) msg = f"Loaded: {os.path.basename(file_obj.name)} | Rows={df.shape[0]} | Cols={df.shape[1]}" return df, msg, df.head(20) except Exception as e: return None, f"Could not read CSV: {e}", None def run_all(df, pass_mark): if df is None: return "Upload a CSV first.", None, None, None, None, None, None, None, None try: result_df, subj_avg, fail_dist, top3_overall, top3_each_subject, summary = compute_marks( df, pass_mark=int(pass_mark) ) fig_avg = plot_subject_avg(subj_avg) fig_remark = plot_remark_distribution(result_df) # Save downloadable CSV out_path = tempfile.NamedTemporaryFile(delete=False, suffix=".csv").name result_df.to_csv(out_path, index=False) return summary, result_df, subj_avg, fail_dist, top3_overall, top3_each_subject, fig_avg, fig_remark, out_path except Exception as e: return f"Error: {e}", None, None, None, None, None, None, None, None def search_regno(result_df, regno_value): if result_df is None or isinstance(result_df, str): return "Run analysis first.", pd.DataFrame() if not regno_value: return "Enter RegNo to search.", pd.DataFrame() col = result_df[ID_COL_DEFAULT] if pd.api.types.is_numeric_dtype(col): try: q = float(regno_value) res = result_df[result_df[ID_COL_DEFAULT] == q] except Exception: res = result_df[col.astype(str) == str(regno_value)] else: res = result_df[col.astype(str) == str(regno_value)] if res.empty: return "No matching record found.", pd.DataFrame() return f"Found {len(res)} record(s).", res # ----------------------------- # Logo loader (reliable in HF) # ----------------------------- def load_logo(): for fn in ["logo.jpg", "logo.png", "logo.jpeg", "Logo.jpg", "Logo.png", "Logo.jpeg"]: if os.path.exists(fn): try: return Image.open(fn) except Exception: return None return None # ----------------------------- # UI # ----------------------------- CSS = """ #titleblock {text-align:center; margin-top: 6px; margin-bottom: 8px;} #t1 {font-size:30px; font-weight:800; color:#1E5AA8;} #t2 {font-size:18px; font-weight:800; color:#1E5AA8;} """ with gr.Blocks(css=CSS, title="School Mark Analysis") as demo: # Logo + title logo = load_logo() if logo is not None: gr.Image(value=logo, show_label=False, interactive=False, height=160) gr.HTML("""