Analysis / app.py
SanthiSastra's picture
Update app.py
dc82a53 verified
# app.py (Fast-build Hugging Face Gradio)
# School Mark Analysis: RegNo, Name, Tamil, English, Maths, Science, Social
# Features:
# - Total, Average, Rank, Remark
# - Subject-wise average + bar chart
# - Fail-count distribution (failed in 1..5 subjects)
# - Top-3 overall, Top-3 in each subject
# - Search by RegNo
# - Download final result CSV
# - Logo display (reliable on Hugging Face) using gr.Image + Pillow
import os
import tempfile
import numpy as np
import pandas as pd
import gradio as gr
import matplotlib.pyplot as plt
from PIL import Image
# Default CSV schema used by compute_marks() and search_regno():
# the subject mark columns, the student identifier column and the name column.
SUBJECTS_DEFAULT = ["Tamil", "English", "Maths", "Science", "Social"]
ID_COL_DEFAULT = "RegNo"
NAME_COL_DEFAULT = "Name"
# -----------------------------
# Utilities
# -----------------------------
def _clean_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* with surrounding whitespace removed from column labels.

    Only string labels are stripped. Non-string labels (for example integer
    headers) are kept unchanged instead of raising ``AttributeError``.
    The input frame is never modified.
    """
    df = df.copy()
    df.columns = [c.strip() if isinstance(c, str) else c for c in df.columns]
    return df
def _validate_and_prepare(df: pd.DataFrame, id_col: str, name_col: str, subjects: list[str]) -> pd.DataFrame:
    """Check that the required columns exist and coerce subject marks to numbers.

    Works on a cleaned copy of *df* (see ``_clean_columns``). Values in the
    subject columns that cannot be parsed as numbers become NaN.

    Raises:
        ValueError: if any of ``id_col``, ``name_col`` or the subject columns
            is missing, or if every subject value is NaN after conversion.
    """
    prepared = _clean_columns(df)

    required = [id_col, name_col] + subjects
    absent = [col for col in required if col not in prepared.columns]
    if absent:
        raise ValueError(f"Missing required columns: {absent}")

    # Coerce each subject column; unparseable cells turn into NaN.
    for subject in subjects:
        prepared[subject] = pd.to_numeric(prepared[subject], errors="coerce")

    # Sanity check: refuse to continue when no usable mark survived.
    nothing_numeric = prepared[subjects].isna().all(axis=None)
    if nothing_numeric:
        raise ValueError("All subject columns became NaN after numeric conversion. Check CSV data.")

    return prepared
def _remark(avg: float, failed_subjects: int) -> str:
    """Classify a student from the average mark and the number of failed subjects.

    Args:
        avg: Average mark across subjects (may be fractional).
        failed_subjects: Number of subjects below the pass mark.

    Returns:
        ``"Fail"`` if any subject was failed; otherwise ``"Distinction"``
        (avg >= 80), ``"First Class"`` (60 <= avg < 80), ``"Second Class"``
        (35 <= avg < 60), or ``"Pass"`` for anything else (an average below
        35 with no failed subject, or a NaN average).
    """
    if failed_subjects > 0:
        return "Fail"
    # Descending thresholds give contiguous bands, so fractional averages
    # such as 79.4 or 59.6 cannot fall into a gap between classes.
    if avg >= 80:
        return "Distinction"
    if avg >= 60:
        return "First Class"
    if avg >= 35:
        return "Second Class"
    return "Pass"
# -----------------------------
# Core computations
# -----------------------------
def compute_marks(
    df: pd.DataFrame,
    pass_mark: int = 35,
    id_col: str = ID_COL_DEFAULT,
    name_col: str = NAME_COL_DEFAULT,
    subjects: list[str] = SUBJECTS_DEFAULT,
):
    """Compute per-student results and class-level statistics.

    Args:
        df: Raw marks table; must contain ``id_col``, ``name_col`` and every
            column listed in ``subjects``.
        pass_mark: A subject counts as failed when its mark is below this value.
        id_col: Name of the student identifier column.
        name_col: Name of the student name column.
        subjects: Names of the subject mark columns (not modified).

    Returns:
        A 6-tuple ``(out, subj_avg, fail_dist, top3_overall,
        top3_each_subject, summary)``:

        * ``out`` - input rows plus ``Total``, ``Average``,
          ``Failed_Subjects``, ``Remark`` and ``Rank``, sorted by rank and id.
        * ``subj_avg`` - class average per subject.
        * ``fail_dist`` - number of students who failed exactly
          1..len(subjects) subjects.
        * ``top3_overall`` - the three best-ranked rows.
        * ``top3_each_subject`` - the three highest marks in each subject.
        * ``summary`` - one-line text summary of the counts.

    Raises:
        ValueError: propagated from ``_validate_and_prepare`` when columns are
            missing or no subject value is numeric.
    """
    df = _validate_and_prepare(df, id_col, name_col, subjects)
    out = df.copy()

    out["Total"] = out[subjects].sum(axis=1)
    out["Average"] = out[subjects].mean(axis=1)
    # NOTE(review): a non-numeric/missing mark is NaN at this point; NaN is
    # skipped by sum/mean and ``NaN < pass_mark`` is False, so it is not
    # counted as a failed subject. Confirm this is the intended policy.
    out["Failed_Subjects"] = (out[subjects] < pass_mark).sum(axis=1)
    out["Remark"] = [
        _remark(float(avg), int(failed))
        for avg, failed in zip(out["Average"], out["Failed_Subjects"])
    ]

    # Rank by Total (descending); tied totals share the same minimum rank.
    out["Rank"] = out["Total"].rank(method="min", ascending=False).astype(int)
    out = out.sort_values(["Rank", id_col], ascending=[True, True]).reset_index(drop=True)

    # Subject-wise class average
    subj_avg = pd.DataFrame({
        "Subject": subjects,
        "Class_Average": [float(out[s].mean()) for s in subjects],
    })

    # Fail distribution: one row per possible count, 1..number of subjects,
    # so the table stays correct when a custom subject list is supplied.
    fail_levels = list(range(1, len(subjects) + 1))
    fail_dist = pd.DataFrame({
        "Failed_Subjects": fail_levels,
        "Student_Count": [int((out["Failed_Subjects"] == k).sum()) for k in fail_levels],
    })

    # Top 3 overall
    top3_overall_cols = [id_col, name_col, "Total", "Average", "Rank", "Remark"]
    top3_overall = out.nsmallest(3, "Rank")[top3_overall_cols]

    # Top 3 per subject, stacked into one table with a leading "Subject" column
    rows = []
    for s in subjects:
        t = out.sort_values(s, ascending=False).head(3)[
            [id_col, name_col, s, "Total", "Average", "Rank", "Remark"]
        ].copy()
        t.insert(0, "Subject", s)
        t.rename(columns={s: "Subject_Mark"}, inplace=True)
        rows.append(t)
    top3_each_subject = pd.concat(rows, ignore_index=True) if rows else pd.DataFrame()

    # Summary line
    total_students = out.shape[0]
    pass_count = int((out["Failed_Subjects"] == 0).sum())
    fail_count = total_students - pass_count
    dist_count = int((out["Remark"] == "Distinction").sum())
    first_count = int((out["Remark"] == "First Class").sum())
    second_count = int((out["Remark"] == "Second Class").sum())
    summary = (
        f"Students: {total_students} | Pass: {pass_count} | Fail: {fail_count} | "
        f"Distinction: {dist_count} | First Class: {first_count} | Second Class: {second_count}"
    )
    return out, subj_avg, fail_dist, top3_overall, top3_each_subject, summary
# -----------------------------
# Plots (close figs to avoid memory growth)
# -----------------------------
def plot_subject_avg(subj_avg: pd.DataFrame):
    """Draw a bar chart of the class average per subject and return the figure.

    Reads the ``Subject`` and ``Class_Average`` columns of *subj_avg*.
    The figure is closed in pyplot before being returned so that pyplot does
    not keep accumulating figures across calls.
    """
    fig, ax = plt.subplots(figsize=(7, 4))
    ax.bar(subj_avg["Subject"], subj_avg["Class_Average"])

    ax.set_title("Subject-wise Class Average")
    ax.set_xlabel("Subject")
    ax.set_ylabel("Average Marks")
    ax.set_ylim(0, 100)

    # Tilt the subject names so longer labels do not overlap.
    for tick_label in ax.get_xticklabels():
        tick_label.set_rotation(25)
        tick_label.set_ha("right")

    fig.tight_layout()
    plt.close(fig)
    return fig
def plot_remark_distribution(result_df: pd.DataFrame):
    """Draw a bar chart of how many students received each remark.

    Reads the ``Remark`` column of *result_df*. Bars appear in a fixed order
    and remarks that do not occur are omitted. The figure is closed in pyplot
    before being returned so that pyplot does not keep accumulating figures.
    """
    display_order = ["Distinction", "First Class", "Second Class", "Fail", "Pass"]
    tally = result_df["Remark"].value_counts()

    present = [remark for remark in display_order if remark in tally.index]
    heights = [int(tally[remark]) for remark in present]

    fig, ax = plt.subplots(figsize=(7, 4))
    ax.bar(present, heights)

    ax.set_title("Remark Distribution")
    ax.set_xlabel("Remark")
    ax.set_ylabel("Number of Students")

    # Tilt the remark names so they do not overlap.
    for tick_label in ax.get_xticklabels():
        tick_label.set_rotation(20)
        tick_label.set_ha("right")

    fig.tight_layout()
    plt.close(fig)
    return fig
# -----------------------------
# Gradio handlers
# -----------------------------
def load_csv(file_obj):
    """Read the uploaded CSV and return ``(dataframe, status_message, preview)``.

    Args:
        file_obj: The uploaded file. Either an object exposing the file path
            as ``.name`` or the path itself (string / path-like).
            NOTE(review): which of the two Gradio passes depends on the
            ``gr.File`` configuration and version - confirm for the deployed one.

    Returns:
        On success: the cleaned DataFrame, a "Loaded: ..." message and the
        first 20 rows. On failure or when nothing was uploaded:
        ``(None, <message>, None)``; errors are reported in the message rather
        than raised so the UI can display them.
    """
    if file_obj is None:
        return None, "Please upload a CSV.", None
    try:
        # Fall back to the value itself when there is no ``.name`` attribute.
        path = getattr(file_obj, "name", file_obj)
        df = pd.read_csv(path)
        df = _clean_columns(df)
        msg = f"Loaded: {os.path.basename(path)} | Rows={df.shape[0]} | Cols={df.shape[1]}"
        return df, msg, df.head(20)
    except Exception as e:
        # Handler boundary: surface any read/parse problem as a status message.
        return None, f"Could not read CSV: {e}", None
def run_all(df, pass_mark):
    """Run the full analysis and return everything the UI displays.

    Args:
        df: DataFrame loaded from the CSV, or ``None`` if nothing is loaded.
        pass_mark: Per-subject pass mark; converted with ``int()``.

    Returns:
        A 9-tuple ``(summary, result_df, subj_avg, fail_dist, top3_overall,
        top3_each_subject, fig_avg, fig_remark, csv_path)``. When *df* is
        ``None`` or any step raises, the first element is a message and the
        remaining eight are ``None``.
    """
    blanks = (None,) * 8
    if df is None:
        return ("Upload a CSV first.",) + blanks
    try:
        result_df, subj_avg, fail_dist, top3_overall, top3_each_subject, summary = compute_marks(
            df, pass_mark=int(pass_mark)
        )
        fig_avg = plot_subject_avg(subj_avg)
        fig_remark = plot_remark_distribution(result_df)

        # Reserve a unique path for the downloadable CSV and close the handle
        # right away, so no descriptor is leaked and the file can be reopened
        # for writing by name.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
            out_path = tmp.name
        result_df.to_csv(out_path, index=False)

        return (
            summary, result_df, subj_avg, fail_dist, top3_overall,
            top3_each_subject, fig_avg, fig_remark, out_path,
        )
    except Exception as e:
        # Handler boundary: show the problem in the summary box instead of crashing.
        return (f"Error: {e}",) + blanks
def search_regno(result_df, regno_value):
    """Look up rows of the analysed result whose RegNo equals *regno_value*.

    Surrounding whitespace in the query is ignored. A numeric RegNo column is
    compared numerically when the query parses as a number; otherwise the
    column is compared as (whitespace-stripped) text.

    Args:
        result_df: Result table produced by the analysis, or ``None``/a string
            when no analysis has been run.
        regno_value: The RegNo typed by the user.

    Returns:
        ``(status_message, matches)`` where ``matches`` is a DataFrame (empty
        when there is nothing to show).
    """
    if result_df is None or isinstance(result_df, str):
        return "Run analysis first.", pd.DataFrame()

    # Treat a whitespace-only query the same as an empty one.
    query = str(regno_value).strip() if regno_value else ""
    if not query:
        return "Enter RegNo to search.", pd.DataFrame()

    col = result_df[ID_COL_DEFAULT]
    res = None
    if pd.api.types.is_numeric_dtype(col):
        try:
            res = result_df[col == float(query)]
        except ValueError:
            # Query is not a number: fall through to the text comparison.
            res = None
    if res is None:
        res = result_df[col.astype(str).str.strip() == query]

    if res.empty:
        return "No matching record found.", pd.DataFrame()
    return f"Found {len(res)} record(s).", res
# -----------------------------
# Logo loader (reliable in HF)
# -----------------------------
def load_logo():
    """Return the first readable logo image from the working directory, or ``None``.

    Candidate file names are tried in order. A file that exists but cannot be
    opened or decoded is skipped so that a later candidate can still be used.
    """
    candidates = ("logo.jpg", "logo.png", "logo.jpeg", "Logo.jpg", "Logo.png", "Logo.jpeg")
    for fn in candidates:
        if not os.path.exists(fn):
            continue
        try:
            img = Image.open(fn)
        except Exception:
            # Best effort: unreadable file, try the next candidate.
            continue
        try:
            # Image.open is lazy; decode now so a corrupt file is detected here
            # and the image no longer depends on the open file.
            img.load()
        except Exception:
            img.close()
            continue
        return img
    return None
# -----------------------------
# UI
# -----------------------------
# Styles for the centered title block (#titleblock) and its two text lines
# (#t1, #t2); passed to gr.Blocks(css=...) below.
CSS = """
#titleblock {text-align:center; margin-top: 6px; margin-bottom: 8px;}
#t1 {font-size:30px; font-weight:800; color:#1E5AA8;}
#t2 {font-size:18px; font-weight:800; color:#1E5AA8;}
"""
# Build the Gradio interface: header, left column (upload + settings),
# right column (result tabs), then the button event wiring.
with gr.Blocks(css=CSS, title="School Mark Analysis") as demo:
    # Logo + title
    logo = load_logo()
    if logo is not None:
        # Show the logo only when one of the candidate files could be loaded.
        gr.Image(value=logo, show_label=False, interactive=False, height=160)
    gr.HTML("""
<div id="titleblock" style="margin-top:-10px;">
<div id="t1">Amrita Manthana</div>
<div id="t2">Prof.B.Santhi,SRC,SASTRA</div>
</div>
""")
    # Per-session state: df_state receives the DataFrame returned by load_csv,
    # result_state receives the result table returned by run_and_store.
    df_state = gr.State(None)
    result_state = gr.State(None)
    with gr.Row():
        # Left column: CSV upload, analysis settings, summary and download.
        with gr.Column(scale=1, min_width=340):
            gr.Markdown("### Upload Marks CSV")
            file_in = gr.File(label="Upload CSV", file_types=[".csv"])
            load_btn = gr.Button("Load CSV", variant="primary")
            load_msg = gr.Textbox(label="Status", interactive=False)
            preview = gr.Dataframe(label="Preview", interactive=False, wrap=True)
            gr.Markdown("### Analysis Settings")
            pass_mark = gr.Slider(label="Pass mark (per subject)", minimum=0, maximum=100, value=35, step=1)
            run_btn = gr.Button("Run Mark Analysis", variant="primary")
            summary = gr.Textbox(label="Summary", interactive=False)
            download_file = gr.File(label="Download Result CSV")
        # Right column: one tab per output of the analysis, plus RegNo search.
        with gr.Column(scale=2):
            with gr.Tabs():
                with gr.Tab("Result Table"):
                    result_table = gr.Dataframe(
                        label="Result (Total, Average, Rank, Remark, Failed_Subjects)",
                        interactive=False,
                        wrap=True
                    )
                with gr.Tab("Subject Averages"):
                    subj_avg_table = gr.Dataframe(label="Subject-wise Averages", interactive=False, wrap=True)
                    avg_plot = gr.Plot(label="Bar Chart: Subject-wise Average")
                with gr.Tab("Fail Counts"):
                    fail_dist_table = gr.Dataframe(
                        label="Students failed in 1/2/3/4/5 subjects",
                        interactive=False,
                        wrap=True
                    )
                with gr.Tab("Toppers"):
                    top3_overall_table = gr.Dataframe(label="Overall Top 3", interactive=False, wrap=True)
                    top3_each_subject_table = gr.Dataframe(label="Top 3 in each subject", interactive=False, wrap=True)
                with gr.Tab("Remarks"):
                    remark_plot = gr.Plot(label="Remark Distribution")
                with gr.Tab("Search"):
                    regno_in = gr.Textbox(label="Enter RegNo (exact match)")
                    search_btn = gr.Button("Search")
                    search_msg = gr.Textbox(label="Search Status", interactive=False)
                    search_out = gr.Dataframe(label="Matching Record(s)", interactive=False, wrap=True)
    # Events
    # "Load CSV": parse the uploaded file, keep the DataFrame in df_state and show a preview.
    load_btn.click(
        load_csv,
        inputs=[file_in],
        outputs=[df_state, load_msg, preview]
    )

    def run_and_store(df, pm):
        """Call run_all and append the result table once more so it can be stored in result_state."""
        s, res, subj, faild, t3, t3sub, f1, f2, fcsv = run_all(df, pm)
        return s, res, subj, faild, t3, t3sub, f1, f2, fcsv, res

    # "Run Mark Analysis": the order of outputs matches the tuple returned by run_and_store.
    run_btn.click(
        run_and_store,
        inputs=[df_state, pass_mark],
        outputs=[
            summary, result_table, subj_avg_table, fail_dist_table, top3_overall_table, top3_each_subject_table,
            avg_plot, remark_plot, download_file, result_state
        ]
    )
    # "Search": look up the typed RegNo in the stored result table.
    search_btn.click(
        search_regno,
        inputs=[result_state, regno_in],
        outputs=[search_msg, search_out]
    )
# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()