Spaces:

SanthiSastra
/

Analysis

Sleeping

App Files Files Community

Analysis / app.py

SanthiSastra

Update app.py

dc82a53 verified 6 days ago

raw

history blame contribute delete

11.4 kB

	# app.py (Fast-build Hugging Face Gradio)
	# School Mark Analysis: RegNo, Name, Tamil, English, Maths, Science, Social
	# Features:
	# - Total, Average, Rank, Remark
	# - Subject-wise average + bar chart
	# - Fail-count distribution (failed in 1..5 subjects)
	# - Top-3 overall, Top-3 in each subject
	# - Search by RegNo
	# - Download final result CSV
	# - Logo display (reliable on Hugging Face) using gr.Image + Pillow

	import os
	import tempfile
	import numpy as np
	import pandas as pd
	import gradio as gr
	import matplotlib.pyplot as plt
	from PIL import Image

	# Default mark columns; used as the default `subjects` argument of compute_marks.
	SUBJECTS_DEFAULT = ["Tamil", "English", "Maths", "Science", "Social"]
	# Default name of the student-identifier column; search_regno looks records up in it.
	ID_COL_DEFAULT = "RegNo"
	# Default name of the student-name column.
	NAME_COL_DEFAULT = "Name"


	# -----------------------------
	# Utilities
	# -----------------------------
	def _clean_columns(df: pd.DataFrame) -> pd.DataFrame:
	df = df.copy()
	df.columns = [c.strip() for c in df.columns]
	return df


	def _validate_and_prepare(df: pd.DataFrame, id_col: str, name_col: str, subjects: list[str]) -> pd.DataFrame:
	    """Check that the required columns exist and coerce subject marks to numbers.

	    Args:
	        df: Raw marks table.
	        id_col: Name of the student-identifier column.
	        name_col: Name of the student-name column.
	        subjects: Names of the mark columns.

	    Returns:
	        A cleaned copy of ``df`` whose subject columns are numeric; values
	        that cannot be parsed become NaN.

	    Raises:
	        ValueError: If a required column is missing, or if every subject
	            value is NaN after conversion.
	    """
	    prepared = _clean_columns(df)

	    required = [id_col, name_col] + subjects
	    absent = [col for col in required if col not in prepared.columns]
	    if absent:
	        raise ValueError(f"Missing required columns: {absent}")

	    # Non-numeric entries are turned into NaN rather than raising.
	    for subject in subjects:
	        prepared[subject] = pd.to_numeric(prepared[subject], errors="coerce")

	    # Sanity check: refuse a table in which no mark survived the conversion.
	    nothing_numeric = prepared[subjects].isna().all(axis=None)
	    if nothing_numeric:
	        raise ValueError("All subject columns became NaN after numeric conversion. Check CSV data.")

	    return prepared


	def _remark(avg: float, failed_subjects: int) -> str:
	if failed_subjects > 0:
	return "Fail"
	if avg >= 80:
	return "Distinction"
	if 60 <= avg <= 79:
	return "First Class"
	if 35 <= avg <= 59:
	return "Second Class"
	return "Pass"


	# -----------------------------
	# Core computations
	# -----------------------------
	def compute_marks(
	    df: pd.DataFrame,
	    pass_mark: int = 35,
	    id_col: str = ID_COL_DEFAULT,
	    name_col: str = NAME_COL_DEFAULT,
	    subjects: list[str] = SUBJECTS_DEFAULT,
	):
	    """Compute per-student results and class-level statistics from a marks table.

	    Args:
	        df: Raw marks table containing ``id_col``, ``name_col`` and every
	            column listed in ``subjects``.
	        pass_mark: Minimum mark needed to pass a single subject.
	        id_col: Name of the student-identifier column.
	        name_col: Name of the student-name column.
	        subjects: Names of the mark columns.

	    Returns:
	        A 6-tuple ``(result, subj_avg, fail_dist, top3_overall,
	        top3_each_subject, summary)``:

	        * ``result``: input rows plus Total, Average, Failed_Subjects,
	          Remark and Rank, sorted by Rank then ``id_col``.
	        * ``subj_avg``: class average per subject.
	        * ``fail_dist``: number of students who failed exactly k subjects,
	          for k = 1..len(subjects).
	        * ``top3_overall``: the three best-ranked rows.
	        * ``top3_each_subject``: the three highest marks in each subject.
	        * ``summary``: one-line text summary of the counts.

	    Raises:
	        ValueError: Propagated from validation when columns are missing or
	            no subject value is numeric.
	    """
	    out = _validate_and_prepare(df, id_col, name_col, subjects).copy()

	    marks = out[subjects]
	    out["Total"] = marks.sum(axis=1)
	    out["Average"] = marks.mean(axis=1)

	    # NOTE(review): a mark that could not be parsed is NaN here, and
	    # NaN < pass_mark is False, so it is not counted as a failed subject.
	    # Confirm that this is the intended treatment of missing marks.
	    out["Failed_Subjects"] = (marks < pass_mark).sum(axis=1)
	    out["Remark"] = [
	        _remark(float(avg), int(failed))
	        for avg, failed in zip(out["Average"], out["Failed_Subjects"])
	    ]

	    # Rank by Total (descending); tied totals share the same minimum rank.
	    out["Rank"] = out["Total"].rank(method="min", ascending=False).astype(int)
	    out = out.sort_values(["Rank", id_col], ascending=[True, True]).reset_index(drop=True)

	    # Subject-wise class average.
	    subj_avg = pd.DataFrame({
	        "Subject": subjects,
	        "Class_Average": [float(out[s].mean()) for s in subjects],
	    })

	    # Fail distribution for 1..N failed subjects, where N follows the
	    # number of subjects instead of being fixed at five.
	    fail_levels = list(range(1, len(subjects) + 1))
	    fail_counts = out["Failed_Subjects"].value_counts()
	    fail_dist = pd.DataFrame({
	        "Failed_Subjects": fail_levels,
	        "Student_Count": [int(fail_counts.get(k, 0)) for k in fail_levels],
	    })

	    # Top 3 overall.
	    top3_overall_cols = [id_col, name_col, "Total", "Average", "Rank", "Remark"]
	    top3_overall = out.nsmallest(3, "Rank")[top3_overall_cols]

	    # Top 3 per subject. A stable sort keeps tied marks in overall-rank
	    # order, so the selection is deterministic.
	    per_subject = []
	    for s in subjects:
	        best = (
	            out.sort_values(s, ascending=False, kind="stable")
	            .head(3)[[id_col, name_col, s, "Total", "Average", "Rank", "Remark"]]
	            .rename(columns={s: "Subject_Mark"})
	        )
	        best.insert(0, "Subject", s)
	        per_subject.append(best)
	    top3_each_subject = pd.concat(per_subject, ignore_index=True) if per_subject else pd.DataFrame()

	    # Summary counts.
	    total_students = out.shape[0]
	    pass_count = int((out["Failed_Subjects"] == 0).sum())
	    fail_count = total_students - pass_count

	    remark_counts = out["Remark"].value_counts()
	    dist_count = int(remark_counts.get("Distinction", 0))
	    first_count = int(remark_counts.get("First Class", 0))
	    second_count = int(remark_counts.get("Second Class", 0))

	    summary = (
	        f"Students: {total_students} | Pass: {pass_count} | Fail: {fail_count} | "
	        f"Distinction: {dist_count} | First Class: {first_count} | Second Class: {second_count}"
	    )

	    return out, subj_avg, fail_dist, top3_overall, top3_each_subject, summary


	# -----------------------------
	# Plots (close figs to avoid memory growth)
	# -----------------------------
	def plot_subject_avg(subj_avg: pd.DataFrame):
	    """Build a bar chart of the class average per subject.

	    All styling is applied to the figure and axes created here rather than
	    through pyplot's implicit "current figure", so the result does not
	    depend on global pyplot state.

	    Args:
	        subj_avg: Frame with "Subject" and "Class_Average" columns.

	    Returns:
	        The matplotlib Figure, already closed in pyplot's registry.
	    """
	    fig, ax = plt.subplots(figsize=(7, 4))
	    ax.bar(subj_avg["Subject"], subj_avg["Class_Average"])
	    ax.set_title("Subject-wise Class Average")
	    ax.set_xlabel("Subject")
	    ax.set_ylabel("Average Marks")
	    ax.set_ylim(0, 100)
	    plt.setp(ax.get_xticklabels(), rotation=25, ha="right")
	    fig.tight_layout()
	    # Close the figure in pyplot to avoid memory growth; the returned
	    # Figure object remains usable.
	    plt.close(fig)
	    return fig


	def plot_remark_distribution(result_df: pd.DataFrame):
	    """Build a bar chart of how many students received each remark.

	    Only remarks that actually occur are drawn, in a fixed display order.
	    Styling is applied to the figure and axes created here rather than
	    through pyplot's implicit "current figure".

	    Args:
	        result_df: Result frame containing a "Remark" column.

	    Returns:
	        The matplotlib Figure, already closed in pyplot's registry.
	    """
	    order = ["Distinction", "First Class", "Second Class", "Fail", "Pass"]
	    counts = result_df["Remark"].value_counts()
	    labels = [remark for remark in order if remark in counts.index]
	    values = [int(counts[remark]) for remark in labels]

	    fig, ax = plt.subplots(figsize=(7, 4))
	    ax.bar(labels, values)
	    ax.set_title("Remark Distribution")
	    ax.set_xlabel("Remark")
	    ax.set_ylabel("Number of Students")
	    plt.setp(ax.get_xticklabels(), rotation=20, ha="right")
	    fig.tight_layout()
	    # Close the figure in pyplot to avoid memory growth; the returned
	    # Figure object remains usable.
	    plt.close(fig)
	    return fig


	# -----------------------------
	# Gradio handlers
	# -----------------------------
	def load_csv(file_obj):
	    """Read the uploaded CSV and return it with a status message and a preview.

	    Args:
	        file_obj: The uploaded file, either an object exposing the path as
	            ``.name`` or the path itself; ``None`` when nothing was uploaded.

	    Returns:
	        ``(df, message, preview)`` where ``preview`` is the first 20 rows.
	        On a missing upload or a read failure, ``df`` and ``preview`` are
	        ``None`` and ``message`` explains why.
	    """
	    if file_obj is None:
	        return None, "Please upload a CSV.", None

	    # Accept both an upload object with a .name path and a plain path.
	    path = getattr(file_obj, "name", file_obj)

	    try:
	        df = _clean_columns(pd.read_csv(path))
	    except Exception as e:
	        # UI boundary: report the problem in the status box instead of raising.
	        return None, f"Could not read CSV: {e}", None

	    msg = f"Loaded: {os.path.basename(path)} | Rows={df.shape[0]} | Cols={df.shape[1]}"
	    return df, msg, df.head(20)


	def run_all(df, pass_mark):
	    """Run the full analysis and return everything the UI displays.

	    Args:
	        df: Loaded marks table, or ``None`` if no CSV has been loaded.
	        pass_mark: Per-subject pass mark; converted with ``int``.

	    Returns:
	        A 9-tuple ``(summary, result_df, subj_avg, fail_dist, top3_overall,
	        top3_each_subject, fig_avg, fig_remark, csv_path)``. When no data is
	        loaded or the analysis fails, the first item is a message and the
	        remaining eight are ``None``.
	    """
	    no_results = (None,) * 8
	    if df is None:
	        return ("Upload a CSV first.", *no_results)

	    try:
	        result_df, subj_avg, fail_dist, top3_overall, top3_each_subject, summary = compute_marks(
	            df, pass_mark=int(pass_mark)
	        )
	        fig_avg = plot_subject_avg(subj_avg)
	        fig_remark = plot_remark_distribution(result_df)

	        # Reserve a path for the downloadable CSV and close the handle
	        # straight away, so no open file object is leaked.
	        with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
	            out_path = tmp.name
	        result_df.to_csv(out_path, index=False)

	        return (
	            summary, result_df, subj_avg, fail_dist, top3_overall,
	            top3_each_subject, fig_avg, fig_remark, out_path,
	        )
	    except Exception as e:
	        # UI boundary: show the error in the summary box instead of raising.
	        return (f"Error: {e}", *no_results)


	def search_regno(result_df, regno_value):
	    """Find the result rows whose registration number equals the query.

	    Leading and trailing whitespace in the query is ignored. A numeric
	    RegNo column is compared numerically when the query parses as a
	    number; otherwise values are compared as stripped strings.

	    Args:
	        result_df: Result table from the analysis, or ``None`` if it has
	            not been run yet.
	        regno_value: Text entered by the user.

	    Returns:
	        ``(message, rows)`` where ``rows`` is an empty DataFrame when
	        nothing can be searched or nothing matches.
	    """
	    if result_df is None or isinstance(result_df, str):
	        return "Run analysis first.", pd.DataFrame()

	    query = "" if regno_value is None else str(regno_value).strip()
	    if not query:
	        return "Enter RegNo to search.", pd.DataFrame()

	    col = result_df[ID_COL_DEFAULT]
	    mask = None
	    if pd.api.types.is_numeric_dtype(col):
	        try:
	            mask = col == float(query)
	        except ValueError:
	            # Query is not a number; fall back to text comparison below.
	            mask = None
	    if mask is None:
	        mask = col.astype(str).str.strip() == query

	    res = result_df[mask]
	    if res.empty:
	        return "No matching record found.", pd.DataFrame()
	    return f"Found {len(res)} record(s).", res


	# -----------------------------
	# Logo loader (reliable in HF)
	# -----------------------------
	def load_logo():
	    """Return the first readable logo image from the working directory.

	    Candidate file names are tried in order. A file that exists but cannot
	    be decoded is skipped so that a later candidate can still be used.

	    Returns:
	        A fully loaded PIL image, or ``None`` if no candidate could be read.
	    """
	    candidates = ("logo.jpg", "logo.png", "logo.jpeg", "Logo.jpg", "Logo.png", "Logo.jpeg")
	    for fn in candidates:
	        if not os.path.exists(fn):
	            continue
	        try:
	            # Image.open is lazy; copy() forces decoding and lets the
	            # context manager release the underlying file handle.
	            with Image.open(fn) as img:
	                return img.copy()
	        except Exception:
	            # Best effort: an unreadable logo must not break the app.
	            continue
	    return None


	# -----------------------------
	# UI
	# -----------------------------
	# Styles for the centered title block; the ids match the elements emitted
	# by the gr.HTML header inside the Blocks layout.
	CSS = """
	#titleblock {text-align:center; margin-top: 6px; margin-bottom: 8px;}
	#t1 {font-size:30px; font-weight:800; color:#1E5AA8;}
	#t2 {font-size:18px; font-weight:800; color:#1E5AA8;}
	"""

	# Layout: optional logo and title on top, then a two-column row with
	# upload/settings on the left and tabbed results on the right.
	with gr.Blocks(css=CSS, title="School Mark Analysis") as demo:
	    # Logo + title
	    logo = load_logo()
	    if logo is not None:
	        gr.Image(value=logo, show_label=False, interactive=False, height=160)

	    gr.HTML("""
	    <div id="titleblock" style="margin-top:-10px;">
	      <div id="t1">Amrita Manthana</div>
	      <div id="t2">Prof.B.Santhi,SRC,SASTRA</div>
	    </div>
	    """)

	    # Per-session storage: the loaded CSV and the computed result table.
	    df_state = gr.State(None)
	    result_state = gr.State(None)

	    with gr.Row():
	        with gr.Column(scale=1, min_width=340):
	            gr.Markdown("### Upload Marks CSV")
	            file_in = gr.File(label="Upload CSV", file_types=[".csv"])
	            load_btn = gr.Button("Load CSV", variant="primary")
	            load_msg = gr.Textbox(label="Status", interactive=False)
	            preview = gr.Dataframe(label="Preview", interactive=False, wrap=True)

	            gr.Markdown("### Analysis Settings")
	            pass_mark = gr.Slider(label="Pass mark (per subject)", minimum=0, maximum=100, value=35, step=1)
	            run_btn = gr.Button("Run Mark Analysis", variant="primary")
	            summary = gr.Textbox(label="Summary", interactive=False)

	            download_file = gr.File(label="Download Result CSV")

	        with gr.Column(scale=2):
	            with gr.Tabs():
	                with gr.Tab("Result Table"):
	                    result_table = gr.Dataframe(
	                        label="Result (Total, Average, Rank, Remark, Failed_Subjects)",
	                        interactive=False,
	                        wrap=True,
	                    )
	                with gr.Tab("Subject Averages"):
	                    subj_avg_table = gr.Dataframe(label="Subject-wise Averages", interactive=False, wrap=True)
	                    avg_plot = gr.Plot(label="Bar Chart: Subject-wise Average")
	                with gr.Tab("Fail Counts"):
	                    fail_dist_table = gr.Dataframe(
	                        label="Students failed in 1/2/3/4/5 subjects",
	                        interactive=False,
	                        wrap=True,
	                    )
	                with gr.Tab("Toppers"):
	                    top3_overall_table = gr.Dataframe(label="Overall Top 3", interactive=False, wrap=True)
	                    top3_each_subject_table = gr.Dataframe(label="Top 3 in each subject", interactive=False, wrap=True)
	                with gr.Tab("Remarks"):
	                    remark_plot = gr.Plot(label="Remark Distribution")
	                with gr.Tab("Search"):
	                    regno_in = gr.Textbox(label="Enter RegNo (exact match)")
	                    search_btn = gr.Button("Search")
	                    search_msg = gr.Textbox(label="Search Status", interactive=False)
	                    search_out = gr.Dataframe(label="Matching Record(s)", interactive=False, wrap=True)

	    # Events
	    load_btn.click(
	        load_csv,
	        inputs=[file_in],
	        outputs=[df_state, load_msg, preview],
	    )

	    def run_and_store(df, pm):
	        """Run the analysis and repeat the result table as a tenth output.

	        The extra value is written to ``result_state`` so that the search
	        handler can read the latest results.
	        """
	        s, res, subj, faild, t3, t3sub, f1, f2, fcsv = run_all(df, pm)
	        return s, res, subj, faild, t3, t3sub, f1, f2, fcsv, res

	    run_btn.click(
	        run_and_store,
	        inputs=[df_state, pass_mark],
	        outputs=[
	            summary, result_table, subj_avg_table, fail_dist_table, top3_overall_table, top3_each_subject_table,
	            avg_plot, remark_plot, download_file, result_state,
	        ],
	    )

	    search_btn.click(
	        search_regno,
	        inputs=[result_state, regno_in],
	        outputs=[search_msg, search_out],
	    )

	if __name__ == "__main__":
	    demo.launch()