Spaces:

HarshitaSuri
/

StudentDropoutPrediction

Sleeping

App Files Files Community

StudentDropoutPrediction / app.py

HarshitaSuri

Update app.py

50c55fd verified 6 months ago

raw

history blame contribute delete

14.3 kB

	# app.py (improved)
	# Requirements: pandas, numpy, gradio, matplotlib, openpyxl

	import os, io, math
	from datetime import datetime
	import pandas as pd
	import numpy as np
	import gradio as gr
	import matplotlib.pyplot as plt
	import smtplib
	from email.message import EmailMessage

	# ---------------- config defaults ----------------
	DEFAULT_ATTENDANCE_THRESHOLD = 75.0
	DEFAULT_ATTENDANCE_HIGH_RISK = 60.0
	DEFAULT_TEST_DECLINE_PERCENT = 10.0
	DEFAULT_MAX_ATTEMPTS = 3
	DEFAULT_HIGH_CUT = 0.6
	DEFAULT_MED_CUT = 0.4

	# ---------------- utils (kept/cleaned) ----------------
	def load_df_from_file(f):
	if f is None:
	return None
	try:
	name = getattr(f, "name", "")
	if str(name).lower().endswith((".xls", ".xlsx")):
	return pd.read_excel(f)
	else:
	return pd.read_csv(f)
	except Exception:
	try:
	f.seek(0)
	return pd.read_csv(f)
	except Exception as e:
	raise RuntimeError(f"Could not read file: {e}")

	def normalise_colnames(df):
	if df is None:
	return None
	df = df.copy()
	df.columns = [c.strip() for c in df.columns]
	colmap = {}
	for c in df.columns:
	lc = c.lower().replace(" ", "").replace("_", "")
	if "roll" in lc and ("no" in lc or "number" in lc or "id" in lc):
	colmap[c] = "Roll Number"
	elif "name" == lc or "studentname" == lc or lc == "student":
	colmap[c] = "Name"
	elif "attendance" in lc:
	colmap[c] = "Attendance (%)"
	elif "mark" in lc or "score" in lc or "percentage" in lc:
	colmap[c] = "Marks (%)"
	elif "fee" in lc:
	colmap[c] = "Fees Paid"
	elif "attempt" in lc:
	colmap[c] = "Attempts"
	elif any(k in lc for k in ("test","exam","mid","quiz","assessment")):
	colmap[c] = c # preserve test columns
	return df.rename(columns=colmap)

	def merge_sources(att_df, test_df, fee_df):
	att = normalise_colnames(att_df)
	test = normalise_colnames(test_df)
	fee = normalise_colnames(fee_df)
	# choose merge key
	key = None
	for d in (att, test, fee):
	if d is not None and 'Roll Number' in d.columns:
	key = 'Roll Number'
	break
	if key is None:
	key = 'Name'
	dfs = []
	for d in (att, test, fee):
	if d is None:
	continue
	if key not in d.columns:
	d[key] = np.nan
	dfs.append(d)
	if not dfs:
	return pd.DataFrame()
	merged = dfs[0]
	for d in dfs[1:]:
	merged = pd.merge(merged, d, on=key, how='outer', suffixes=(False, False))
	return merged

	def parse_test_scores(df):
	if df is None:
	return []
	test_cols = [c for c in df.columns if any(k in c.lower() for k in ("test","exam","mid","quiz","assessment","score")) and c not in ["Marks (%)", "Attendance (%)"]]
	if "Marks (%)" in df.columns:
	test_cols.append("Marks (%)")
	return test_cols

	def attendance_risk(att, threshold, high_risk):
	try:
	att = float(att)
	except:
	return 0.0
	if math.isnan(att):
	return 0.0
	if att < high_risk:
	return 1.0
	if att < threshold:
	span = max(1.0, threshold - high_risk)
	return (threshold - att) / span * 0.9
	return 0.0

	def test_decline_risk(row, test_cols, decline_pct):
	scores = []
	for c in test_cols:
	v = row.get(c, None)
	try:
	scores.append(float(v))
	except:
	continue
	if len(scores) < 2:
	return 0.0
	latest, prev = scores[-1], scores[-2]
	if prev == 0:
	return 0.0
	drop = (prev - latest) / prev * 100.0
	if drop >= decline_pct:
	return min(1.0, drop / (2 * decline_pct))
	return 0.0

	def attempts_risk(attempts_val, max_attempts):
	try:
	a = int(attempts_val)
	except:
	return 0.0
	if a >= max_attempts:
	return 1.0
	if a == 0:
	return 0.0
	return a / max(1, max_attempts)

	def combine_signals(signals, mode="max", weights=None):
	arr = np.array(signals)
	if mode == "max":
	return np.max(arr, axis=0)
	elif mode == "weighted" and weights is not None:
	w = np.array(weights)
	w = w / w.sum()
	return (arr * w[:, None]).sum(axis=0)
	return np.mean(arr, axis=0)

	def label_from_score(score, high_cut=DEFAULT_HIGH_CUT, med_cut=DEFAULT_MED_CUT):
	if score >= high_cut:
	return "High"
	elif score >= med_cut:
	return "Medium"
	else:
	return "Low"

	def send_email_notification(to_emails, subject, body, smtp_host, smtp_port, smtp_user, smtp_pass):
	try:
	msg = EmailMessage()
	msg["Subject"] = subject
	msg["From"] = smtp_user
	msg["To"] = ", ".join(to_emails)
	msg.set_content(body)
	with smtplib.SMTP(smtp_host, smtp_port, timeout=15) as s:
	s.starttls()
	s.login(smtp_user, smtp_pass)
	s.send_message(msg)
	return True, "Sent"
	except Exception as e:
	return False, str(e)

	# ---------------- main processing ----------------
	def process_and_report(att_file, test_file, fee_file,
	attendance_threshold=DEFAULT_ATTENDANCE_THRESHOLD,
	attendance_high_risk=DEFAULT_ATTENDANCE_HIGH_RISK,
	decline_pct=DEFAULT_TEST_DECLINE_PERCENT,
	max_attempts=DEFAULT_MAX_ATTEMPTS,
	combine_mode="max",
	weight_att=0.5, weight_test=0.3, weight_attempt=0.2,
	notify=False, notify_threshold="High", notify_emails="", smtp_overrides=None):
	# load files
	try:
	att_df = load_df_from_file(att_file) if att_file else None
	test_df = load_df_from_file(test_file) if test_file else None
	fee_df = load_df_from_file(fee_file) if fee_file else None
	except Exception as e:
	return pd.DataFrame(), {"error": str(e)}
	merged = merge_sources(att_df, test_df, fee_df)
	if merged.empty:
	return pd.DataFrame(), {"error": "No data loaded. Upload at least one sheet."}
	merged = merged.reset_index(drop=True)
	merged['Attendance (%)'] = merged.get('Attendance (%)', np.nan)
	merged['Fees Paid'] = merged.get('Fees Paid', merged.get('FeesPaid', merged.get('Fees', np.nan)))
	test_cols = parse_test_scores(merged)
	n = len(merged)
	att_risks = np.zeros(n); test_risks = np.zeros(n); attempt_risks = np.zeros(n)
	for i, row in merged.iterrows():
	att_risks[i] = attendance_risk(row.get('Attendance (%)', np.nan), attendance_threshold, attendance_high_risk)
	test_risks[i] = test_decline_risk(row, test_cols, decline_pct)
	attempt_risks[i] = attempts_risk(row.get('Attempts', 0), max_attempts)
	merged['Attendance_Risk'] = att_risks
	merged['Test_Decline_Risk'] = test_risks
	merged['Attempts_Risk'] = attempt_risks

	signals = [att_risks, test_risks, attempt_risks]
	if combine_mode == "max":
	combined = combine_signals(signals, mode="max")
	else:
	weights = [weight_att, weight_test, weight_attempt]
	combined = combine_signals(signals, mode="weighted", weights=weights)
	merged['Combined_Risk_Score'] = combined
	merged['Risk_Label'] = merged['Combined_Risk_Score'].apply(label_from_score)

	reasons = []
	for i, row in merged.iterrows():
	r = []
	if row['Attendance_Risk'] >= 0.7:
	r.append(f"Low attendance ({row.get('Attendance (%)', 'NA')}%)")
	elif row['Attendance_Risk'] > 0:
	r.append(f"Attendance below threshold ({row.get('Attendance (%)', 'NA')}%)")
	if row['Test_Decline_Risk'] > 0:
	r.append("Recent test decline")
	if row['Attempts_Risk'] > 0:
	r.append(f"High attempts ({row.get('Attempts','NA')})")
	if str(row.get('Fees Paid','')).strip().lower() in ("no","n","false","0","unpaid"):
	r.append("Fees unpaid")
	reasons.append("; ".join(r) if r else "None")
	merged['Flag_Reason'] = reasons
	summary = merged['Risk_Label'].value_counts().to_dict()

	notif_result = None
	if notify:
	notify_mask = merged['Risk_Label'] == notify_threshold
	notify_rows = merged[notify_mask]
	smtp = smtp_overrides or {
	"host": os.environ.get("SMTP_HOST"),
	"port": int(os.environ.get("SMTP_PORT", 587)),
	"user": os.environ.get("SMTP_USER"),
	"pass": os.environ.get("SMTP_PASS")
	}
	if not notify_rows.empty and smtp["user"] and smtp["pass"] and smtp["host"]:
	emails = [e.strip() for e in str(notify_emails).split(",") if e.strip()]
	subject = f"[Early Warning] {len(notify_rows)} students flagged {notify_threshold}"
	body_lines = ["Students flagged:\n"]
	for _, r in notify_rows.iterrows():
	body_lines.append(f"{r.get('Name','')}\t{r.get('Roll Number','')}\t{r.get('Risk_Label')}\tReasons: {r.get('Flag_Reason')}")
	ok, msg = send_email_notification(emails, subject, "\n".join(body_lines), smtp["host"], smtp["port"], smtp["user"], smtp["pass"])
	notif_result = (ok, msg)
	else:
	notif_result = (False, "Notification missing SMTP settings or no flagged students")
	return merged, {"summary": summary, "notify_result": notif_result}

	# --------------- UI ----------------
	def build_plot(df):
	if df.empty:
	return None
	counts = df['Risk_Label'].value_counts().reindex(['High','Medium','Low']).fillna(0)
	fig, ax = plt.subplots(figsize=(6,3))
	ax.bar(counts.index, counts.values)
	ax.set_title("Risk distribution")
	ax.set_ylabel("Number of students")
	plt.tight_layout()
	return fig

	def df_to_colored_html(df):
	if df.empty:
	return "<p>No data</p>"
	df = df.copy()
	# show a few important cols if available
	cols = [c for c in ['Roll Number','Name','Attendance (%)','Marks (%)','Fees Paid','Combined_Risk_Score','Risk_Label','Flag_Reason'] if c in df.columns]
	df = df[cols]
	def row_style(r):
	label = r.get("Risk_Label","Low")
	if label=="High":
	return 'background:#ffcccc' # pale red
	if label=="Medium":
	return 'background:#fff2cc' # pale orange
	return ''
	styled = df.style.apply(lambda r: [row_style(r)]*len(r), axis=1)
	return styled.hide_index().to_html()

	with gr.Blocks() as demo:
	gr.Markdown("## Student Early-Warning Dashboard")
	with gr.Row():
	with gr.Column():
	att_file = gr.File(label="Attendance", file_types=['.csv','.xlsx'])
	test_file = gr.File(label="Tests", file_types=['.csv','.xlsx'])
	fee_file = gr.File(label="Fees", file_types=['.csv','.xlsx'])
	run_btn = gr.Button("Run")
	download = gr.File()
	with gr.Column():
	att_thresh = gr.Slider(50,100,value=DEFAULT_ATTENDANCE_THRESHOLD,label="Attendance threshold")
	att_high = gr.Slider(20,80,value=DEFAULT_ATTENDANCE_HIGH_RISK,label="High-risk attendance cutoff")
	decline_pct = gr.Slider(1,50,value=DEFAULT_TEST_DECLINE_PERCENT,label="Test decline % to flag")
	max_attempts = gr.Number(value=DEFAULT_MAX_ATTEMPTS,label="Max attempts before flag",precision=0)
	combine_mode = gr.Radio(["max","weighted"],value="max",label="Combine mode")
	weight_att = gr.Slider(0.0,1.0,value=0.5,label="Weight: attendance (only for weighted)")
	weight_test = gr.Slider(0.0,1.0,value=0.3,label="Weight: test decline")
	weight_attempt = gr.Slider(0.0,1.0,value=0.2,label="Weight: attempts")
	notify = gr.Checkbox(False,label="Send email notifications to mentors (uses SMTP env vars or enter below)")
	notify_emails = gr.Textbox(label="Notify emails (comma-separated)")
	smtp_host = gr.Textbox(label="SMTP host (optional override)")
	smtp_port = gr.Number(value=587,label="SMTP port (optional override)",precision=0)
	smtp_user = gr.Textbox(label="SMTP user (optional override)")
	smtp_pass = gr.Textbox(type="password", label="SMTP pass (optional override)")
	result_html = gr.HTML()
	risk_plot = gr.Plot()
	summary_box = gr.Textbox()

	def on_run(att_file_, test_file_, fee_file_,
	att_thresh_, att_high_, decline_pct_, max_attempts_,
	combine_mode_, weight_att_, weight_test_, weight_attempt_,
	notify_, notify_emails_, smtp_host_, smtp_port_, smtp_user_, smtp_pass_):
	smtp_overrides = None
	if smtp_user_ and smtp_pass_ and smtp_host_:
	smtp_overrides = {"host": smtp_host_, "port": int(smtp_port_), "user": smtp_user_, "pass": smtp_pass_}
	df, meta = process_and_report(
	att_file=att_file_, test_file=test_file_, fee_file=fee_file_,
	attendance_threshold=float(att_thresh_), attendance_high_risk=float(att_high_),
	decline_pct=float(decline_pct_), max_attempts=int(max_attempts_ or DEFAULT_MAX_ATTEMPTS),
	combine_mode=combine_mode_, weight_att=float(weight_att_), weight_test=float(weight_test_), weight_attempt=float(weight_attempt_),
	notify=notify_, notify_threshold="High", notify_emails=notify_emails_, smtp_overrides=smtp_overrides
	)
	if isinstance(meta, dict) and meta.get("error"):
	return f"<pre style='color:red'>{meta['error']}</pre>", None, str(meta)
	html = df_to_colored_html(df)
	fig = build_plot(df)
	# prepare CSV bytes for download
	csv_bytes = df.to_csv(index=False).encode('utf-8')
	file_obj = io.BytesIO(csv_bytes)
	file_obj.name = f"risk_report_{datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')}.csv"
	return html, fig, str(meta), file_obj

	run_btn.click(fn=on_run, inputs=[att_file, test_file, fee_file,
	att_thresh, att_high, decline_pct, max_attempts,
	combine_mode, weight_att, weight_test, weight_attempt,
	notify, notify_emails, smtp_host, smtp_port, smtp_user, smtp_pass],
	outputs=[result_html, risk_plot, summary_box, download])

	demo.launch()