HarshitaSuri's picture
Update app.py
50c55fd verified
# app.py (improved)
# Requirements: pandas, numpy, gradio, matplotlib, openpyxl
import os, io, math
from datetime import datetime
import pandas as pd
import numpy as np
import gradio as gr
import matplotlib.pyplot as plt
import smtplib
from email.message import EmailMessage
# ---------------- config defaults ----------------
DEFAULT_ATTENDANCE_THRESHOLD = 75.0
DEFAULT_ATTENDANCE_HIGH_RISK = 60.0
DEFAULT_TEST_DECLINE_PERCENT = 10.0
DEFAULT_MAX_ATTEMPTS = 3
DEFAULT_HIGH_CUT = 0.6
DEFAULT_MED_CUT = 0.4
# ---------------- utils (kept/cleaned) ----------------
def load_df_from_file(f):
if f is None:
return None
try:
name = getattr(f, "name", "")
if str(name).lower().endswith((".xls", ".xlsx")):
return pd.read_excel(f)
else:
return pd.read_csv(f)
except Exception:
try:
f.seek(0)
return pd.read_csv(f)
except Exception as e:
raise RuntimeError(f"Could not read file: {e}")
def normalise_colnames(df):
if df is None:
return None
df = df.copy()
df.columns = [c.strip() for c in df.columns]
colmap = {}
for c in df.columns:
lc = c.lower().replace(" ", "").replace("_", "")
if "roll" in lc and ("no" in lc or "number" in lc or "id" in lc):
colmap[c] = "Roll Number"
elif "name" == lc or "studentname" == lc or lc == "student":
colmap[c] = "Name"
elif "attendance" in lc:
colmap[c] = "Attendance (%)"
elif "mark" in lc or "score" in lc or "percentage" in lc:
colmap[c] = "Marks (%)"
elif "fee" in lc:
colmap[c] = "Fees Paid"
elif "attempt" in lc:
colmap[c] = "Attempts"
elif any(k in lc for k in ("test","exam","mid","quiz","assessment")):
colmap[c] = c # preserve test columns
return df.rename(columns=colmap)
def merge_sources(att_df, test_df, fee_df):
att = normalise_colnames(att_df)
test = normalise_colnames(test_df)
fee = normalise_colnames(fee_df)
# choose merge key
key = None
for d in (att, test, fee):
if d is not None and 'Roll Number' in d.columns:
key = 'Roll Number'
break
if key is None:
key = 'Name'
dfs = []
for d in (att, test, fee):
if d is None:
continue
if key not in d.columns:
d[key] = np.nan
dfs.append(d)
if not dfs:
return pd.DataFrame()
merged = dfs[0]
for d in dfs[1:]:
merged = pd.merge(merged, d, on=key, how='outer', suffixes=(False, False))
return merged
def parse_test_scores(df):
if df is None:
return []
test_cols = [c for c in df.columns if any(k in c.lower() for k in ("test","exam","mid","quiz","assessment","score")) and c not in ["Marks (%)", "Attendance (%)"]]
if "Marks (%)" in df.columns:
test_cols.append("Marks (%)")
return test_cols
def attendance_risk(att, threshold, high_risk):
try:
att = float(att)
except:
return 0.0
if math.isnan(att):
return 0.0
if att < high_risk:
return 1.0
if att < threshold:
span = max(1.0, threshold - high_risk)
return (threshold - att) / span * 0.9
return 0.0
def test_decline_risk(row, test_cols, decline_pct):
scores = []
for c in test_cols:
v = row.get(c, None)
try:
scores.append(float(v))
except:
continue
if len(scores) < 2:
return 0.0
latest, prev = scores[-1], scores[-2]
if prev == 0:
return 0.0
drop = (prev - latest) / prev * 100.0
if drop >= decline_pct:
return min(1.0, drop / (2 * decline_pct))
return 0.0
def attempts_risk(attempts_val, max_attempts):
try:
a = int(attempts_val)
except:
return 0.0
if a >= max_attempts:
return 1.0
if a == 0:
return 0.0
return a / max(1, max_attempts)
def combine_signals(signals, mode="max", weights=None):
arr = np.array(signals)
if mode == "max":
return np.max(arr, axis=0)
elif mode == "weighted" and weights is not None:
w = np.array(weights)
w = w / w.sum()
return (arr * w[:, None]).sum(axis=0)
return np.mean(arr, axis=0)
def label_from_score(score, high_cut=DEFAULT_HIGH_CUT, med_cut=DEFAULT_MED_CUT):
if score >= high_cut:
return "High"
elif score >= med_cut:
return "Medium"
else:
return "Low"
def send_email_notification(to_emails, subject, body, smtp_host, smtp_port, smtp_user, smtp_pass):
try:
msg = EmailMessage()
msg["Subject"] = subject
msg["From"] = smtp_user
msg["To"] = ", ".join(to_emails)
msg.set_content(body)
with smtplib.SMTP(smtp_host, smtp_port, timeout=15) as s:
s.starttls()
s.login(smtp_user, smtp_pass)
s.send_message(msg)
return True, "Sent"
except Exception as e:
return False, str(e)
# ---------------- main processing ----------------
def process_and_report(att_file, test_file, fee_file,
attendance_threshold=DEFAULT_ATTENDANCE_THRESHOLD,
attendance_high_risk=DEFAULT_ATTENDANCE_HIGH_RISK,
decline_pct=DEFAULT_TEST_DECLINE_PERCENT,
max_attempts=DEFAULT_MAX_ATTEMPTS,
combine_mode="max",
weight_att=0.5, weight_test=0.3, weight_attempt=0.2,
notify=False, notify_threshold="High", notify_emails="", smtp_overrides=None):
# load files
try:
att_df = load_df_from_file(att_file) if att_file else None
test_df = load_df_from_file(test_file) if test_file else None
fee_df = load_df_from_file(fee_file) if fee_file else None
except Exception as e:
return pd.DataFrame(), {"error": str(e)}
merged = merge_sources(att_df, test_df, fee_df)
if merged.empty:
return pd.DataFrame(), {"error": "No data loaded. Upload at least one sheet."}
merged = merged.reset_index(drop=True)
merged['Attendance (%)'] = merged.get('Attendance (%)', np.nan)
merged['Fees Paid'] = merged.get('Fees Paid', merged.get('FeesPaid', merged.get('Fees', np.nan)))
test_cols = parse_test_scores(merged)
n = len(merged)
att_risks = np.zeros(n); test_risks = np.zeros(n); attempt_risks = np.zeros(n)
for i, row in merged.iterrows():
att_risks[i] = attendance_risk(row.get('Attendance (%)', np.nan), attendance_threshold, attendance_high_risk)
test_risks[i] = test_decline_risk(row, test_cols, decline_pct)
attempt_risks[i] = attempts_risk(row.get('Attempts', 0), max_attempts)
merged['Attendance_Risk'] = att_risks
merged['Test_Decline_Risk'] = test_risks
merged['Attempts_Risk'] = attempt_risks
signals = [att_risks, test_risks, attempt_risks]
if combine_mode == "max":
combined = combine_signals(signals, mode="max")
else:
weights = [weight_att, weight_test, weight_attempt]
combined = combine_signals(signals, mode="weighted", weights=weights)
merged['Combined_Risk_Score'] = combined
merged['Risk_Label'] = merged['Combined_Risk_Score'].apply(label_from_score)
reasons = []
for i, row in merged.iterrows():
r = []
if row['Attendance_Risk'] >= 0.7:
r.append(f"Low attendance ({row.get('Attendance (%)', 'NA')}%)")
elif row['Attendance_Risk'] > 0:
r.append(f"Attendance below threshold ({row.get('Attendance (%)', 'NA')}%)")
if row['Test_Decline_Risk'] > 0:
r.append("Recent test decline")
if row['Attempts_Risk'] > 0:
r.append(f"High attempts ({row.get('Attempts','NA')})")
if str(row.get('Fees Paid','')).strip().lower() in ("no","n","false","0","unpaid"):
r.append("Fees unpaid")
reasons.append("; ".join(r) if r else "None")
merged['Flag_Reason'] = reasons
summary = merged['Risk_Label'].value_counts().to_dict()
notif_result = None
if notify:
notify_mask = merged['Risk_Label'] == notify_threshold
notify_rows = merged[notify_mask]
smtp = smtp_overrides or {
"host": os.environ.get("SMTP_HOST"),
"port": int(os.environ.get("SMTP_PORT", 587)),
"user": os.environ.get("SMTP_USER"),
"pass": os.environ.get("SMTP_PASS")
}
if not notify_rows.empty and smtp["user"] and smtp["pass"] and smtp["host"]:
emails = [e.strip() for e in str(notify_emails).split(",") if e.strip()]
subject = f"[Early Warning] {len(notify_rows)} students flagged {notify_threshold}"
body_lines = ["Students flagged:\n"]
for _, r in notify_rows.iterrows():
body_lines.append(f"{r.get('Name','')}\t{r.get('Roll Number','')}\t{r.get('Risk_Label')}\tReasons: {r.get('Flag_Reason')}")
ok, msg = send_email_notification(emails, subject, "\n".join(body_lines), smtp["host"], smtp["port"], smtp["user"], smtp["pass"])
notif_result = (ok, msg)
else:
notif_result = (False, "Notification missing SMTP settings or no flagged students")
return merged, {"summary": summary, "notify_result": notif_result}
# --------------- UI ----------------
def build_plot(df):
if df.empty:
return None
counts = df['Risk_Label'].value_counts().reindex(['High','Medium','Low']).fillna(0)
fig, ax = plt.subplots(figsize=(6,3))
ax.bar(counts.index, counts.values)
ax.set_title("Risk distribution")
ax.set_ylabel("Number of students")
plt.tight_layout()
return fig
def df_to_colored_html(df):
if df.empty:
return "<p>No data</p>"
df = df.copy()
# show a few important cols if available
cols = [c for c in ['Roll Number','Name','Attendance (%)','Marks (%)','Fees Paid','Combined_Risk_Score','Risk_Label','Flag_Reason'] if c in df.columns]
df = df[cols]
def row_style(r):
label = r.get("Risk_Label","Low")
if label=="High":
return 'background:#ffcccc' # pale red
if label=="Medium":
return 'background:#fff2cc' # pale orange
return ''
styled = df.style.apply(lambda r: [row_style(r)]*len(r), axis=1)
return styled.hide_index().to_html()
with gr.Blocks() as demo:
gr.Markdown("## Student Early-Warning Dashboard")
with gr.Row():
with gr.Column():
att_file = gr.File(label="Attendance", file_types=['.csv','.xlsx'])
test_file = gr.File(label="Tests", file_types=['.csv','.xlsx'])
fee_file = gr.File(label="Fees", file_types=['.csv','.xlsx'])
run_btn = gr.Button("Run")
download = gr.File()
with gr.Column():
att_thresh = gr.Slider(50,100,value=DEFAULT_ATTENDANCE_THRESHOLD,label="Attendance threshold")
att_high = gr.Slider(20,80,value=DEFAULT_ATTENDANCE_HIGH_RISK,label="High-risk attendance cutoff")
decline_pct = gr.Slider(1,50,value=DEFAULT_TEST_DECLINE_PERCENT,label="Test decline % to flag")
max_attempts = gr.Number(value=DEFAULT_MAX_ATTEMPTS,label="Max attempts before flag",precision=0)
combine_mode = gr.Radio(["max","weighted"],value="max",label="Combine mode")
weight_att = gr.Slider(0.0,1.0,value=0.5,label="Weight: attendance (only for weighted)")
weight_test = gr.Slider(0.0,1.0,value=0.3,label="Weight: test decline")
weight_attempt = gr.Slider(0.0,1.0,value=0.2,label="Weight: attempts")
notify = gr.Checkbox(False,label="Send email notifications to mentors (uses SMTP env vars or enter below)")
notify_emails = gr.Textbox(label="Notify emails (comma-separated)")
smtp_host = gr.Textbox(label="SMTP host (optional override)")
smtp_port = gr.Number(value=587,label="SMTP port (optional override)",precision=0)
smtp_user = gr.Textbox(label="SMTP user (optional override)")
smtp_pass = gr.Textbox(type="password", label="SMTP pass (optional override)")
result_html = gr.HTML()
risk_plot = gr.Plot()
summary_box = gr.Textbox()
def on_run(att_file_, test_file_, fee_file_,
att_thresh_, att_high_, decline_pct_, max_attempts_,
combine_mode_, weight_att_, weight_test_, weight_attempt_,
notify_, notify_emails_, smtp_host_, smtp_port_, smtp_user_, smtp_pass_):
smtp_overrides = None
if smtp_user_ and smtp_pass_ and smtp_host_:
smtp_overrides = {"host": smtp_host_, "port": int(smtp_port_), "user": smtp_user_, "pass": smtp_pass_}
df, meta = process_and_report(
att_file=att_file_, test_file=test_file_, fee_file=fee_file_,
attendance_threshold=float(att_thresh_), attendance_high_risk=float(att_high_),
decline_pct=float(decline_pct_), max_attempts=int(max_attempts_ or DEFAULT_MAX_ATTEMPTS),
combine_mode=combine_mode_, weight_att=float(weight_att_), weight_test=float(weight_test_), weight_attempt=float(weight_attempt_),
notify=notify_, notify_threshold="High", notify_emails=notify_emails_, smtp_overrides=smtp_overrides
)
if isinstance(meta, dict) and meta.get("error"):
return f"<pre style='color:red'>{meta['error']}</pre>", None, str(meta)
html = df_to_colored_html(df)
fig = build_plot(df)
# prepare CSV bytes for download
csv_bytes = df.to_csv(index=False).encode('utf-8')
file_obj = io.BytesIO(csv_bytes)
file_obj.name = f"risk_report_{datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')}.csv"
return html, fig, str(meta), file_obj
run_btn.click(fn=on_run, inputs=[att_file, test_file, fee_file,
att_thresh, att_high, decline_pct, max_attempts,
combine_mode, weight_att, weight_test, weight_attempt,
notify, notify_emails, smtp_host, smtp_port, smtp_user, smtp_pass],
outputs=[result_html, risk_plot, summary_box, download])
demo.launch()