# app.py (improved)
# Requirements: pandas, numpy, gradio, matplotlib, openpyxl

import os, io, math
from datetime import datetime
import pandas as pd
import numpy as np
import gradio as gr
import matplotlib.pyplot as plt
import smtplib
from email.message import EmailMessage

# ---------------- config defaults ----------------
DEFAULT_ATTENDANCE_THRESHOLD = 75.0
DEFAULT_ATTENDANCE_HIGH_RISK = 60.0
DEFAULT_TEST_DECLINE_PERCENT = 10.0
DEFAULT_MAX_ATTEMPTS = 3
DEFAULT_HIGH_CUT = 0.6
DEFAULT_MED_CUT = 0.4

# ---------------- utils (kept/cleaned) ----------------
def load_df_from_file(f):
    if f is None:
        return None
    try:
        name = getattr(f, "name", "")
        if str(name).lower().endswith((".xls", ".xlsx")):
            return pd.read_excel(f)
        else:
            return pd.read_csv(f)
    except Exception:
        try:
            f.seek(0)
            return pd.read_csv(f)
        except Exception as e:
            raise RuntimeError(f"Could not read file: {e}")

def normalise_colnames(df):
    if df is None:
        return None
    df = df.copy()
    df.columns = [c.strip() for c in df.columns]
    colmap = {}
    for c in df.columns:
        lc = c.lower().replace(" ", "").replace("_", "")
        if "roll" in lc and ("no" in lc or "number" in lc or "id" in lc):
            colmap[c] = "Roll Number"
        elif "name" == lc or "studentname" == lc or lc == "student":
            colmap[c] = "Name"
        elif "attendance" in lc:
            colmap[c] = "Attendance (%)"
        elif "mark" in lc or "score" in lc or "percentage" in lc:
            colmap[c] = "Marks (%)"
        elif "fee" in lc:
            colmap[c] = "Fees Paid"
        elif "attempt" in lc:
            colmap[c] = "Attempts"
        elif any(k in lc for k in ("test","exam","mid","quiz","assessment")):
            colmap[c] = c  # preserve test columns
    return df.rename(columns=colmap)

def merge_sources(att_df, test_df, fee_df):
    att = normalise_colnames(att_df)
    test = normalise_colnames(test_df)
    fee = normalise_colnames(fee_df)
    # choose merge key
    key = None
    for d in (att, test, fee):
        if d is not None and 'Roll Number' in d.columns:
            key = 'Roll Number'
            break
    if key is None:
        key = 'Name'
    dfs = []
    for d in (att, test, fee):
        if d is None:
            continue
        if key not in d.columns:
            d[key] = np.nan
        dfs.append(d)
    if not dfs:
        return pd.DataFrame()
    merged = dfs[0]
    for d in dfs[1:]:
        merged = pd.merge(merged, d, on=key, how='outer', suffixes=(False, False))
    return merged

def parse_test_scores(df):
    if df is None:
        return []
    test_cols = [c for c in df.columns if any(k in c.lower() for k in ("test","exam","mid","quiz","assessment","score")) and c not in ["Marks (%)", "Attendance (%)"]]
    if "Marks (%)" in df.columns:
        test_cols.append("Marks (%)")
    return test_cols

def attendance_risk(att, threshold, high_risk):
    try:
        att = float(att)
    except:
        return 0.0
    if math.isnan(att):
        return 0.0
    if att < high_risk:
        return 1.0
    if att < threshold:
        span = max(1.0, threshold - high_risk)
        return (threshold - att) / span * 0.9
    return 0.0

def test_decline_risk(row, test_cols, decline_pct):
    scores = []
    for c in test_cols:
        v = row.get(c, None)
        try:
            scores.append(float(v))
        except:
            continue
    if len(scores) < 2:
        return 0.0
    latest, prev = scores[-1], scores[-2]
    if prev == 0:
        return 0.0
    drop = (prev - latest) / prev * 100.0
    if drop >= decline_pct:
        return min(1.0, drop / (2 * decline_pct))
    return 0.0

def attempts_risk(attempts_val, max_attempts):
    try:
        a = int(attempts_val)
    except:
        return 0.0
    if a >= max_attempts:
        return 1.0
    if a == 0:
        return 0.0
    return a / max(1, max_attempts)

def combine_signals(signals, mode="max", weights=None):
    arr = np.array(signals)
    if mode == "max":
        return np.max(arr, axis=0)
    elif mode == "weighted" and weights is not None:
        w = np.array(weights)
        w = w / w.sum()
        return (arr * w[:, None]).sum(axis=0)
    return np.mean(arr, axis=0)

def label_from_score(score, high_cut=DEFAULT_HIGH_CUT, med_cut=DEFAULT_MED_CUT):
    if score >= high_cut:
        return "High"
    elif score >= med_cut:
        return "Medium"
    else:
        return "Low"

def send_email_notification(to_emails, subject, body, smtp_host, smtp_port, smtp_user, smtp_pass):
    try:
        msg = EmailMessage()
        msg["Subject"] = subject
        msg["From"] = smtp_user
        msg["To"] = ", ".join(to_emails)
        msg.set_content(body)
        with smtplib.SMTP(smtp_host, smtp_port, timeout=15) as s:
            s.starttls()
            s.login(smtp_user, smtp_pass)
            s.send_message(msg)
        return True, "Sent"
    except Exception as e:
        return False, str(e)

# ---------------- main processing ----------------
def process_and_report(att_file, test_file, fee_file,
                       attendance_threshold=DEFAULT_ATTENDANCE_THRESHOLD,
                       attendance_high_risk=DEFAULT_ATTENDANCE_HIGH_RISK,
                       decline_pct=DEFAULT_TEST_DECLINE_PERCENT,
                       max_attempts=DEFAULT_MAX_ATTEMPTS,
                       combine_mode="max",
                       weight_att=0.5, weight_test=0.3, weight_attempt=0.2,
                       notify=False, notify_threshold="High", notify_emails="", smtp_overrides=None):
    # load files
    try:
        att_df = load_df_from_file(att_file) if att_file else None
        test_df = load_df_from_file(test_file) if test_file else None
        fee_df = load_df_from_file(fee_file) if fee_file else None
    except Exception as e:
        return pd.DataFrame(), {"error": str(e)}
    merged = merge_sources(att_df, test_df, fee_df)
    if merged.empty:
        return pd.DataFrame(), {"error": "No data loaded. Upload at least one sheet."}
    merged = merged.reset_index(drop=True)
    merged['Attendance (%)'] = merged.get('Attendance (%)', np.nan)
    merged['Fees Paid'] = merged.get('Fees Paid', merged.get('FeesPaid', merged.get('Fees', np.nan)))
    test_cols = parse_test_scores(merged)
    n = len(merged)
    att_risks = np.zeros(n); test_risks = np.zeros(n); attempt_risks = np.zeros(n)
    for i, row in merged.iterrows():
        att_risks[i] = attendance_risk(row.get('Attendance (%)', np.nan), attendance_threshold, attendance_high_risk)
        test_risks[i] = test_decline_risk(row, test_cols, decline_pct)
        attempt_risks[i] = attempts_risk(row.get('Attempts', 0), max_attempts)
    merged['Attendance_Risk'] = att_risks
    merged['Test_Decline_Risk'] = test_risks
    merged['Attempts_Risk'] = attempt_risks

    signals = [att_risks, test_risks, attempt_risks]
    if combine_mode == "max":
        combined = combine_signals(signals, mode="max")
    else:
        weights = [weight_att, weight_test, weight_attempt]
        combined = combine_signals(signals, mode="weighted", weights=weights)
    merged['Combined_Risk_Score'] = combined
    merged['Risk_Label'] = merged['Combined_Risk_Score'].apply(label_from_score)

    reasons = []
    for i, row in merged.iterrows():
        r = []
        if row['Attendance_Risk'] >= 0.7:
            r.append(f"Low attendance ({row.get('Attendance (%)', 'NA')}%)")
        elif row['Attendance_Risk'] > 0:
            r.append(f"Attendance below threshold ({row.get('Attendance (%)', 'NA')}%)")
        if row['Test_Decline_Risk'] > 0:
            r.append("Recent test decline")
        if row['Attempts_Risk'] > 0:
            r.append(f"High attempts ({row.get('Attempts','NA')})")
        if str(row.get('Fees Paid','')).strip().lower() in ("no","n","false","0","unpaid"):
            r.append("Fees unpaid")
        reasons.append("; ".join(r) if r else "None")
    merged['Flag_Reason'] = reasons
    summary = merged['Risk_Label'].value_counts().to_dict()

    notif_result = None
    if notify:
        notify_mask = merged['Risk_Label'] == notify_threshold
        notify_rows = merged[notify_mask]
        smtp = smtp_overrides or {
            "host": os.environ.get("SMTP_HOST"),
            "port": int(os.environ.get("SMTP_PORT", 587)),
            "user": os.environ.get("SMTP_USER"),
            "pass": os.environ.get("SMTP_PASS")
        }
        if not notify_rows.empty and smtp["user"] and smtp["pass"] and smtp["host"]:
            emails = [e.strip() for e in str(notify_emails).split(",") if e.strip()]
            subject = f"[Early Warning] {len(notify_rows)} students flagged {notify_threshold}"
            body_lines = ["Students flagged:\n"]
            for _, r in notify_rows.iterrows():
                body_lines.append(f"{r.get('Name','')}\t{r.get('Roll Number','')}\t{r.get('Risk_Label')}\tReasons: {r.get('Flag_Reason')}")
            ok, msg = send_email_notification(emails, subject, "\n".join(body_lines), smtp["host"], smtp["port"], smtp["user"], smtp["pass"])
            notif_result = (ok, msg)
        else:
            notif_result = (False, "Notification missing SMTP settings or no flagged students")
    return merged, {"summary": summary, "notify_result": notif_result}

# --------------- UI ----------------
def build_plot(df):
    if df.empty:
        return None
    counts = df['Risk_Label'].value_counts().reindex(['High','Medium','Low']).fillna(0)
    fig, ax = plt.subplots(figsize=(6,3))
    ax.bar(counts.index, counts.values)
    ax.set_title("Risk distribution")
    ax.set_ylabel("Number of students")
    plt.tight_layout()
    return fig

def df_to_colored_html(df):
    if df.empty:
        return "<p>No data</p>"
    df = df.copy()
    # show a few important cols if available
    cols = [c for c in ['Roll Number','Name','Attendance (%)','Marks (%)','Fees Paid','Combined_Risk_Score','Risk_Label','Flag_Reason'] if c in df.columns]
    df = df[cols]
    def row_style(r):
        label = r.get("Risk_Label","Low")
        if label=="High":
            return 'background:#ffcccc'  # pale red
        if label=="Medium":
            return 'background:#fff2cc'  # pale orange
        return ''
    styled = df.style.apply(lambda r: [row_style(r)]*len(r), axis=1)
    return styled.hide_index().to_html()

with gr.Blocks() as demo:
    gr.Markdown("## Student Early-Warning Dashboard")
    with gr.Row():
        with gr.Column():
            att_file = gr.File(label="Attendance", file_types=['.csv','.xlsx'])
            test_file = gr.File(label="Tests", file_types=['.csv','.xlsx'])
            fee_file = gr.File(label="Fees", file_types=['.csv','.xlsx'])
            run_btn = gr.Button("Run")
            download = gr.File()
        with gr.Column():
            att_thresh = gr.Slider(50,100,value=DEFAULT_ATTENDANCE_THRESHOLD,label="Attendance threshold")
            att_high = gr.Slider(20,80,value=DEFAULT_ATTENDANCE_HIGH_RISK,label="High-risk attendance cutoff")
            decline_pct = gr.Slider(1,50,value=DEFAULT_TEST_DECLINE_PERCENT,label="Test decline % to flag")
            max_attempts = gr.Number(value=DEFAULT_MAX_ATTEMPTS,label="Max attempts before flag",precision=0)
            combine_mode = gr.Radio(["max","weighted"],value="max",label="Combine mode")
            weight_att = gr.Slider(0.0,1.0,value=0.5,label="Weight: attendance (only for weighted)")
            weight_test = gr.Slider(0.0,1.0,value=0.3,label="Weight: test decline")
            weight_attempt = gr.Slider(0.0,1.0,value=0.2,label="Weight: attempts")
            notify = gr.Checkbox(False,label="Send email notifications to mentors (uses SMTP env vars or enter below)")
            notify_emails = gr.Textbox(label="Notify emails (comma-separated)")
            smtp_host = gr.Textbox(label="SMTP host (optional override)")
            smtp_port = gr.Number(value=587,label="SMTP port (optional override)",precision=0)
            smtp_user = gr.Textbox(label="SMTP user (optional override)")
            smtp_pass = gr.Textbox(type="password", label="SMTP pass (optional override)")
    result_html = gr.HTML()
    risk_plot = gr.Plot()
    summary_box = gr.Textbox()

    def on_run(att_file_, test_file_, fee_file_,
               att_thresh_, att_high_, decline_pct_, max_attempts_,
               combine_mode_, weight_att_, weight_test_, weight_attempt_,
               notify_, notify_emails_, smtp_host_, smtp_port_, smtp_user_, smtp_pass_):
        smtp_overrides = None
        if smtp_user_ and smtp_pass_ and smtp_host_:
            smtp_overrides = {"host": smtp_host_, "port": int(smtp_port_), "user": smtp_user_, "pass": smtp_pass_}
        df, meta = process_and_report(
            att_file=att_file_, test_file=test_file_, fee_file=fee_file_,
            attendance_threshold=float(att_thresh_), attendance_high_risk=float(att_high_),
            decline_pct=float(decline_pct_), max_attempts=int(max_attempts_ or DEFAULT_MAX_ATTEMPTS),
            combine_mode=combine_mode_, weight_att=float(weight_att_), weight_test=float(weight_test_), weight_attempt=float(weight_attempt_),
            notify=notify_, notify_threshold="High", notify_emails=notify_emails_, smtp_overrides=smtp_overrides
        )
        if isinstance(meta, dict) and meta.get("error"):
            return f"<pre style='color:red'>{meta['error']}</pre>", None, str(meta)
        html = df_to_colored_html(df)
        fig = build_plot(df)
        # prepare CSV bytes for download
        csv_bytes = df.to_csv(index=False).encode('utf-8')
        file_obj = io.BytesIO(csv_bytes)
        file_obj.name = f"risk_report_{datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')}.csv"
        return html, fig, str(meta), file_obj

    run_btn.click(fn=on_run, inputs=[att_file, test_file, fee_file,
                                     att_thresh, att_high, decline_pct, max_attempts,
                                     combine_mode, weight_att, weight_test, weight_attempt,
                                     notify, notify_emails, smtp_host, smtp_port, smtp_user, smtp_pass],
                  outputs=[result_html, risk_plot, summary_box, download])

demo.launch()