import io
import numpy as np
import pandas as pd
import gradio as gr
import matplotlib.pyplot as plt


# =============================
# Helpers
# =============================
def _read_file_bytes(file_obj):
    if file_obj is None:
        raise ValueError("No file uploaded.")
    file_path = getattr(file_obj, "name", None)
    if not file_path:
        raise ValueError("Invalid uploaded file object.")
    with open(file_path, "rb") as f:
        b = f.read()
    if not b:
        raise ValueError("Uploaded file is empty.")
    return b


def _drop_useless(df: pd.DataFrame) -> pd.DataFrame:
    df = df.dropna(axis=1, how="all").dropna(axis=0, how="all")
    unnamed = [c for c in df.columns if str(c).strip().lower().startswith("unnamed")]
    if unnamed:
        df = df.drop(columns=unnamed, errors="ignore")
    return df


def _safe_numeric(s):
    return pd.to_numeric(s, errors="coerce")


def _guess_marks_col(df: pd.DataFrame):
    cols = list(df.columns)
    best, best_score = cols[0], -1
    for c in cols:
        score = _safe_numeric(df[c]).notna().mean()
        if score > best_score:
            best_score = score
            best = c
    return best


def _guess_grade_col(cols):
    low = {c: str(c).strip().lower() for c in cols}
    return next((c for c in cols if "grade" in low[c] or "grde" in low[c]), cols[0])


def _guess_optional(cols):
    low = {c: str(c).strip().lower() for c in cols}
    course = next((c for c in cols if any(k in low[c] for k in ["course", "module", "subject"])), None)
    section = next((c for c in cols if any(k in low[c] for k in ["section", "group", "batch", "class"])), None)
    return course, section


def apply_filters(df, course_col, section_col, course_filter, section_filter):
    d = df.copy()
    if course_col and course_col in d.columns and course_filter and course_filter != "(all)":
        d = d[d[course_col].astype(str).fillna("NA") == course_filter]
    if section_col and section_col in d.columns and section_filter and section_filter != "(all)":
        d = d[d[section_col].astype(str).fillna("NA") == section_filter]
    return d


# =============================
# Core HoD Insights (no student tables)
# =============================
def compute_hod_insights(df, marks_col, grade_col, pass_mark, course_col, section_col, course_filter, section_filter):
    if df is None or df.empty:
        raise gr.Error("Sheet is empty.")

    d = apply_filters(df, course_col, section_col, course_filter, section_filter).copy()

    d["_marks"] = _safe_numeric(d[marks_col]) if marks_col in d.columns else np.nan
    d["_grade"] = d[grade_col].astype(str).str.strip().replace({"nan": "NA"}) if grade_col in d.columns else "NA"

    total = int(len(d))
    valid = d[d["_marks"].notna()].copy()
    n = int(len(valid))
    missing = int(d["_marks"].isna().sum())

    pass_mark = int(pass_mark)
    mean = float(valid["_marks"].mean()) if n else 0.0
    std = float(valid["_marks"].std(ddof=0)) if n else 0.0
    minv = float(valid["_marks"].min()) if n else 0.0
    maxv = float(valid["_marks"].max()) if n else 0.0

    pass_count = int((valid["_marks"] >= pass_mark).sum()) if n else 0
    pass_rate = (pass_count / n * 100.0) if n else 0.0

    # Borderline (pass to pass+5) and just-below (pass-5 to pass-1)
    borderline_pass = int(((valid["_marks"] >= pass_mark) & (valid["_marks"] < pass_mark + 5)).sum()) if n else 0
    borderline_fail = int(((valid["_marks"] < pass_mark) & (valid["_marks"] >= pass_mark - 5)).sum()) if n else 0

    # Distribution shape
    skew = float(valid["_marks"].skew()) if n else 0.0
    kurt = float(valid["_marks"].kurt()) if n else 0.0

    # Outliers by IQR
    outlier_count = 0
    low_thr = high_thr = 0.0
    if n:
        q1 = float(np.percentile(valid["_marks"], 25))
        q3 = float(np.percentile(valid["_marks"], 75))
        iqr = q3 - q1
        low_thr = q1 - 1.5 * iqr
        high_thr = q3 + 1.5 * iqr
        outlier_count = int(((valid["_marks"] < low_thr) | (valid["_marks"] > high_thr)).sum())

    # Percentiles
    pct_df = pd.DataFrame(
        [(f"P{p}", round(float(np.percentile(valid["_marks"], p)), 2)) for p in [10, 25, 50, 75, 90]]
        if n else [],
        columns=["Percentile", "Marks"]
    )

    # Grade distribution
    grade_dist = d["_grade"].value_counts(dropna=False).rename("count").to_frame().reset_index()
    grade_dist.columns = [grade_col, "count"]
    grade_dist["%"] = (grade_dist["count"] / grade_dist["count"].sum() * 100).round(2) if len(grade_dist) else 0

    # Grade ↔ marks mapping (moderation evidence)
    grade_stats = (
        valid.groupby(d["_grade"])["_marks"]
        .agg(["count", "mean", "std", "min", "median", "max"])
        .reset_index()
        .rename(columns={"_grade": "Grade"})
        .sort_values("mean", ascending=False)
    )

    # Mark heaping (repeated marks)
    heaping = (
        valid["_marks"].round(0).astype(int)
        .value_counts().head(12)
        .rename("count").reset_index()
        .rename(columns={"index": "Mark"})
    )

    # Course status
    if pass_rate >= 80:
        status = "GREEN"
    elif pass_rate >= 60:
        status = "AMBER"
    else:
        status = "RED"

    # Flags
    flags = []
    if missing > 0:
        flags.append(f"{missing} missing mark(s) — verify completeness.")
    if borderline_fail > max(5, 0.03 * n):
        flags.append("Many students just below pass — consider targeted support / moderation review.")
    if borderline_pass > max(5, 0.03 * n):
        flags.append("Many students just above pass — borderline attainment cluster.")
    if abs(skew) > 0.7:
        flags.append("Skewed distribution — check assessment balance and marking consistency.")
    if outlier_count > 0:
        flags.append(f"{outlier_count} outlier(s) by IQR — spot-check extremes.")
    if len(heaping) and heaping["count"].iloc[0] >= max(10, 0.06 * n):
        flags.append("Heaping detected — many students share identical marks (rounding/marking pattern).")

    flags_text = " | ".join(flags) if flags else "No major warning patterns detected."

    insight = (
        f"**Status:** {status}  \n"
        f"**Pass rate:** {pass_rate:.1f}% (Pass mark = {pass_mark})  \n"
        f"**Avg:** {mean:.1f} | **Std:** {std:.1f} | **Min/Max:** {minv:.1f}/{maxv:.1f}  \n"
        f"**Borderline (just below pass):** {borderline_fail} | **Borderline (just above pass):** {borderline_pass}  \n"
        f"**Skew:** {skew:.2f} | **Kurtosis:** {kurt:.2f} | **Outliers:** {outlier_count} | **Missing:** {missing}  \n"
        f"**Flags:** {flags_text}"
    )

    # KPI table
    kpi = pd.DataFrame(
        [
            ("Total rows (filtered)", total),
            ("Students with numeric marks", n),
            ("Missing marks", missing),
            ("Pass mark", pass_mark),
            ("Pass count", pass_count),
            ("Pass rate (%)", round(pass_rate, 2)),
            ("Borderline just below pass", borderline_fail),
            ("Borderline just above pass", borderline_pass),
            ("Average", round(mean, 2)),
            ("Std deviation", round(std, 2)),
            ("Minimum", round(minv, 2)),
            ("Maximum", round(maxv, 2)),
            ("Skewness", round(skew, 3)),
            ("Kurtosis", round(kurt, 3)),
            ("Outlier low threshold (IQR)", round(low_thr, 2)),
            ("Outlier high threshold (IQR)", round(high_thr, 2)),
            ("Outlier count (IQR)", outlier_count),
            ("Status", status),
        ],
        columns=["Metric", "Value"],
    )

    # Charts
    # 1 Histogram
    fig1 = plt.figure()
    plt.hist(valid["_marks"].dropna(), bins=12)
    plt.axvline(pass_mark, linestyle="--")
    plt.title("Marks distribution (Histogram)")
    plt.xlabel("Marks")
    plt.ylabel("Students")

    # 2 CDF
    fig2 = plt.figure()
    xs = np.sort(valid["_marks"].dropna().values) if n else np.array([])
    ys = np.arange(1, len(xs) + 1) / len(xs) if len(xs) else np.array([])
    if len(xs):
        plt.plot(xs, ys)
        plt.axvline(pass_mark, linestyle="--")
    plt.title("CDF (Proportion of students ≤ mark)")
    plt.xlabel("Marks")
    plt.ylabel("Proportion")

    # 3 Grade distribution
    fig3 = plt.figure()
    gd = grade_dist.set_index(grade_col)["count"]
    plt.bar(gd.index.astype(str), gd.values)
    plt.title("Grade distribution")
    plt.xlabel("Grade")
    plt.ylabel("Count")
    plt.xticks(rotation=45, ha="right")

    # 4 Boxplot by grade (moderation)
    fig4 = plt.figure()
    if not grade_stats.empty:
        order = grade_stats["Grade"].tolist()
        data = [valid.loc[d["_grade"] == g, "_marks"].dropna().values for g in order]
        plt.boxplot(data, tick_labels=[str(g) for g in order], vert=True)
        plt.title("Marks spread by Grade (Boxplot)")
        plt.xlabel("Grade")
        plt.ylabel("Marks")
        plt.xticks(rotation=45, ha="right")
    else:
        plt.title("Marks spread by Grade (Boxplot)")

    # 5 Section comparison (optional)
    fig5 = plt.figure()
    section_table = pd.DataFrame()
    if section_col and section_col in d.columns and n:
        sec = valid.groupby(d[section_col].astype(str).fillna("NA"))["_marks"].agg(["count", "mean"]).reset_index()
        sec["pass_rate_%"] = (valid.groupby(d[section_col].astype(str).fillna("NA"))["_marks"].apply(lambda x: (x >= pass_mark).mean() * 100)).values
        sec = sec.rename(columns={section_col: "Section"})
        section_table = sec.sort_values("pass_rate_%", ascending=False)

        plt.bar(section_table["Section"].astype(str), section_table["pass_rate_%"].values)
        plt.title("Section-wise Pass Rate (%)")
        plt.xlabel("Section")
        plt.ylabel("Pass rate (%)")
        plt.xticks(rotation=45, ha="right")
    else:
        plt.title("Section-wise Pass Rate (%) — not available (no section column)")
        plt.axis("off")

    return kpi, pct_df, grade_dist, grade_stats, heaping, section_table, insight, fig1, fig2, fig3, fig4, fig5


# =============================
# UI
# =============================
with gr.Blocks(title="HoD Result Dashboard") as demo:
    gr.Markdown("## 📊 HoD Result Dashboard — Insights & Patterns (No Student Tables, No PDF)")

    file_bytes_state = gr.State(None)   # bytes
    sheet_state = gr.State(None)        # sheet name string

    with gr.Row():
        upload = gr.File(label="Upload Excel (.xlsx)", file_types=[".xlsx"])
        sheet_dd = gr.Dropdown(label="Sheet", choices=[], interactive=False)

    with gr.Row():
        marks_col = gr.Dropdown(label="Marks column", choices=[], interactive=False)
        grade_col = gr.Dropdown(label="Grade column", choices=[], interactive=False)
        pass_mark = gr.Number(label="Pass mark", value=50, precision=0)

    with gr.Row():
        course_col = gr.Dropdown(label="Course column (optional)", choices=[], interactive=False, visible=False)
        section_col = gr.Dropdown(label="Section column (optional)", choices=[], interactive=False, visible=False)

    with gr.Row():
        course_filter = gr.Dropdown(label="Course filter", choices=["(all)"], value="(all)", interactive=False, visible=False)
        section_filter = gr.Dropdown(label="Section filter", choices=["(all)"], value="(all)", interactive=False, visible=False)

    analyze_btn = gr.Button("🔍 Refresh HoD Dashboard")

    insight_md = gr.Markdown("")

    with gr.Tab("Tables"):
        with gr.Row():
            kpi_table = gr.Dataframe(label="KPI Summary", interactive=False, wrap=True)
            pct_table = gr.Dataframe(label="Percentiles", interactive=False, wrap=True)
        with gr.Row():
            grade_dist_table = gr.Dataframe(label="Grade Distribution", interactive=False, wrap=True)
            heaping_table = gr.Dataframe(label="Mark Heaping (Top repeated marks)", interactive=False, wrap=True)
        grade_stats_table = gr.Dataframe(label="Grade ↔ Marks (Moderation evidence)", interactive=False, wrap=True)
        section_table = gr.Dataframe(label="Section Comparison (if available)", interactive=False, wrap=True)

    with gr.Tab("Charts"):
        with gr.Row():
            hist_plot = gr.Plot(label="Histogram")
            cdf_plot = gr.Plot(label="CDF")
        with gr.Row():
            grade_plot = gr.Plot(label="Grade distribution")
            grade_box = gr.Plot(label="Boxplot by grade")
        section_plot = gr.Plot(label="Section-wise pass rate")

    # -------- callbacks
    def on_upload(file_obj):
        b = _read_file_bytes(file_obj)
        xls = pd.ExcelFile(io.BytesIO(b), engine="openpyxl")
        sheets = xls.sheet_names or []
        if not sheets:
            raise gr.Error("No sheets found in workbook.")

        sheet0 = sheets[0]
        df0 = _drop_useless(pd.read_excel(io.BytesIO(b), sheet_name=sheet0, engine="openpyxl"))

        cols = list(df0.columns)
        m_guess = _guess_marks_col(df0)
        g_guess = _guess_grade_col(cols)
        c_guess, s_guess = _guess_optional(cols)

        # Optional filters
        course_col_upd = gr.update(choices=cols, value=(c_guess or cols[0]), visible=bool(c_guess), interactive=bool(c_guess))
        section_col_upd = gr.update(choices=cols, value=(s_guess or cols[0]), visible=bool(s_guess), interactive=bool(s_guess))

        course_filter_upd = gr.update(choices=["(all)"], value="(all)", visible=False, interactive=False)
        section_filter_upd = gr.update(choices=["(all)"], value="(all)", visible=False, interactive=False)

        if c_guess and c_guess in df0.columns:
            vals = ["(all)"] + sorted(df0[c_guess].astype(str).fillna("NA").unique().tolist())
            course_filter_upd = gr.update(choices=vals, value="(all)", visible=True, interactive=True)

        if s_guess and s_guess in df0.columns:
            vals = ["(all)"] + sorted(df0[s_guess].astype(str).fillna("NA").unique().tolist())
            section_filter_upd = gr.update(choices=vals, value="(all)", visible=True, interactive=True)

        return (
            gr.update(choices=sheets, value=sheet0, interactive=True),   # sheet_dd
            gr.update(choices=cols, value=m_guess, interactive=True),    # marks_col
            gr.update(choices=cols, value=g_guess, interactive=True),    # grade_col
            course_col_upd,
            section_col_upd,
            course_filter_upd,
            section_filter_upd,
            b,          # file_bytes_state
            sheet0,     # sheet_state
        )

    upload.change(
        fn=on_upload,
        inputs=[upload],
        outputs=[sheet_dd, marks_col, grade_col, course_col, section_col, course_filter, section_filter, file_bytes_state, sheet_state],
    )

    def on_sheet_change(sheet_name, file_bytes, course_col_val, section_col_val):
        if not file_bytes:
            raise gr.Error("Upload Excel first.")

        df = _drop_useless(pd.read_excel(io.BytesIO(file_bytes), sheet_name=sheet_name, engine="openpyxl"))

        cf = gr.update(choices=["(all)"], value="(all)", visible=False, interactive=False)
        sf = gr.update(choices=["(all)"], value="(all)", visible=False, interactive=False)

        if course_col_val and course_col_val in df.columns:
            vals = ["(all)"] + sorted(df[course_col_val].astype(str).fillna("NA").unique().tolist())
            cf = gr.update(choices=vals, value="(all)", visible=True, interactive=True)

        if section_col_val and section_col_val in df.columns:
            vals = ["(all)"] + sorted(df[section_col_val].astype(str).fillna("NA").unique().tolist())
            sf = gr.update(choices=vals, value="(all)", visible=True, interactive=True)

        return cf, sf, sheet_name

    sheet_dd.change(
        fn=on_sheet_change,
        inputs=[sheet_dd, file_bytes_state, course_col, section_col],
        outputs=[course_filter, section_filter, sheet_state],
    )

    def on_refresh(file_bytes, sheet_name, m_col, g_col, pmark, c_col, s_col, c_filter, s_filter):
        if not file_bytes:
            raise gr.Error("Upload Excel first.")
        if not sheet_name:
            raise gr.Error("Select a sheet.")

        df = _drop_useless(pd.read_excel(io.BytesIO(file_bytes), sheet_name=sheet_name, engine="openpyxl"))

        kpi, pct, gdist, gstats, heap, sec_tbl, insight, f1, f2, f3, f4, f5 = compute_hod_insights(
            df, m_col, g_col, int(pmark), c_col, s_col, c_filter, s_filter
        )

        return (
            "### HoD Insight\n\n" + insight,
            kpi, pct, gdist, heap, gstats, sec_tbl,
            f1, f2, f3, f4, f5
        )

    analyze_btn.click(
        fn=on_refresh,
        inputs=[file_bytes_state, sheet_state, marks_col, grade_col, pass_mark, course_col, section_col, course_filter, section_filter],
        outputs=[insight_md, kpi_table, pct_table, grade_dist_table, heaping_table, grade_stats_table, section_table,
                 hist_plot, cdf_plot, grade_plot, grade_box, section_plot],
    )

demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)