# app.py  (Fast-build Hugging Face Gradio)
# School Mark Analysis: RegNo, Name, Tamil, English, Maths, Science, Social
# Features:
# - Total, Average, Rank, Remark
# - Subject-wise average + bar chart
# - Fail-count distribution (failed in 1..5 subjects)
# - Top-3 overall, Top-3 in each subject
# - Search by RegNo
# - Download final result CSV
# - Logo display (reliable on Hugging Face) using gr.Image + Pillow

import os
import tempfile
import numpy as np
import pandas as pd
import gradio as gr
import matplotlib.pyplot as plt
from PIL import Image

# Default column layout expected in the uploaded marks CSV.
# Subject columns that are converted to numbers, summed, averaged and ranked.
SUBJECTS_DEFAULT = ["Tamil", "English", "Maths", "Science", "Social"]
# Column holding the registration number (used as a sort tie-breaker and for search).
ID_COL_DEFAULT = "RegNo"
# Column holding the student's name.
NAME_COL_DEFAULT = "Name"


# -----------------------------
# Utilities
# -----------------------------
def _clean_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* whose string column labels have no surrounding whitespace.

    Non-string labels (for example integer headers) are kept unchanged instead of
    raising ``AttributeError``. The input frame itself is never modified.
    """
    df = df.copy()
    df.columns = [c.strip() if isinstance(c, str) else c for c in df.columns]
    return df


def _validate_and_prepare(df: pd.DataFrame, id_col: str, name_col: str, subjects: list[str]) -> pd.DataFrame:
    """Check that the required columns exist and coerce the subject columns to numbers.

    Column names are cleaned via ``_clean_columns`` first, so the returned frame is a
    copy of the input. Values that cannot be parsed as numbers become NaN.

    Raises:
        ValueError: if any of ``id_col``, ``name_col`` or ``subjects`` is missing, or if
            no subject cell holds a numeric value after conversion.
    """
    prepared = _clean_columns(df)

    required = [id_col, name_col] + subjects
    absent = [col for col in required if col not in prepared.columns]
    if absent:
        raise ValueError(f"Missing required columns: {absent}")

    # Coerce each subject column; unparsable entries turn into NaN rather than raising.
    for subject in subjects:
        prepared[subject] = pd.to_numeric(prepared[subject], errors="coerce")

    # Sanity check: a frame without a single numeric mark is almost certainly a bad CSV.
    nothing_numeric = prepared[subjects].isna().all(axis=None)
    if nothing_numeric:
        raise ValueError("All subject columns became NaN after numeric conversion. Check CSV data.")

    return prepared


def _remark(avg: float, failed_subjects: int) -> str:
    """Map a student's average and number of failed subjects to a remark.

    Any failed subject yields "Fail". Otherwise the average is bucketed with
    descending thresholds so that fractional averages (e.g. 79.5 or 59.5) land in
    the band below the next threshold instead of slipping through a gap:

    * ``avg >= 80`` -> "Distinction"
    * ``avg >= 60`` -> "First Class"
    * ``avg >= 35`` -> "Second Class"
    * anything else (including NaN) -> "Pass"
    """
    if failed_subjects > 0:
        return "Fail"
    if avg >= 80:
        return "Distinction"
    if avg >= 60:
        return "First Class"
    if avg >= 35:
        return "Second Class"
    return "Pass"


# -----------------------------
# Core computations
# -----------------------------
def compute_marks(
    df: pd.DataFrame,
    pass_mark: int = 35,
    id_col: str = ID_COL_DEFAULT,
    name_col: str = NAME_COL_DEFAULT,
    subjects: list[str] = SUBJECTS_DEFAULT,
):
    """Compute per-student results and class-level statistics from a marks table.

    Args:
        df: Raw marks table; must contain ``id_col``, ``name_col`` and every subject.
        pass_mark: Minimum mark needed to pass a single subject.
        id_col: Name of the registration-number column.
        name_col: Name of the student-name column.
        subjects: Subject column names (not modified by this function).

    Returns:
        A 6-tuple ``(result, subj_avg, fail_dist, top3_overall, top3_each_subject, summary)``:

        * ``result`` - input rows plus ``Total``, ``Average``, ``Failed_Subjects``,
          ``Remark`` and ``Rank``, sorted by rank then ``id_col``.
        * ``subj_avg`` - class average per subject.
        * ``fail_dist`` - number of students who failed exactly 1..len(subjects) subjects.
        * ``top3_overall`` - the three best-ranked rows.
        * ``top3_each_subject`` - the three highest marks for every subject.
        * ``summary`` - one-line text with pass/fail and remark counts.

    Raises:
        ValueError: propagated from ``_validate_and_prepare`` for missing columns or
            entirely non-numeric subject data.
    """
    df = _validate_and_prepare(df, id_col, name_col, subjects)

    out = df.copy()
    out["Total"] = out[subjects].sum(axis=1)
    out["Average"] = out[subjects].mean(axis=1)

    # NaN compares False against pass_mark, so a missing mark is not counted as a failure.
    out["Failed_Subjects"] = (out[subjects] < pass_mark).sum(axis=1)
    # Only two columns are needed per student, so pair them up instead of a row-wise apply.
    out["Remark"] = [
        _remark(float(avg), int(failed))
        for avg, failed in zip(out["Average"], out["Failed_Subjects"])
    ]

    # Rank by Total (descending), ties get same minimum rank
    out["Rank"] = out["Total"].rank(method="min", ascending=False).astype(int)
    out = out.sort_values(["Rank", id_col], ascending=[True, True]).reset_index(drop=True)

    # Subject-wise average
    subj_avg = pd.DataFrame({
        "Subject": subjects,
        "Class_Average": [float(out[s].mean()) for s in subjects],
    })

    # Fail distribution: one bucket per possible failure count (1..5 for the default subjects)
    fail_buckets = list(range(1, len(subjects) + 1))
    fail_dist = pd.DataFrame({
        "Failed_Subjects": fail_buckets,
        "Student_Count": [int((out["Failed_Subjects"] == k).sum()) for k in fail_buckets],
    })

    # Top 3 overall
    top3_overall_cols = [id_col, name_col, "Total", "Average", "Rank", "Remark"]
    top3_overall = out.nsmallest(3, "Rank")[top3_overall_cols]

    # Top 3 per subject
    rows = []
    for s in subjects:
        # A stable sort keeps equal marks in overall-rank order, so ties resolve deterministically.
        t = out.sort_values(s, ascending=False, kind="mergesort").head(3)[
            [id_col, name_col, s, "Total", "Average", "Rank", "Remark"]
        ].copy()
        t.insert(0, "Subject", s)
        t.rename(columns={s: "Subject_Mark"}, inplace=True)
        rows.append(t)
    top3_each_subject = pd.concat(rows, ignore_index=True) if rows else pd.DataFrame()

    # Summary
    total_students = out.shape[0]
    pass_count = int((out["Failed_Subjects"] == 0).sum())
    fail_count = total_students - pass_count

    dist_count = int((out["Remark"] == "Distinction").sum())
    first_count = int((out["Remark"] == "First Class").sum())
    second_count = int((out["Remark"] == "Second Class").sum())

    summary = (
        f"Students: {total_students} | Pass: {pass_count} | Fail: {fail_count} | "
        f"Distinction: {dist_count} | First Class: {first_count} | Second Class: {second_count}"
    )

    return out, subj_avg, fail_dist, top3_overall, top3_each_subject, summary


# -----------------------------
# Plots (close figs to avoid memory growth)
# -----------------------------
def plot_subject_avg(subj_avg: pd.DataFrame):
    """Draw a bar chart of the class average per subject and return the figure.

    Args:
        subj_avg: Frame with ``Subject`` and ``Class_Average`` columns.

    Returns:
        The matplotlib figure, already closed in pyplot so it does not accumulate
        in pyplot's figure registry.
    """
    fig, ax = plt.subplots(figsize=(7, 4))
    ax.bar(subj_avg["Subject"], subj_avg["Class_Average"])
    ax.set_title("Subject-wise Class Average")
    ax.set_xlabel("Subject")
    ax.set_ylabel("Average Marks")
    ax.set_ylim(0, 100)
    # Style the labels through this Axes rather than plt.xticks(): pyplot's implicit
    # "current figure" is global state and could point at another figure if two
    # requests plot at the same time.
    for label in ax.get_xticklabels():
        label.set_rotation(25)
        label.set_horizontalalignment("right")
    fig.tight_layout()
    plt.close(fig)
    return fig


def plot_remark_distribution(result_df: pd.DataFrame):
    """Draw a bar chart of how many students received each remark.

    Args:
        result_df: Result table containing a ``Remark`` column.

    Returns:
        The matplotlib figure, already closed in pyplot so it does not accumulate
        in pyplot's figure registry. Remarks that no student received are omitted.
    """
    # Fixed display order; only remarks that actually occur are plotted.
    order = ["Distinction", "First Class", "Second Class", "Fail", "Pass"]
    counts = result_df["Remark"].value_counts()
    labels = [x for x in order if x in counts.index]
    values = [int(counts[x]) for x in labels]

    fig, ax = plt.subplots(figsize=(7, 4))
    ax.bar(labels, values)
    ax.set_title("Remark Distribution")
    ax.set_xlabel("Remark")
    ax.set_ylabel("Number of Students")
    # Address this Axes/Figure directly instead of pyplot's global "current figure",
    # which could belong to another request when plots are built concurrently.
    for label in ax.get_xticklabels():
        label.set_rotation(20)
        label.set_horizontalalignment("right")
    fig.tight_layout()
    plt.close(fig)
    return fig


# -----------------------------
# Gradio handlers
# -----------------------------
def load_csv(file_obj):
    """Read the uploaded CSV and return ``(dataframe, status_message, preview)``.

    Args:
        file_obj: The upload from the file component. Either a path
            (``str`` / ``os.PathLike``) or an object exposing the path as ``.name``;
            ``None`` when nothing was uploaded.

    Returns:
        ``(df, msg, df.head(20))`` on success, otherwise ``(None, message, None)``.
        Read errors are reported in the message rather than raised.
    """
    if file_obj is None:
        return None, "Please upload a CSV.", None

    try:
        # The upload may arrive as a plain path or as a file wrapper with ``.name``,
        # depending on how the file component is configured - accept both.
        path = file_obj if isinstance(file_obj, (str, os.PathLike)) else file_obj.name
        df = pd.read_csv(path)
        df = _clean_columns(df)
        msg = f"Loaded: {os.path.basename(path)} | Rows={df.shape[0]} | Cols={df.shape[1]}"
        return df, msg, df.head(20)
    except Exception as e:
        # UI boundary: surface the problem in the status box instead of crashing the handler.
        return None, f"Could not read CSV: {e}", None


def run_all(df, pass_mark):
    """Run the full analysis and return the nine values the UI displays.

    Args:
        df: The loaded marks frame, or ``None`` if no CSV has been loaded yet.
        pass_mark: Per-subject pass mark; converted with ``int()``.

    Returns:
        ``(summary, result_df, subj_avg, fail_dist, top3_overall, top3_each_subject,
        fig_avg, fig_remark, csv_path)``. On missing input or any error the first
        element carries the message and the remaining eight are ``None``.
    """
    if df is None:
        return "Upload a CSV first.", None, None, None, None, None, None, None, None

    try:
        result_df, subj_avg, fail_dist, top3_overall, top3_each_subject, summary = compute_marks(
            df, pass_mark=int(pass_mark)
        )
        fig_avg = plot_subject_avg(subj_avg)
        fig_remark = plot_remark_distribution(result_df)

        # Save downloadable CSV. Only the unique path is needed, so close the handle
        # before pandas reopens the file (avoids a leaked descriptor and a locked
        # file on platforms that forbid a second open).
        with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
            out_path = tmp.name
        result_df.to_csv(out_path, index=False)

        return summary, result_df, subj_avg, fail_dist, top3_overall, top3_each_subject, fig_avg, fig_remark, out_path
    except Exception as e:
        # UI boundary: report the failure in the summary box instead of raising.
        return f"Error: {e}", None, None, None, None, None, None, None, None


def search_regno(result_df, regno_value):
    """Look up rows of the result table whose registration number equals the query.

    Args:
        result_df: Result table produced by the analysis, or ``None`` if it has not run.
        regno_value: Text typed by the user; surrounding whitespace is ignored.

    Returns:
        ``(status_message, matches)`` where ``matches`` is an empty frame when the
        analysis has not run, the query is blank, or nothing matched.
    """
    if result_df is None or isinstance(result_df, str):
        return "Run analysis first.", pd.DataFrame()

    # Normalise the query so that "  101 " or a whitespace-only entry behaves sensibly.
    query = "" if regno_value is None else str(regno_value).strip()
    if not query:
        return "Enter RegNo to search.", pd.DataFrame()

    col = result_df[ID_COL_DEFAULT]
    if pd.api.types.is_numeric_dtype(col):
        try:
            # Numeric IDs: compare as numbers so "101" matches 101 and 101.0.
            res = result_df[col == float(query)]
        except (TypeError, ValueError):
            # Query is not a number; fall back to a textual comparison.
            res = result_df[col.astype(str) == query]
    else:
        # Text IDs: ignore stray whitespace stored in the data as well.
        res = result_df[col.astype(str).str.strip() == query]

    if res.empty:
        return "No matching record found.", pd.DataFrame()
    return f"Found {len(res)} record(s).", res


# -----------------------------
# Logo loader (reliable in HF)
# -----------------------------
def load_logo():
    """Return the first readable logo image found in the working directory, else ``None``.

    Candidate file names are tried in order. A candidate that exists but cannot be
    decoded is skipped so that a later, valid file can still be used.
    """
    for fn in ["logo.jpg", "logo.png", "logo.jpeg", "Logo.jpg", "Logo.png", "Logo.jpeg"]:
        if not os.path.exists(fn):
            continue
        try:
            img = Image.open(fn)
            # Image.open is lazy; decode now so a corrupt file fails here (not later
            # inside the UI) and the underlying file handle is not left open.
            img.load()
            return img
        except Exception:
            # Best-effort: the logo is optional, so move on to the next candidate.
            continue
    return None


# -----------------------------
# UI
# -----------------------------
# Stylesheet handed to gr.Blocks(css=...): centres the title block and sets the
# size, weight and colour of its two heading lines (#t1 and #t2).
CSS = """
#titleblock {text-align:center; margin-top: 6px; margin-bottom: 8px;}
#t1 {font-size:30px; font-weight:800; color:#1E5AA8;}
#t2 {font-size:18px; font-weight:800; color:#1E5AA8;}
"""

# Assemble the Gradio app: header, upload/settings column, tabbed results column,
# then the button event wiring.
with gr.Blocks(css=CSS, title="School Mark Analysis") as demo:
    # Logo + title
    # The logo is optional: the image is added only when load_logo() returns one.
    logo = load_logo()
    if logo is not None:
        gr.Image(value=logo, show_label=False, interactive=False, height=160)

    gr.HTML("""
    <div id="titleblock" style="margin-top:-10px;">
      <div id="t1">Amrita Manthana</div>
      <div id="t2">Prof.B.Santhi,SRC,SASTRA</div>
    </div>
    """)

    # State holders: df_state receives the frame returned by load_csv,
    # result_state the result table returned by run_and_store (read by the search tab).
    df_state = gr.State(None)
    result_state = gr.State(None)

    with gr.Row():
        # Left column: upload, settings, summary and download.
        with gr.Column(scale=1, min_width=340):
            gr.Markdown("### Upload Marks CSV")
            file_in = gr.File(label="Upload CSV", file_types=[".csv"])
            load_btn = gr.Button("Load CSV", variant="primary")
            load_msg = gr.Textbox(label="Status", interactive=False)
            preview = gr.Dataframe(label="Preview", interactive=False, wrap=True)

            gr.Markdown("### Analysis Settings")
            pass_mark = gr.Slider(label="Pass mark (per subject)", minimum=0, maximum=100, value=35, step=1)
            run_btn = gr.Button("Run Mark Analysis", variant="primary")
            summary = gr.Textbox(label="Summary", interactive=False)

            download_file = gr.File(label="Download Result CSV")

        # Right column: one tab per output of the analysis.
        with gr.Column(scale=2):
            with gr.Tabs():
                with gr.Tab("Result Table"):
                    result_table = gr.Dataframe(
                        label="Result (Total, Average, Rank, Remark, Failed_Subjects)",
                        interactive=False,
                        wrap=True
                    )
                with gr.Tab("Subject Averages"):
                    subj_avg_table = gr.Dataframe(label="Subject-wise Averages", interactive=False, wrap=True)
                    avg_plot = gr.Plot(label="Bar Chart: Subject-wise Average")
                with gr.Tab("Fail Counts"):
                    fail_dist_table = gr.Dataframe(
                        label="Students failed in 1/2/3/4/5 subjects",
                        interactive=False,
                        wrap=True
                    )
                with gr.Tab("Toppers"):
                    top3_overall_table = gr.Dataframe(label="Overall Top 3", interactive=False, wrap=True)
                    top3_each_subject_table = gr.Dataframe(label="Top 3 in each subject", interactive=False, wrap=True)
                with gr.Tab("Remarks"):
                    remark_plot = gr.Plot(label="Remark Distribution")
                with gr.Tab("Search"):
                    regno_in = gr.Textbox(label="Enter RegNo (exact match)")
                    search_btn = gr.Button("Search")
                    search_msg = gr.Textbox(label="Search Status", interactive=False)
                    search_out = gr.Dataframe(label="Matching Record(s)", interactive=False, wrap=True)

    # Events
    # Load: parse the uploaded file, keep the frame in df_state and show a preview.
    load_btn.click(
        load_csv,
        inputs=[file_in],
        outputs=[df_state, load_msg, preview]
    )

    def run_and_store(df, pm):
        """Call run_all and return its nine outputs plus the result table once more.

        The extra trailing value feeds result_state so the search tab can query it.
        """
        s, res, subj, faild, t3, t3sub, f1, f2, fcsv = run_all(df, pm)
        return s, res, subj, faild, t3, t3sub, f1, f2, fcsv, res

    # Run: the output list must stay in the same order as run_and_store's return tuple.
    run_btn.click(
        run_and_store,
        inputs=[df_state, pass_mark],
        outputs=[
            summary, result_table, subj_avg_table, fail_dist_table, top3_overall_table, top3_each_subject_table,
            avg_plot, remark_plot, download_file, result_state
        ]
    )

    # Search: look up the typed RegNo in the stored result table.
    search_btn.click(
        search_regno,
        inputs=[result_state, regno_in],
        outputs=[search_msg, search_out]
    )

# Launch the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    demo.launch()