import gradio as gr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix
)

from imblearn.over_sampling import SMOTE

from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image, Table, TableStyle
from reportlab.lib import colors
from reportlab.lib.styles import getSampleStyleSheet

# =========================
# GLOBALS
# =========================
# Cleaned dataset; assigned by upload_and_clean() and read by the analysis steps.
df_global = None

# Display name of the best-scoring model and the estimator object that
# feature_importance() inspects.
best_model_name = best_model_obj = None

# Results of the last run_ml() call: one metrics table per strategy
# (no sampling / class weight / SMOTE) plus the confusion-matrix image paths.
no_global = cw_global = smote_global = cm_global = None


# =========================
# UPLOAD
# =========================
def upload_and_clean(file):
    """Load an uploaded CSV, de-duplicate it and impute missing values.

    Entirely empty columns are dropped (they carry no information and cannot
    be imputed). Remaining numeric columns are filled with their median and
    all other columns with their most frequent value. The cleaned frame is
    stored in the module-level ``df_global`` for the later steps.

    Args:
        file: Value delivered by the ``gr.File`` component. Accepted as
            either a path string or an object exposing the path as ``.name``.

    Returns:
        A 4-tuple ``(status, preview, dropdown_update, dropdown_update)``:
        a status message, the first rows of the cleaned data, and two
        identical ``gr.update`` payloads carrying the column names as
        dropdown choices (the UI wiring expects four outputs).

    Raises:
        gr.Error: If no file was selected.
    """
    global df_global

    if file is None:
        raise gr.Error("Please select a CSV file before uploading.")

    # Depending on the Gradio version/configuration the component hands over
    # a plain path string or a tempfile-like wrapper with the path in .name.
    csv_path = file if isinstance(file, str) else file.name

    df = pd.read_csv(csv_path).drop_duplicates()

    # Columns without a single value have neither a median nor a mode; the
    # original ``mode()[0]`` lookup would raise on them.
    df = df.dropna(axis=1, how="all")

    for col in df.columns:
        if pd.api.types.is_numeric_dtype(df[col]):
            fill_value = df[col].median()
        else:
            fill_value = df[col].mode().iloc[0]
        df[col] = df[col].fillna(fill_value)

    df_global = df

    column_names = list(df.columns)
    return (
        "Data Loaded Successfully",
        df.head(),
        gr.update(choices=column_names),
        gr.update(choices=column_names),
    )


# =========================
# ANALYSIS VISUALIZATION
# =========================
def analyze_data(target):
    """Render a bar chart and a pie chart for up to six non-target columns.

    Each column is converted to strings and its value frequencies are
    plotted: the ten most frequent values as bars and the six most frequent
    as pie slices.

    Args:
        target: Name of the target column, which is excluded from the plots.

    Returns:
        list[str]: Paths of the saved PNG files, one per plotted column.
        Empty when no dataset has been loaded yet.
    """
    if df_global is None:
        return []

    images = []
    feature_cols = [c for c in df_global.columns if c != target]

    for idx, col in enumerate(feature_cols[:6]):
        # Computed once and shared by both charts.
        counts = df_global[col].astype(str).value_counts()

        fig, axes = plt.subplots(1, 2, figsize=(12, 4))

        counts.head(10).plot(kind="bar", ax=axes[0])
        axes[0].set_title(f"Bar - {col}")
        axes[0].tick_params(axis='x', rotation=45)

        counts.head(6).plot(kind="pie", ax=axes[1], autopct="%1.1f%%")
        axes[1].set_title(f"Pie - {col}")
        axes[1].set_ylabel("")

        fig.tight_layout()

        # Column names may contain path separators or other characters that
        # are unsafe in file names; the index prefix keeps names unique and
        # avoids clobbering other images written to /tmp by this module.
        safe_name = "".join(
            ch if ch.isalnum() or ch in "-_" else "_" for ch in str(col)
        )
        path = f"/tmp/analysis_{idx}_{safe_name}.png"
        fig.savefig(path)
        plt.close(fig)

        images.append(path)

    return images


# =========================
# CONFUSION MATRIX
# =========================
def plot_cm(y_true, y_pred, title):
    """Draw a confusion-matrix heat map and save it as a PNG.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
        title: Plot title; also used as the file name under ``/tmp``.

    Returns:
        str: Path of the written image, ``/tmp/<title>.png``.
    """
    matrix = confusion_matrix(y_true, y_pred)

    fig, ax = plt.subplots(figsize=(4, 4))
    ax.imshow(matrix, cmap="Blues")
    ax.set_title(title)

    # Write the count into every cell: x is the column, y is the row.
    for (row, col), count in np.ndenumerate(matrix):
        ax.text(col, row, count, ha="center", va="center")

    out_path = f"/tmp/{title}.png"
    fig.savefig(out_path)
    plt.close(fig)

    return out_path


# =========================
# ML (NO / CW / SMOTE)
# =========================
_MODEL_NAMES = ("Decision Tree", "Random Forest", "XGBoost")


def _metrics_row(name, y_true, y_pred):
    """Build one results-table row: accuracy plus weighted precision/recall/F1."""
    weighted = {"average": "weighted", "zero_division": 0}
    return {
        "Model": name,
        "Accuracy": accuracy_score(y_true, y_pred),
        "Precision": precision_score(y_true, y_pred, **weighted),
        "Recall": recall_score(y_true, y_pred, **weighted),
        "F1": f1_score(y_true, y_pred, **weighted),
    }


def _make_model(name, balanced=False):
    """Return a fresh, unfitted classifier for one of ``_MODEL_NAMES``."""
    class_weight = "balanced" if balanced else None
    if name == "Decision Tree":
        return DecisionTreeClassifier(class_weight=class_weight, random_state=42)
    if name == "Random Forest":
        return RandomForestClassifier(class_weight=class_weight, random_state=42)
    # XGBClassifier takes no ``class_weight``; balancing is applied by the
    # caller through per-sample weights at fit time.
    return XGBClassifier(eval_metric="logloss", random_state=42)


def run_ml(target):
    """Train and compare three classifiers under three imbalance strategies.

    Decision Tree, Random Forest and XGBoost are each fitted with (1) the
    plain training data, (2) balanced class weighting and (3) SMOTE
    oversampling (only applied when the target is imbalanced, i.e. the
    rarest class has fewer than half as many rows as the most common one).
    Every run is scored on the same held-out 20% split and its confusion
    matrix is saved via ``plot_cm``.

    Side effects: updates the module-level ``no_global``, ``cw_global``,
    ``smote_global``, ``cm_global``, ``best_model_name`` and
    ``best_model_obj`` (the fitted estimator with the highest test accuracy
    across all nine runs).

    Args:
        target: Name of the column in ``df_global`` to predict.

    Returns:
        A 5-tuple ``(status, no_sampling_table, class_weight_table,
        smote_table, confusion_matrix_paths)``.

    Raises:
        gr.Error: If no dataset is loaded or ``target`` is not one of its
            columns.
    """
    global best_model_name, best_model_obj
    global no_global, cw_global, smote_global, cm_global

    # Local import: scikit-learn is already a dependency of this module.
    from sklearn.utils.class_weight import compute_sample_weight

    if df_global is None:
        raise gr.Error("Please upload a dataset first.")
    if target not in df_global.columns:
        raise gr.Error("Please choose a target column.")

    # Encode non-numeric feature columns as integer codes.
    X = df_global.drop(columns=[target])
    for col in X.columns:
        if not pd.api.types.is_numeric_dtype(X[col]):
            X[col] = LabelEncoder().fit_transform(X[col].astype(str))

    # Always encode the target to 0..k-1: XGBoost requires that label range,
    # and float or non-contiguous integer labels would otherwise break the
    # class counting below.
    raw_y = df_global[target]
    if not pd.api.types.is_numeric_dtype(raw_y):
        raw_y = raw_y.astype(str)
    y = pd.Series(
        LabelEncoder().fit_transform(raw_y), index=df_global.index, name=target
    )

    class_counts = y.value_counts()
    imbalance = bool(class_counts.min() / class_counts.max() < 0.5)

    # Stratify so rare classes appear in both splits; fall back to a plain
    # split when some class is too small for stratification.
    try:
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42, stratify=y
        )
    except ValueError:
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )

    # SMOTE interpolates between a sample and its k nearest same-class
    # neighbours, so the smallest class needs at least k + 1 samples.
    # Shrink k (library default: 5) when necessary and skip the resampling
    # entirely when a class has a single sample.
    X_res, y_res = X_train, y_train
    if imbalance:
        smallest_class = int(y_train.value_counts().min())
        k_neighbors = min(5, smallest_class - 1)
        if k_neighbors >= 1:
            sampler = SMOTE(random_state=42, k_neighbors=k_neighbors)
            X_res, y_res = sampler.fit_resample(X_train, y_train)

    balanced_weights = compute_sample_weight("balanced", y_train)

    # (label shown in best-model name, key suffix, train X, train y, balanced?)
    strategies = (
        ("No", "no", X_train, y_train, False),
        ("Class Weight", "cw", X_train, y_train, True),
        ("SMOTE", "smote", X_res, y_res, False),
    )

    tables = {}
    cm_images = {}
    best_score = -1.0

    for label, suffix, X_fit, y_fit, balanced in strategies:
        rows = []
        for name in _MODEL_NAMES:
            # A fresh estimator per run, so the stored best model is never
            # overwritten by a later refit.
            model = _make_model(name, balanced=balanced)
            if balanced and name == "XGBoost":
                model.fit(X_fit, y_fit, sample_weight=balanced_weights)
            else:
                model.fit(X_fit, y_fit)
            pred = model.predict(X_test)

            row = _metrics_row(name, y_test, pred)
            rows.append(row)

            cm_images[f"{name}_{suffix}"] = plot_cm(
                y_test, pred, f"{name}_{suffix.upper()}"
            )

            if row["Accuracy"] > best_score:
                best_score = row["Accuracy"]
                best_model_name = f"{name} ({label})"
                best_model_obj = model

        tables[suffix] = pd.DataFrame(rows)

    # store globally
    no_global = tables["no"]
    cw_global = tables["cw"]
    smote_global = tables["smote"]
    cm_global = cm_images

    return (
        f"Imbalance: {imbalance}",
        no_global,
        cw_global,
        smote_global,
        list(cm_images.values())
    )


# =========================
# FEATURE IMPORTANCE
# =========================
def feature_importance():
    """Plot the feature importances of the stored best model.

    Reads the module-level ``best_model_obj``. Bars are sorted by importance
    and labelled with the model's ``feature_names_in_`` when it exposes
    them; otherwise positional labels are used.

    Returns:
        str | None: Path of the saved chart (``/tmp/feat.png``), or ``None``
        when the stored model has no ``feature_importances_`` attribute
        (including when no model has been stored yet).
    """
    model = best_model_obj

    if not hasattr(model, "feature_importances_"):
        return None

    importances = np.asarray(model.feature_importances_)

    names = getattr(model, "feature_names_in_", None)
    if names is None or len(names) != len(importances):
        names = [f"feature {i}" for i in range(len(importances))]

    # Ascending order puts the most important feature at the top of a
    # horizontal bar chart.
    order = np.argsort(importances)
    positions = range(len(order))

    # Grow the figure with the number of features so labels stay legible.
    fig, ax = plt.subplots(figsize=(6, max(4, 0.3 * len(order))))
    ax.barh(positions, importances[order])
    ax.set_yticks(positions)
    ax.set_yticklabels([str(names[i]) for i in order])
    ax.set_xlabel("Importance")
    fig.tight_layout()

    path = "/tmp/feat.png"
    fig.savefig(path)
    plt.close(fig)

    return path


# =========================
# PDF REPORT
# =========================
def generate_pdf():
    """Build a PDF report from the results of the last ``run_ml`` call.

    The report contains the best model name, one metrics table per strategy
    (no sampling, class weight, SMOTE) and every saved confusion-matrix
    image. It reads the module-level ``no_global``, ``cw_global``,
    ``smote_global``, ``cm_global`` and ``best_model_name``.

    Returns:
        str: Path of the written report, ``/tmp/report.pdf``.

    Raises:
        gr.Error: If the analysis has not been run yet, so there are no
            results to report.
    """
    results = (no_global, cw_global, smote_global, cm_global)
    if any(item is None for item in results):
        raise gr.Error("Run the analysis before generating the report.")

    path = "/tmp/report.pdf"
    doc = SimpleDocTemplate(path)
    styles = getSampleStyleSheet()
    elements = []

    elements.append(Paragraph("AutoML Full Report", styles["Title"]))
    elements.append(Spacer(1, 10))
    elements.append(Paragraph(f"Best Model: {best_model_name}", styles["Heading2"]))

    def add_table(df, title):
        """Append a titled, grid-styled table rendering ``df``."""
        elements.append(Spacer(1, 10))
        elements.append(Paragraph(title, styles["Heading3"]))

        # Fixed precision keeps metric columns narrow and readable instead
        # of printing every float at full repr length.
        body = [
            [f"{cell:.4f}" if isinstance(cell, float) else str(cell) for cell in row]
            for row in df.values.tolist()
        ]
        data = [df.columns.tolist()] + body

        table = Table(data)
        table.setStyle(TableStyle([
            ("BACKGROUND", (0,0), (-1,0), colors.grey),
            ("TEXTCOLOR", (0,0), (-1,0), colors.white),
            ("GRID", (0,0), (-1,-1), 0.5, colors.black)
        ]))

        elements.append(table)

    add_table(no_global, "No Sampling")
    add_table(cw_global, "Class Weight")
    add_table(smote_global, "SMOTE")

    elements.append(Spacer(1, 10))
    elements.append(Paragraph("Confusion Matrices", styles["Heading2"]))

    for name, img in cm_global.items():
        elements.append(Paragraph(name, styles["Normal"]))
        elements.append(Image(img, width=200, height=200))

    doc.build(elements)

    return path


# =========================
# ANALYSIS
# =========================
def full_analysis(target):
    """Callback for the "Run Full Analysis" button; delegates to ``run_ml``.

    Args:
        target: Name of the target column chosen in the dropdown.

    Returns:
        The five values produced by ``run_ml``, in the same order: status
        text, the three metrics tables and the confusion-matrix images.
    """
    (
        status_text,
        plain_table,
        weighted_table,
        resampled_table,
        matrix_paths,
    ) = run_ml(target)

    return status_text, plain_table, weighted_table, resampled_table, matrix_paths


# =========================
# UI
# =========================
with gr.Blocks() as demo:

    gr.Markdown("# 🚀 Advanced AutoML System")

    # --- Dataset upload and preview ---
    file = gr.File()
    upload_btn = gr.Button("Upload")
    status = gr.Textbox()
    preview = gr.Dataframe()

    # --- Target selection and model comparison ---
    target = gr.Dropdown(label="Target")
    run_btn = gr.Button("Run Full Analysis")
    ml_status = gr.Textbox()

    # One metrics table per strategy: no sampling, class weight, SMOTE.
    no_table = gr.Dataframe()
    cw_table = gr.Dataframe()
    smote_table = gr.Dataframe()

    # Confusion-matrix images of every model/strategy run.
    gallery = gr.Gallery(columns=2)

    # --- Feature importance of the best model ---
    feat_btn = gr.Button("Feature Importance")
    feat_img = gr.Image()

    # --- PDF report ---
    pdf_btn = gr.Button("Download PDF")
    pdf_file = gr.File()

    # --- Event wiring ---
    # NOTE(review): ``target`` is listed twice because upload_and_clean
    # returns two dropdown updates; confirm whether one of them was meant
    # for a different component.
    upload_btn.click(
        fn=upload_and_clean,
        inputs=file,
        outputs=[status, preview, target, target],
    )

    run_btn.click(
        fn=full_analysis,
        inputs=target,
        outputs=[ml_status, no_table, cw_table, smote_table, gallery],
    )

    feat_btn.click(fn=feature_importance, inputs=None, outputs=feat_img)

    pdf_btn.click(fn=generate_pdf, inputs=None, outputs=pdf_file)

# share=True asks Gradio for a publicly reachable link in addition to the
# local server.
demo.launch(share=True)