import streamlit as st
import pandas as pd
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import sklearn
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score, mean_squared_error, r2_score,
    classification_report, confusion_matrix
)
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.linear_model import LinearRegression

import warnings
# Install a global "ignore" filter: with no category or module argument this
# suppresses every warning raised anywhere in the process from here on
# (presumably to keep library notices out of the app output — confirm intent).
warnings.filterwarnings("ignore")

def _detect_problem_type(y, max_classes=20):
    """Return ``"classification"`` or ``"regression"`` for target series *y*.

    A target is treated as classification when it is boolean or non-numeric
    (label strings cannot be fed to a regressor), or when it is integer-typed
    with at most *max_classes* distinct values. Everything else is regression.
    """
    if pd.api.types.is_bool_dtype(y) or not pd.api.types.is_numeric_dtype(y):
        return "classification"
    if pd.api.types.is_integer_dtype(y) and y.nunique() <= max_classes:
        return "classification"
    return "regression"


def _build_models(problem_type, random_state=42):
    """Return a ``{display name: unfitted estimator}`` dict for *problem_type*.

    Tree-based estimators are seeded with *random_state* so that repeated
    script runs on the same data produce the same fitted models and scores.
    """
    if problem_type == "classification":
        return {
            "Random Forest Classifier": RandomForestClassifier(random_state=random_state),
            "Decision Tree Classifier": DecisionTreeClassifier(random_state=random_state),
            "KNN Classifier": KNeighborsClassifier(),
        }
    return {
        "Random Forest Regressor": RandomForestRegressor(random_state=random_state),
        "Decision Tree Regressor": DecisionTreeRegressor(random_state=random_state),
        "KNN Regressor": KNeighborsRegressor(),
        "Linear Regression": LinearRegression(),
    }


def model_training() -> None:
    """Render the "Model Training & Evaluation" Streamlit page.

    Workflow:
      1. List the CSV files in ``preprocessed_data`` and load the selected one.
      2. Let the user pick the target column; all other columns are features.
      3. Detect whether the task is classification or regression.
      4. Hold out 20% of the rows as a test set (``random_state=42``).
      5. Fit every candidate model and score it on the test set
         (accuracy for classification, R² for regression, both times 100).
      6. Show the score table, the best model and its detailed metrics.
      7. On request, dump the best model to ``saved_models/<name>.pkl`` with
         joblib and offer it as a download.

    The function returns nothing; all output goes through Streamlit. It
    returns early, after showing a message, when there is no data to load,
    the dataset is too small, or no model could be trained or scored.
    """
    st.title("Model Training & Evaluation")

    # Step 1: Load preprocessed data
    data_dir = 'preprocessed_data'
    files = []
    if os.path.isdir(data_dir):
        # Only offer CSV files: other entries (sub-directories, hidden files)
        # would make pd.read_csv fail. Sorted for a stable selectbox order.
        files = sorted(
            entry for entry in os.listdir(data_dir)
            if entry.lower().endswith('.csv')
        )
    if not files:
        st.warning("No preprocessed data found. Please preprocess a file first.")
        return

    selected_file = st.selectbox("Select a preprocessed CSV file", files)
    df = pd.read_csv(os.path.join(data_dir, selected_file))
    st.write("Data Preview:")
    st.dataframe(df.head())

    # Step 2: Select target column
    target_column = st.selectbox("Select Target Column", df.columns)
    X = df.drop(columns=[target_column])
    y = df[target_column]

    # A split needs at least one training and one test row, and the models
    # need at least one feature column.
    if X.shape[1] == 0 or len(df) < 2:
        st.warning(
            "The dataset needs at least one feature column and two rows "
            "to train a model."
        )
        return

    # Step 3: Detect problem type
    problem_type = _detect_problem_type(y)
    if problem_type == "classification":
        st.info("Detected as Classification Problem")
    else:
        st.info("Detected as Regression Problem")

    # Step 4: Train/Test Split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Step 5 & 6: Build the candidate models, then train and evaluate each.
    metric = accuracy_score if problem_type == "classification" else r2_score
    results = []  # (name, score in percent, fitted model, test predictions)
    for name, model in _build_models(problem_type).items():
        try:
            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)
        except ValueError as exc:
            # One model rejecting the data (e.g. too few samples for KNN)
            # should not take the whole page down; report it and move on.
            st.warning(f"Skipping {name}: {exc}")
            continue
        results.append((name, metric(y_test, y_pred) * 100, model, y_pred))

    if not results:
        st.error("None of the models could be trained on this dataset.")
        return

    # Step 7: Show model performance table
    results_df = pd.DataFrame(
        {
            "Model": [row[0] for row in results],
            "Score": [row[1] for row in results],
        }
    )
    st.write("Model Performance:")
    st.dataframe(results_df)

    # Step 8: Best Model Selection
    if results_df["Score"].isna().all():
        # idxmax has nothing to pick from when every score is NaN.
        st.error("No valid score could be computed for any model.")
        return
    best_idx = results_df["Score"].idxmax()
    best_name, best_score, best_model, best_y_pred = results[best_idx]
    st.success(f"Best Model: {best_name} with Score: {best_score:.4f}")
    # Show only the name and score; the fitted estimator and the raw
    # prediction array are not useful to display.
    st.write("Best Model Details:", results_df.loc[best_idx])

    # Step 9: Detailed Metrics for Best Model
    st.subheader("Detailed Metrics for Best Model")

    if problem_type == "classification":
        st.write("**Accuracy Score:**", accuracy_score(y_test, best_y_pred))
        # st.write renders Markdown; st.text would show the asterisks as-is.
        st.write("**Classification Report:**")
        st.text(classification_report(y_test, best_y_pred))

        # Confusion Matrix
        cm = confusion_matrix(y_test, best_y_pred)
        fig, ax = plt.subplots()
        sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", ax=ax)
        ax.set_xlabel("Predicted")
        ax.set_ylabel("Actual")
        st.pyplot(fig)
        # Streamlit re-executes the script on every interaction; close the
        # figure so pyplot does not keep one more open figure per rerun.
        plt.close(fig)
    else:
        mse = mean_squared_error(y_test, best_y_pred)
        st.write("**Mean Squared Error:**", mse)
        st.write("**Root Mean Squared Error:**", np.sqrt(mse))
        st.write("**R² Score:**", r2_score(y_test, best_y_pred))

    # Step 10: Save & Download Best Model
    # NOTE: st.button is True only in the rerun triggered by the click, so
    # the download button below is rendered for that single run.
    if st.button("Save Best Model"):
        st.write("Saving the best model...")
        st.write(f"Model Name: {best_name}")
        os.makedirs("saved_models", exist_ok=True)
        model_path = os.path.join(
            "saved_models", f"{best_name.replace(' ', '_')}.pkl"
        )
        joblib.dump(best_model, model_path)
        # Read the dump back through a context manager so the file handle is
        # closed deterministically.
        with open(model_path, "rb") as model_file:
            model_bytes = model_file.read()
        st.download_button(
            label="Download Model",
            data=model_bytes,
            file_name=os.path.basename(model_path)
        )