import os
import sys
from dataclasses import dataclass

from catboost import CatBoostClassifier
from sklearn.ensemble import (
    AdaBoostClassifier,
    GradientBoostingClassifier,
    RandomForestClassifier,
)
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier

from src.exception import CustomException
from src.logger import logging

from src.utils import save_object,evaluate_models

@dataclass
class ModelTrainerConfig:
    """Configuration for the model-training step."""

    # Destination path of the serialized best model. The type annotation is
    # what makes this a real dataclass field (overridable through __init__ and
    # included in __repr__/__eq__); without it, it is only a class attribute.
    trained_model_file_path: str = os.path.join("artifacts", "model.pkl")

class ModelTrainer:
    """Evaluate a set of candidate classifiers and persist the best-scoring one."""

    # Scores below this value only trigger a warning; the best model found is
    # still saved so the pipeline does not abort on modest scores.
    _LOW_SCORE_THRESHOLD = 0.5

    def __init__(self):
        self.model_trainer_config = ModelTrainerConfig()

    @staticmethod
    def _build_models():
        """Return a fresh mapping of model name -> candidate estimator."""
        return {
            "Random Forest": RandomForestClassifier(),
            "Decision Tree": DecisionTreeClassifier(),
            "Gradient Boosting": GradientBoostingClassifier(),
            "Logistic Regression": LogisticRegression(max_iter=1000),
            "XGBClassifier": XGBClassifier(
                use_label_encoder=False, eval_metric='logloss'
            ),
            "CatBoosting Classifier": CatBoostClassifier(verbose=False),
            "AdaBoost Classifier": AdaBoostClassifier(),
        }

    @staticmethod
    def _build_param_grids():
        """Return the hyper-parameter grid for each candidate, keyed by model name."""
        return {
            "Decision Tree": {
                'criterion': ['gini', 'entropy'],
            },
            "Random Forest": {
                'n_estimators': [16, 32, 64],
            },
            "Gradient Boosting": {
                'learning_rate': [.1, .01, .05],
                'subsample': [0.7, 0.8, 0.9],
                'n_estimators': [16, 32, 64],
            },
            "Logistic Regression": {
                'C': [0.01, 0.1, 1, 10],
            },
            "XGBClassifier": {
                'learning_rate': [.1, .01, .05],
                'n_estimators': [16, 32, 64],
            },
            "CatBoosting Classifier": {
                'depth': [4, 6],
                'learning_rate': [0.01, 0.05, 0.1],
                'iterations': [50, 100],
            },
            "AdaBoost Classifier": {
                'learning_rate': [.1, .01, 0.5],
                'n_estimators': [16, 32, 64],
            },
        }

    def initiate_model_trainer(self, train_array, test_array):
        """Select the best candidate model, save it, and return its test accuracy.

        Args:
            train_array: 2-D array whose last column is the target and whose
                remaining columns are the features.
            test_array: 2-D array laid out like ``train_array``.

        Returns:
            The accuracy of the selected model's predictions on the test split.

        Raises:
            CustomException: Wraps any exception raised during the run.
        """
        try:
            logging.info("Split training and test input data")
            X_train, y_train = train_array[:, :-1], train_array[:, -1]
            X_test, y_test = test_array[:, :-1], test_array[:, -1]

            models = self._build_models()
            params = self._build_param_grids()

            model_report: dict = evaluate_models(
                X_train=X_train,
                y_train=y_train,
                X_test=X_test,
                y_test=y_test,
                models=models,
                param=params,
            )

            if not model_report:
                # Fail with a clear message instead of max()'s
                # "arg is an empty sequence".
                raise ValueError("evaluate_models returned an empty report")

            # Highest-scoring entry; on ties the first one in report order
            # wins, matching a first-index lookup on the values.
            best_model_name = max(model_report, key=model_report.get)
            best_model_score = model_report[best_model_name]

            # NOTE(review): the estimator is taken from `models`, so this
            # presumes evaluate_models fits those instances in place --
            # confirm against src.utils.
            best_model = models[best_model_name]

            if best_model_score < self._LOW_SCORE_THRESHOLD:
                logging.warning(
                    "Best model score %.3f is below 0.5; saving best found model anyway.",
                    best_model_score,
                )
            else:
                logging.info("Best found model on both training and testing dataset")

            save_object(
                file_path=self.model_trainer_config.trained_model_file_path,
                obj=best_model,
            )

            predicted = best_model.predict(X_test)
            return accuracy_score(y_test, predicted)

        except Exception as e:
            raise CustomException(e, sys) from e