import os
import sys


from src.exception.exception import DeliveryTimeException
from src.logging.logger import logging


from src.entity.artifact_entity import DataTransformationArtifact, ModelTrainerArtifact
from src.entity.config_entity import ModelTrainerConfig


from src.utils.ml_utils.model.estimator import DeliveryPredictionModel
from src.utils.main_utils.utils import save_object, load_object
from src.utils.main_utils.utils import load_numpy_array_data, evaluate_models
from src.utils.ml_utils.metric.regression_metric import get_regression_score


import pandas as pd
from xgboost import XGBRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import RandomizedSearchCV
import os
import joblib
import sys
import mlflow


from dotenv import load_dotenv
load_dotenv()


# Re-export the MLflow credentials into the process environment, but only the
# ones that are actually defined. Assigning None to os.environ raises
# TypeError, so a missing variable used to make this module unimportable.
for _mlflow_env_name in (
    "MLFLOW_TRACKING_URI",
    "MLFLOW_TRACKING_USERNAME",
    "MLFLOW_TRACKING_PASSWORD",
):
    _mlflow_env_value = os.getenv(_mlflow_env_name)
    if _mlflow_env_value is None:
        # Not fatal here: the failure (if any) should surface when MLflow is
        # actually used, not when the module is merely imported.
        logging.warning(f"Environment variable {_mlflow_env_name} is not set")
    else:
        os.environ[_mlflow_env_name] = _mlflow_env_value


class ModelTrainer:
    """Train candidate regressors, select the best one, track it and persist it.

    The trainer reads the transformed train/test arrays referenced by the data
    transformation artifact, hands a fixed set of candidate models and
    hyper-parameter grids to ``evaluate_models``, logs metrics for the winning
    model to MLflow and saves the model to disk.
    """

    def __init__(self, model_trainer_config: "ModelTrainerConfig",
                 data_transformation_artifact: "DataTransformationArtifact"):
        """Store the configuration and the upstream transformation artifact.

        Args:
            model_trainer_config: Trainer configuration. Its ``feature_names``
                attribute is overwritten here with the hard-coded list below.
            data_transformation_artifact: Artifact exposing the transformed
                train/test file paths.

        Raises:
            DeliveryTimeException: If storing the attributes fails.
        """
        try:
            self.model_trainer_config = model_trainer_config
            self.data_transformation_artifact = data_transformation_artifact
            # Feature names are meant to match the pipeline output with the
            # scaler applied (hence the "scaler__" prefix on numeric columns).
            # NOTE(review): this list is hard-coded; confirm it stays in sync
            # with the transformation pipeline.
            self.model_trainer_config.feature_names = [
                'scaler__Distance_km',
                'scaler__Courier_Experience_yrs',
                'Vehicle_Type_Pickup Truck',
                'Vehicle_Type_Scooter',
                'Weather_Foggy',
                'Weather_Rainy',
                'Weather_Snowy',
                'Weather_Windy',
                'Time_of_Day_Evening',
                'Time_of_Day_Morning',
                'Time_of_Day_Night',
                'Traffic_Level_Low',
                'Traffic_Level_Medium'
            ]
        except Exception as e:
            raise DeliveryTimeException(e, sys) from e

    def track_mlflow(self, best_model, regressionMetric, run_name=None):
        """Log regression metrics and the serialized model in a new MLflow run.

        Every call opens its own run, so train and test metrics logged through
        two calls end up in two separate runs.

        Args:
            best_model: Estimator to serialize with joblib and upload under the
                ``model`` artifact path as ``model.joblib``.
            regressionMetric: Object exposing ``r2_score``,
                ``mean_absolute_error`` and ``mean_squared_error`` attributes.
            run_name: Optional name for the MLflow run; ``None`` lets MLflow
                pick one.
        """
        # Local import: tempfile is only needed for the artifact upload below.
        import tempfile

        tracking_uri = os.getenv("MLFLOW_TRACKING_URI")
        if tracking_uri:
            mlflow.set_tracking_uri(tracking_uri)

        with mlflow.start_run(run_name=run_name):
            mlflow.log_metric("r2_score", regressionMetric.r2_score)
            mlflow.log_metric("Mean_Absolute_Error", regressionMetric.mean_absolute_error)
            mlflow.log_metric("Mean_Squared_Error", regressionMetric.mean_squared_error)

            # Serialize into a throwaway directory so no stray model.joblib is
            # left behind (or clobbered) in the current working directory.
            with tempfile.TemporaryDirectory() as tmp_dir:
                model_path = os.path.join(tmp_dir, "model.joblib")
                joblib.dump(best_model, model_path)
                mlflow.log_artifact(model_path, artifact_path="model")

    @staticmethod
    def _select_best_model(model_report):
        """Return ``(name, score)`` of the highest-scoring entry in the report.

        On ties the first entry in the report's iteration order wins.

        Raises:
            ValueError: If the report is empty.
        """
        if not model_report:
            raise ValueError("Model report is empty; no model to select")
        best_name = max(model_report, key=model_report.get)
        return best_name, model_report[best_name]

    def train_model(self, X_train, y_train, X_test, y_test):
        """Evaluate the candidate models, then track and persist the best one.

        Args:
            X_train: Training features.
            y_train: Training targets.
            X_test: Test features.
            y_test: Test targets.

        Returns:
            ModelTrainerArtifact carrying the trained model path plus the train
            and test regression metrics.

        Raises:
            DeliveryTimeException: On any failure during training, tracking or
                saving.
        """
        try:
            models = {
                "XGBoost Regression": XGBRegressor(objective='reg:squarederror', random_state=42, n_jobs=-1),
                "RandomForest": RandomForestRegressor(),
                "GradientBoostRegressor": GradientBoostingRegressor()
            }

            params = {
                "XGBoost Regression": {
                    'n_estimators': [500, 700, 100, 150],
                    'max_depth': [3, 4, 5],
                    'learning_rate': [0.01, 0.05],
                    'subsample': [0.6, 0.8, 1.0],
                    'colsample_bytree': [0.6, 0.7, 0.8]
                },
                'GradientBoostRegressor': {
                    'n_estimators': [1000, 500],
                    'min_samples_split': [2, 8],
                    'criterion': ['friedman_mse', 'squared_error'],
                    'loss': ['squared_error', 'huber'],
                    'max_depth': [5, None]
                },
                'RandomForest': {
                    'n_estimators': [1000],
                    'min_samples_split': [2],
                    'max_features': [7],
                    'max_depth': [None]
                }
            }

            model_report: dict = evaluate_models(X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test,
                                                 models=models, param=params)

            best_model_name, best_model_score = self._select_best_model(model_report)
            logging.info(f"Best model: {best_model_name} with score: {best_model_score}")

            # NOTE(review): this assumes evaluate_models fits the estimators
            # held in `models` in place; otherwise predict() below would run on
            # an unfitted model. Confirm against evaluate_models.
            best_model = models[best_model_name]

            y_train_pred = best_model.predict(X_train)
            regression_train_metric = get_regression_score(y_true=y_train, y_pred=y_train_pred)
            self.track_mlflow(best_model, regression_train_metric, run_name=f"{best_model_name}_train")

            y_test_pred = best_model.predict(X_test)
            regression_test_metric = get_regression_score(y_true=y_test, y_pred=y_test_pred)
            self.track_mlflow(best_model, regression_test_metric, run_name=f"{best_model_name}_test")

            model_dir_path = os.path.dirname(self.model_trainer_config.trained_model_file_path)
            # dirname is "" for a bare file name, and os.makedirs("") raises.
            if model_dir_path:
                os.makedirs(model_dir_path, exist_ok=True)

            delivery_prediction_model = DeliveryPredictionModel(model=best_model)
            save_object(self.model_trainer_config.trained_model_file_path, obj=delivery_prediction_model)

            # Model pusher: also store the raw estimator (not the wrapper) at a
            # fixed location. Make sure the directory exists before writing.
            os.makedirs("final_model", exist_ok=True)
            save_object("final_model/model.pkl", best_model)

            model_trainer_artifact = ModelTrainerArtifact(
                trained_model_file_path=self.model_trainer_config.trained_model_file_path,
                train_metric_artifact=regression_train_metric,
                test_metric_artifact=regression_test_metric
            )
            logging.info(f"Model trainer artifact: {model_trainer_artifact}")
            return model_trainer_artifact

        except DeliveryTimeException:
            # Already wrapped further down the stack; do not wrap it again.
            raise
        except Exception as e:
            raise DeliveryTimeException(e, sys) from e

    def initiate_model_trainer(self) -> "ModelTrainerArtifact":
        """Load the transformed arrays, split features/target and train.

        The last column of each loaded array is used as the target and all
        preceding columns as features.

        Returns:
            The ModelTrainerArtifact produced by ``train_model``.

        Raises:
            DeliveryTimeException: On any failure while loading or training.
        """
        try:
            train_file_path = self.data_transformation_artifact.transformed_train_file_path
            test_file_path = self.data_transformation_artifact.transformed_test_file_path

            train_arr = load_numpy_array_data(train_file_path)
            test_arr = load_numpy_array_data(test_file_path)

            logging.info(f"Shape of training data: {train_arr.shape}")
            logging.info(f"Testing array: {test_arr.shape}")

            X_train, y_train, X_test, y_test = (
                train_arr[:, :-1],
                train_arr[:, -1],
                test_arr[:, :-1],
                test_arr[:, -1]
            )

            logging.info(f"X_train shape: {X_train.shape}")
            logging.info(f"X_test sahpe: {X_test.shape}")

            # No scaling or DataFrame conversion happens here: the model is
            # trained directly on the already-transformed arrays.
            model_trainer_artifact = self.train_model(X_train, y_train, X_test, y_test)
            return model_trainer_artifact

        except DeliveryTimeException:
            # train_model already raised the project exception; keep it as is.
            raise
        except Exception as e:
            raise DeliveryTimeException(e, sys) from e