Spaces:

VashuTheGreat2
/

HousePricePredictor

Sleeping

File size: 5,370 Bytes

51f7cb3

import sys
from typing import Tuple

import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

from src.exception import MyException
from src.logger import logging
from src.utils.main_utils import load_numpy_array_data, load_object, save_object
from src.entity.config_entity import ModelTrainerConfig
from src.entity.artifact_entity import DataTransformationArtifact, ModelTrainerArtifact,RegressionMetricArtifact
from src.entity.estimator import MyModel
import mlflow
import mlflow.sklearn
from src.constants import REPO_OWNER,REPO_NAME

# NOTE: this call sits at module level, so it runs as a side effect of merely
# importing this module (before any ModelTrainer is created).
# NOTE(review): with mlflow=True this presumably points the MLflow client used
# below at the DagsHub repository named by REPO_OWNER/REPO_NAME, and may need
# DagsHub credentials/network access at import time -- confirm against the
# dagshub documentation.
import dagshub
dagshub.init(repo_owner=REPO_OWNER, repo_name=REPO_NAME, mlflow=True)



class ModelTrainer:
    """Train a RandomForestRegressor on transformed data and persist it.

    The object is set up in two steps: construct it, then await
    ``init_config`` to attach the data-transformation artifact and the trainer
    configuration that the other methods read.
    """

    def __init__(self):
        # Declared up front so the attributes always exist; init_config()
        # replaces these placeholders with the real objects.
        self.data_transformation_artifact = None
        self.model_trainer_config = None

    async def init_config(
        self,
        data_transformation_artifact: DataTransformationArtifact,
        model_trainer_config: ModelTrainerConfig
    ):
        """Attach the inputs used by the training methods.

        Args:
            data_transformation_artifact: Provides the transformed train/test
                array paths and the preprocessing object path.
            model_trainer_config: Provides the RandomForest hyperparameters
                and the path the trained model is saved to.
        """
        self.data_transformation_artifact = data_transformation_artifact
        self.model_trainer_config = model_trainer_config

    def _hyperparameters(self) -> dict:
        """Return the RandomForest hyperparameters read from the trainer config.

        A single dict feeds both the estimator constructor and MLflow, so the
        logged parameters cannot drift from the ones actually used.
        """
        config = self.model_trainer_config
        return {
            "n_estimators": config.n_estimators,
            "min_samples_split": config.min_samples_split,
            "min_samples_leaf": config.min_samples_leaf,
            "max_depth": config.max_depth,
            "criterion": config.criterion,
            "random_state": config.random_state,
        }

    @staticmethod
    def _log_run_to_mlflow(model: RandomForestRegressor, metrics: dict, params: dict) -> None:
        """Record metrics, hyperparameters and the fitted model in one MLflow run."""
        with mlflow.start_run():
            mlflow.log_metrics(metrics)
            mlflow.log_params(params)
            mlflow.sklearn.log_model(model, artifact_path="model")

    async def get_model_object_and_report(
        self, train: np.ndarray, test: np.ndarray
    ) -> Tuple[RandomForestRegressor, RegressionMetricArtifact]:
        """Fit the regressor on ``train`` and evaluate it on ``test``.

        Both arrays are indexed as 2-D, with the target in the last column and
        the features in all preceding columns.

        Args:
            train: Array used to fit the model.
            test: Array used to compute the evaluation metrics.

        Returns:
            The fitted model and a RegressionMetricArtifact holding the R2
            score, mean squared error and mean absolute error on ``test``.

        Raises:
            MyException: Wraps any error raised while training, evaluating or
                logging to MLflow.
        """
        try:
            logging.info("Training RandomForestRegressor with specified parameters")

            # Last column is the target; everything before it is a feature.
            x_train, y_train = train[:, :-1], train[:, -1]
            x_test, y_test = test[:, :-1], test[:, -1]

            logging.info("Train-test split done.")

            hyperparameters = self._hyperparameters()
            model = RandomForestRegressor(**hyperparameters)

            logging.info("Model training started...")
            model.fit(x_train, y_train)
            logging.info("Model training completed.")

            # Predictions and evaluation metrics
            y_pred = model.predict(x_test)
            r2 = r2_score(y_test, y_pred)
            mse = mean_squared_error(y_test, y_pred)
            mae = mean_absolute_error(y_test, y_pred)
            metric_artifact = RegressionMetricArtifact(r2_score=r2, mse=mse, mae=mae)

            # MLflow logging
            self._log_run_to_mlflow(
                model,
                metrics={"r2_score": r2, "mse": mse, "mae": mae},
                params=hyperparameters,
            )

            return model, metric_artifact

        except Exception as e:
            raise MyException(e, sys) from e

    async def initiate_model_trainer(self) -> ModelTrainerArtifact:
        """Run the full training step and return its artifact.

        Loads the transformed train/test arrays, trains and evaluates the
        model, loads the preprocessing object, wraps both in a ``MyModel`` and
        saves that object to the configured trained-model path.

        Returns:
            ModelTrainerArtifact carrying the saved model path and the
            evaluation metrics.

        Raises:
            MyException: If any step fails.
        """
        logging.info("Entered initiate_model_trainer method of ModelTrainer class")

        try:
            logging.info("Started Model Training Component")

            # Load transformed train-test data
            train_arr = await load_numpy_array_data(
                file_path=self.data_transformation_artifact.transformed_train_file_path
            )
            test_arr = await load_numpy_array_data(
                file_path=self.data_transformation_artifact.transformed_test_file_path
            )
            logging.info("Train-test data loaded successfully.")

            # Train model and get metrics
            trained_model, metric_artifact = await self.get_model_object_and_report(
                train=train_arr, test=test_arr
            )
            logging.info("Model object and metrics obtained.")

            # Load preprocessing object
            preprocessing_obj = await load_object(
                file_path=self.data_transformation_artifact.transformed_object_file_path
            )
            logging.info("Preprocessing object loaded.")

            # No comparison with a previous model happens here, so the log
            # message only states what is actually done.
            logging.info("Saving trained model together with its preprocessing object.")
            my_model = MyModel()
            await my_model.init_config(
                preprocessing_object=preprocessing_obj, trained_model_object=trained_model
            )
            await save_object(self.model_trainer_config.trained_model_file_path, my_model)
            logging.info("Saved final model object including preprocessing and trained model.")

            model_trainer_artifact = ModelTrainerArtifact(
                trained_model_file_path=self.model_trainer_config.trained_model_file_path,
                metric_artifact=metric_artifact
            )

            logging.info(f"Model trainer artifact: {model_trainer_artifact}")
            return model_trainer_artifact

        except MyException:
            # get_model_object_and_report already raises MyException;
            # wrapping it a second time would only bury the original error.
            raise
        except Exception as e:
            raise MyException(e, sys) from e