Spaces:
Sleeping
Sleeping
File size: 5,370 Bytes
51f7cb3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 | import sys
from typing import Tuple
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from src.exception import MyException
from src.logger import logging
from src.utils.main_utils import load_numpy_array_data, load_object, save_object
from src.entity.config_entity import ModelTrainerConfig
from src.entity.artifact_entity import DataTransformationArtifact, ModelTrainerArtifact,RegressionMetricArtifact
from src.entity.estimator import MyModel
import mlflow
import mlflow.sklearn
from src.constants import REPO_OWNER,REPO_NAME
import dagshub
# Initialise the DagsHub client for the repository identified by
# REPO_OWNER / REPO_NAME, with its MLflow integration switched on (mlflow=True).
# NOTE: this is a module-level call, so it executes as a side effect of
# importing this module, before any ModelTrainer is created.
# NOTE(review): presumably this is what routes the mlflow.* calls in this module
# to the DagsHub-hosted tracking server - confirm against the dagshub docs.
dagshub.init(repo_owner=REPO_OWNER, repo_name=REPO_NAME, mlflow=True)
class ModelTrainer:
    """Train a RandomForestRegressor on transformed data and persist it.

    Usage is two-step: construct the object, then ``await init_config(...)``
    to attach the data-transformation artifact and the trainer configuration
    before awaiting ``initiate_model_trainer()``.
    """

    def __init__(self):
        # Both attributes are filled in by ``init_config``. Starting them at
        # ``None`` lets ``initiate_model_trainer`` detect a trainer that was
        # never configured instead of failing with an opaque AttributeError.
        self.data_transformation_artifact = None
        self.model_trainer_config = None

    async def init_config(
        self,
        data_transformation_artifact: DataTransformationArtifact,
        model_trainer_config: ModelTrainerConfig
    ):
        """Attach the inputs required for training.

        Args:
            data_transformation_artifact: Provides the transformed train/test
                array paths and the preprocessing-object path read by
                ``initiate_model_trainer``.
            model_trainer_config: Provides the RandomForest hyper-parameters
                and ``trained_model_file_path``.
        """
        self.data_transformation_artifact = data_transformation_artifact
        self.model_trainer_config = model_trainer_config

    async def get_model_object_and_report(
        self, train: np.ndarray, test: np.ndarray
    ) -> Tuple[RandomForestRegressor, RegressionMetricArtifact]:
        """Fit the regressor, score it on ``test`` and log the run to MLflow.

        Args:
            train: Array indexed as ``[:, :-1]`` for features and ``[:, -1]``
                for the target, i.e. the last column is the target.
            test: Array with the same column layout as ``train``.

        Returns:
            The fitted ``RandomForestRegressor`` and a
            ``RegressionMetricArtifact`` holding r2, MSE and MAE computed on
            ``test``.

        Raises:
            MyException: Wraps any error raised while splitting, fitting,
                scoring or logging to MLflow.
        """
        try:
            logging.info("Training RandomForestRegressor with specified parameters")
            x_train, y_train = train[:, :-1], train[:, -1]
            x_test, y_test = test[:, :-1], test[:, -1]
            logging.info("Train-test split done.")

            # Single source of truth for the hyper-parameters: the same
            # mapping builds the estimator and is logged to MLflow, so the
            # two can never drift apart.
            config = self.model_trainer_config
            hyperparameters = {
                "n_estimators": config.n_estimators,
                "min_samples_split": config.min_samples_split,
                "min_samples_leaf": config.min_samples_leaf,
                "max_depth": config.max_depth,
                "criterion": config.criterion,
                "random_state": config.random_state,
            }
            model = RandomForestRegressor(**hyperparameters)

            # NOTE(review): fit() is synchronous CPU-bound work inside a
            # coroutine, so the event loop is blocked until it returns.
            logging.info("Model training started...")
            model.fit(x_train, y_train)
            logging.info("Model training completed.")

            # Predictions and evaluation metrics
            y_pred = model.predict(x_test)
            r2 = r2_score(y_test, y_pred)
            mse = mean_squared_error(y_test, y_pred)
            mae = mean_absolute_error(y_test, y_pred)
            metric_artifact = RegressionMetricArtifact(r2_score=r2, mse=mse, mae=mae)

            # MLflow logging: batch calls send all metrics / params together
            # instead of issuing one tracking request per value.
            with mlflow.start_run():
                mlflow.log_metrics({"r2_score": r2, "mse": mse, "mae": mae})
                mlflow.log_params(hyperparameters)
                mlflow.sklearn.log_model(model, artifact_path="model")

            return model, metric_artifact
        except Exception as e:
            raise MyException(e, sys) from e

    async def initiate_model_trainer(self) -> ModelTrainerArtifact:
        """Run the training step end to end and save the combined model.

        Loads the transformed train/test arrays, trains and evaluates the
        regressor, loads the preprocessing object, bundles both into a
        ``MyModel`` and saves it to ``trained_model_file_path``.

        Returns:
            A ``ModelTrainerArtifact`` with the saved model path and the
            evaluation metrics.

        Raises:
            MyException: If the trainer was not configured via
                ``init_config`` or if any step fails. A ``MyException``
                raised by a callee is propagated unchanged.
        """
        logging.info("Entered initiate_model_trainer method of ModelTrainer class")
        try:
            if self.data_transformation_artifact is None or self.model_trainer_config is None:
                raise RuntimeError(
                    "ModelTrainer is not configured; await init_config() "
                    "before initiate_model_trainer()."
                )

            logging.info("Started Model Training Component")

            # Load transformed train-test data
            train_arr = await load_numpy_array_data(
                file_path=self.data_transformation_artifact.transformed_train_file_path
            )
            test_arr = await load_numpy_array_data(
                file_path=self.data_transformation_artifact.transformed_test_file_path
            )
            logging.info("Train-test data loaded successfully.")

            # Train model and get metrics
            trained_model, metric_artifact = await self.get_model_object_and_report(
                train=train_arr, test=test_arr
            )
            logging.info("Model object and metrics obtained.")

            # Load preprocessing object
            preprocessing_obj = await load_object(
                file_path=self.data_transformation_artifact.transformed_object_file_path
            )
            logging.info("Preprocessing object loaded.")

            # The model is saved unconditionally: no comparison against a
            # previous model happens in this class, so the log says only that.
            logging.info("Saving trained model together with its preprocessing object.")
            my_model = MyModel()
            await my_model.init_config(
                preprocessing_object=preprocessing_obj, trained_model_object=trained_model
            )
            await save_object(self.model_trainer_config.trained_model_file_path, my_model)
            logging.info("Saved final model object including preprocessing and trained model.")

            model_trainer_artifact = ModelTrainerArtifact(
                trained_model_file_path=self.model_trainer_config.trained_model_file_path,
                metric_artifact=metric_artifact
            )
            logging.info(f"Model trainer artifact: {model_trainer_artifact}")
            return model_trainer_artifact
        except MyException:
            # Already wrapped by a callee (e.g. get_model_object_and_report);
            # re-raise as is rather than nesting it in a second MyException.
            raise
        except Exception as e:
            raise MyException(e, sys) from e
|