# NOTE: page residue from the web file viewer this file was copied from
# (Spaces status: Sleeping; File size: 6,321 Bytes; revision 3561d8d).
# The viewer's line-number gutter (1-154) was not part of the source and is omitted.
import os
import sys
from src.exception.exception import DeliveryTimeException
from src.logging.logger import logging
from src.entity.artifact_entity import DataTransformationArtifact, ModelTrainerArtifact
from src.entity.config_entity import ModelTrainerConfig
from src.utils.ml_utils.model.estimator import DeliveryPredictionModel
from src.utils.main_utils.utils import save_object, load_object
from src.utils.main_utils.utils import load_numpy_array_data, evaluate_models
from src.utils.ml_utils.metric.regression_metric import get_regression_score
import pandas as pd
import xgboost as xgb
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import RandomizedSearchCV
import os
import joblib
import sys
import joblib
import mlflow
from dotenv import load_dotenv
# Load variables from a local .env file (if one is found) into os.environ.
load_dotenv()

# Re-export the MLflow connection settings into the process environment.
# os.environ only accepts str values, so assigning the result of os.getenv()
# for an unset variable (None) raises TypeError and would make this module
# un-importable. Unset variables are therefore left unset.
for _mlflow_env_name in (
    "MLFLOW_TRACKING_URI",
    "MLFLOW_TRACKING_USERNAME",
    "MLFLOW_TRACKING_PASSWORD",
):
    _mlflow_env_value = os.getenv(_mlflow_env_name)
    if _mlflow_env_value is not None:
        os.environ[_mlflow_env_name] = _mlflow_env_value
class ModelTrainer:
    """Train, evaluate, track and persist the delivery-time regression model.

    The trainer reads the transformed train/test arrays referenced by the
    data-transformation artifact, standardises the features, runs the
    hyper-parameter search through ``evaluate_models``, logs metrics and the
    model to MLflow, and writes the model plus the fitted scaler to disk.
    """

    # Names given to the feature columns of the transformed arrays, i.e. to
    # every column except the last one (the last column is used as the target).
    _FEATURE_NAMES = (
        'multiple_deliveries',
        'Road_traffic_density',
        'Vehicle_condition',
        'Delivery_person_Ratings',
        'distance_deliveries',
        'Weather_conditions',
        'Festival',
        'distance_traffic',
        'distance',
        'Delivery_person_Age',
        'prep_traffic',
        'City',
    )

    def __init__(
        self,
        model_trainer_config: "ModelTrainerConfig",
        data_transformation_artifact: "DataTransformationArtifact",
    ):
        """Store the configuration and the upstream artifact.

        Args:
            model_trainer_config: Trainer configuration; its
                ``trained_model_file_path`` is read later. A ``feature_names``
                list is attached to this object (the object is mutated).
            data_transformation_artifact: Provides ``transformed_train_file_path``
                and ``transformed_test_file_path``.

        Raises:
            DeliveryTimeException: Wraps any error raised while storing the inputs.
        """
        try:
            self.model_trainer_config = model_trainer_config
            self.data_transformation_artifact = data_transformation_artifact
            # A fresh list per instance so callers cannot mutate the class constant.
            self.model_trainer_config.feature_names = list(self._FEATURE_NAMES)
        except Exception as e:
            raise DeliveryTimeException(e, sys) from e

    @staticmethod
    def _best_model_name(model_report):
        """Return the key of *model_report* with the highest score.

        When several models share the highest score, the first one in the
        mapping's iteration order is returned.

        Raises:
            ValueError: If *model_report* is empty.
        """
        if not model_report:
            raise ValueError("model_report is empty; no model was evaluated")
        return max(model_report, key=model_report.get)

    def track_mlflow(self, best_model, regressionMetric, run_name=None):
        """Log one set of regression metrics and the model to a new MLflow run.

        Args:
            best_model: Estimator to serialise with joblib and attach to the run.
            regressionMetric: Object exposing ``r2_score``,
                ``mean_absolute_error`` and ``mean_squared_error``.
            run_name: Optional name for the MLflow run (for example ``"train"``
                or ``"test"``); ``None`` lets MLflow pick a name.
        """
        mlflow.set_tracking_uri(os.getenv("MLFLOW_TRACKING_URI"))
        with mlflow.start_run(run_name=run_name):
            # Metric keys match the values that are actually logged. These
            # were previously logged as "f1_score" / "precision" / "recall",
            # which are classification metrics and mislabelled the data.
            mlflow.log_metric("r2_score", regressionMetric.r2_score)
            mlflow.log_metric("mean_absolute_error", regressionMetric.mean_absolute_error)
            mlflow.log_metric("mean_squared_error", regressionMetric.mean_squared_error)
            # The model is written to the current working directory and then
            # uploaded as a run artifact under "model/".
            joblib.dump(best_model, "model.joblib")
            mlflow.log_artifact("model.joblib", artifact_path="model")

    def train_model(self, X_train, y_train, X_test, y_test):
        """Search hyper-parameters, score the best model, and persist it.

        Args:
            X_train: Training features.
            y_train: Training targets.
            X_test: Test features.
            y_test: Test targets.

        Returns:
            ModelTrainerArtifact: Holds the trained model file path plus the
            train and test regression metrics.

        Raises:
            DeliveryTimeException: Wraps any error raised during training,
                tracking or persistence.
        """
        try:
            models = {
                "XGBoost Regression": xgb.XGBRegressor(random_state=42),
            }
            params = {
                "XGBoost Regression": {
                    'n_estimators': [50, 100, 150],
                    'max_depth': [5, 7, 9],
                    'learning_rate': [0.01, 0.05, 0.1],
                    'subsample': [0.6, 0.8, 1.0],
                    'colsample_bytree': [0.6, 0.8, 1.0],
                },
            }

            model_report: dict = evaluate_models(
                X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test,
                models=models, param=params,
            )

            best_model_name = self._best_model_name(model_report)
            # NOTE(review): this relies on evaluate_models fitting the
            # estimators inside `models` in place -- confirm against its
            # implementation, otherwise predict() below runs on an unfitted model.
            best_model = models[best_model_name]

            y_train_pred = best_model.predict(X_train)
            regression_train_metric = get_regression_score(y_true=y_train, y_pred=y_train_pred)
            self.track_mlflow(best_model, regression_train_metric, run_name="train")

            y_test_pred = best_model.predict(X_test)
            regression_test_metric = get_regression_score(y_true=y_test, y_pred=y_test_pred)
            self.track_mlflow(best_model, regression_test_metric, run_name="test")

            trained_model_file_path = self.model_trainer_config.trained_model_file_path
            model_dir_path = os.path.dirname(trained_model_file_path)
            # os.makedirs("") raises, so skip it for a bare file name.
            if model_dir_path:
                os.makedirs(model_dir_path, exist_ok=True)

            Delivery_Prediction_Model = DeliveryPredictionModel(model=best_model)
            save_object(trained_model_file_path, obj=Delivery_Prediction_Model)

            # Model pusher: also keep the raw estimator next to the preprocessor.
            os.makedirs("final_model", exist_ok=True)
            save_object("final_model/model.pkl", best_model)

            model_trainer_artifact = ModelTrainerArtifact(
                trained_model_file_path=trained_model_file_path,
                train_metric_artifact=regression_train_metric,
                test_metric_artifact=regression_test_metric,
            )
            logging.info(f"Model trainer artifact: {model_trainer_artifact}")
            return model_trainer_artifact
        except Exception as e:
            raise DeliveryTimeException(e, sys) from e

    def initiate_model_trainer(self) -> "ModelTrainerArtifact":
        """Load the transformed arrays, scale the features and train the model.

        The last column of each loaded array is used as the target and the
        remaining columns as features. A ``StandardScaler`` is fitted on the
        training features only, applied to both splits, and saved together
        with the feature names to ``final_model/preprocessor.pkl``.

        Returns:
            ModelTrainerArtifact: The artifact produced by ``train_model``.

        Raises:
            DeliveryTimeException: Wraps any error raised while loading,
                scaling, training or saving.
        """
        try:
            train_file_path = self.data_transformation_artifact.transformed_train_file_path
            test_file_path = self.data_transformation_artifact.transformed_test_file_path

            train_arr = load_numpy_array_data(train_file_path)
            test_arr = load_numpy_array_data(test_file_path)
            logging.info(f"shape of training data: {train_arr.shape}")
            logging.info(f"Testing array: {test_arr.shape}")

            X_train, y_train = train_arr[:, :-1], train_arr[:, -1]
            X_test, y_test = test_arr[:, :-1], test_arr[:, -1]
            logging.info(f"X_train shape: {X_train.shape}")
            logging.info(f"X_test shape{X_test.shape}")

            feature_names = self.model_trainer_config.feature_names
            X_train_df = pd.DataFrame(X_train, columns=feature_names)
            X_test_df = pd.DataFrame(X_test, columns=feature_names)

            # Fit on the training split only; the test split is transformed
            # with the training statistics.
            scaler = StandardScaler()
            X_train_scaled = pd.DataFrame(scaler.fit_transform(X_train_df), columns=feature_names)
            X_test_scaled = pd.DataFrame(scaler.transform(X_test_df), columns=feature_names)

            # joblib.dump does not create missing parent directories.
            os.makedirs("final_model", exist_ok=True)
            joblib.dump({'scaler': scaler, 'feature_names': feature_names}, 'final_model/preprocessor.pkl')

            return self.train_model(X_train_scaled, y_train, X_test_scaled, y_test)
        except Exception as e:
            raise DeliveryTimeException(e, sys) from e