nivakaran committed on
Commit
3561d8d
·
verified ·
1 Parent(s): b1af7f1

Create model_trainer.py

Browse files
Files changed (1) hide show
  1. src/components/model_trainer.py +154 -0
src/components/model_trainer.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+
4
+ from src.exception.exception import DeliveryTimeException
5
+ from src.logging.logger import logging
6
+
7
+
8
+ from src.entity.artifact_entity import DataTransformationArtifact, ModelTrainerArtifact
9
+ from src.entity.config_entity import ModelTrainerConfig
10
+
11
+ from src.utils.ml_utils.model.estimator import DeliveryPredictionModel
12
+ from src.utils.main_utils.utils import save_object, load_object
13
+ from src.utils.main_utils.utils import load_numpy_array_data, evaluate_models
14
+ from src.utils.ml_utils.metric.regression_metric import get_regression_score
15
+
16
+ import pandas as pd
17
+ import xgboost as xgb
18
+ from sklearn.preprocessing import StandardScaler
19
+ from sklearn.model_selection import RandomizedSearchCV
20
+ import os
21
+ import joblib
22
+ import sys
23
+ import joblib
24
+
25
+ import mlflow
26
+
27
+ from dotenv import load_dotenv
28
+ load_dotenv()
29
+
30
# Re-export the MLflow connection settings into the process environment.
# os.environ only accepts str values, so assigning the result of os.getenv()
# for an unset variable (None) raises TypeError at import time. Only copy
# the variables that are actually defined and leave the others absent.
for _mlflow_var in (
    "MLFLOW_TRACKING_URI",
    "MLFLOW_TRACKING_USERNAME",
    "MLFLOW_TRACKING_PASSWORD",
):
    _mlflow_value = os.getenv(_mlflow_var)
    if _mlflow_value is not None:
        os.environ[_mlflow_var] = _mlflow_value
33
+
34
+
35
class ModelTrainer:
    """Train, evaluate, track and persist the delivery-time regression model.

    The trainer reads the transformed train/test arrays referenced by a
    ``DataTransformationArtifact``, standardises the features, tunes an
    XGBoost regressor through ``evaluate_models``, logs metrics to MLflow and
    writes the fitted model and preprocessor to disk.
    """

    def __init__(
        self,
        model_trainer_config: ModelTrainerConfig,
        data_transformation_artifact: DataTransformationArtifact,
    ):
        """Store the trainer configuration and the upstream artifact.

        Args:
            model_trainer_config: Trainer settings; its ``feature_names``
                attribute is overwritten here with the fixed column list.
            data_transformation_artifact: Provides the paths of the
                transformed train and test arrays.

        Raises:
            DeliveryTimeException: If any assignment fails.
        """
        try:
            self.model_trainer_config = model_trainer_config
            self.data_transformation_artifact = data_transformation_artifact
            # Column labels applied to the feature part of the transformed
            # arrays. NOTE(review): the order presumably has to match the
            # column order produced by the data transformation step - confirm.
            self.model_trainer_config.feature_names = [
                'multiple_deliveries',
                'Road_traffic_density',
                'Vehicle_condition',
                'Delivery_person_Ratings',
                'distance_deliveries',
                'Weather_conditions',
                'Festival',
                'distance_traffic',
                'distance',
                'Delivery_person_Age',
                'prep_traffic',
                'City',
            ]
        except Exception as e:
            raise DeliveryTimeException(e, sys) from e

    def track_mlflow(self, best_model, regressionMetric, run_name=None):
        """Log regression metrics and the serialized model to a new MLflow run.

        Args:
            best_model: Fitted estimator to serialize and upload.
            regressionMetric: Object exposing ``r2_score``,
                ``mean_absolute_error`` and ``mean_squared_error``.
            run_name: Optional display name for the MLflow run (for example
                ``"train"`` or ``"test"``). ``None`` lets MLflow pick a name.
        """
        mlflow.set_tracking_uri(os.getenv("MLFLOW_TRACKING_URI"))

        with mlflow.start_run(run_name=run_name):
            # Log each value under the name of the metric it actually holds;
            # these are regression scores, not classification scores.
            mlflow.log_metric("r2_score", regressionMetric.r2_score)
            mlflow.log_metric(
                "mean_absolute_error", regressionMetric.mean_absolute_error
            )
            mlflow.log_metric(
                "mean_squared_error", regressionMetric.mean_squared_error
            )

            # The model is written to the working directory first because
            # log_artifact uploads an existing local file.
            joblib.dump(best_model, "model.joblib")
            mlflow.log_artifact("model.joblib", artifact_path="model")

    def train_model(self, X_train, y_train, X_test, y_test):
        """Tune the candidate models, score the best one and persist it.

        Args:
            X_train: Training features.
            y_train: Training targets.
            X_test: Test features.
            y_test: Test targets.

        Returns:
            ModelTrainerArtifact: Path of the saved model together with the
            train and test regression metrics.

        Raises:
            DeliveryTimeException: Wraps any error raised during training,
                tracking or persistence.
        """
        try:
            models = {
                "XGBoost Regression": xgb.XGBRegressor(random_state=42),
            }

            params = {
                "XGBoost Regression": {
                    'n_estimators': [50, 100, 150],
                    'max_depth': [5, 7, 9],
                    'learning_rate': [0.01, 0.05, 0.1],
                    'subsample': [0.6, 0.8, 1.0],
                    'colsample_bytree': [0.6, 0.8, 1.0],
                },
            }

            model_report: dict = evaluate_models(
                X_train=X_train,
                y_train=y_train,
                X_test=X_test,
                y_test=y_test,
                models=models,
                param=params,
            )

            # Highest-scoring entry; ties resolve to the first key, as before.
            best_model_name = max(model_report, key=model_report.get)
            # NOTE(review): this relies on evaluate_models having fitted the
            # estimators stored in `models` in place - confirm in utils.
            best_model = models[best_model_name]

            y_train_pred = best_model.predict(X_train)
            regression_train_metric = get_regression_score(
                y_true=y_train, y_pred=y_train_pred
            )
            self.track_mlflow(best_model, regression_train_metric, run_name="train")

            y_test_pred = best_model.predict(X_test)
            regression_test_metric = get_regression_score(
                y_true=y_test, y_pred=y_test_pred
            )
            self.track_mlflow(best_model, regression_test_metric, run_name="test")

            trained_model_path = self.model_trainer_config.trained_model_file_path
            model_dir_path = os.path.dirname(trained_model_path)
            os.makedirs(model_dir_path, exist_ok=True)

            delivery_prediction_model = DeliveryPredictionModel(model=best_model)
            save_object(trained_model_path, obj=delivery_prediction_model)

            # Model pusher: keep a copy of the raw estimator in final_model/.
            # Create the directory explicitly so the write cannot fail on a
            # fresh checkout.
            final_model_path = "final_model/model.pkl"
            os.makedirs(os.path.dirname(final_model_path), exist_ok=True)
            save_object(final_model_path, best_model)

            model_trainer_artifact = ModelTrainerArtifact(
                trained_model_file_path=trained_model_path,
                train_metric_artifact=regression_train_metric,
                test_metric_artifact=regression_test_metric,
            )
            logging.info(f"Model trainer artifact: {model_trainer_artifact}")
            return model_trainer_artifact

        except Exception as e:
            raise DeliveryTimeException(e, sys) from e

    def initiate_model_trainer(self) -> ModelTrainerArtifact:
        """Load the transformed data, scale the features and train the model.

        Returns:
            ModelTrainerArtifact: The artifact produced by ``train_model``.

        Raises:
            DeliveryTimeException: Wraps any error raised while loading,
                scaling, persisting the preprocessor or training.
        """
        try:
            train_file_path = self.data_transformation_artifact.transformed_train_file_path
            test_file_path = self.data_transformation_artifact.transformed_test_file_path

            train_arr = load_numpy_array_data(train_file_path)
            test_arr = load_numpy_array_data(test_file_path)

            logging.info(f"shape of training data: {train_arr.shape}")
            logging.info(f"Testing array: {test_arr.shape}")

            # The last column is the target; everything before it is a feature.
            X_train, y_train = train_arr[:, :-1], train_arr[:, -1]
            X_test, y_test = test_arr[:, :-1], test_arr[:, -1]

            logging.info(f"X_train shape: {X_train.shape}")
            logging.info(f"X_test shape{X_test.shape}")
            feature_names = self.model_trainer_config.feature_names

            X_train_df = pd.DataFrame(X_train, columns=feature_names)
            X_test_df = pd.DataFrame(X_test, columns=feature_names)

            # Fit the scaler on the training split only, then reuse it for test.
            scaler = StandardScaler()
            X_train_scaled = pd.DataFrame(
                scaler.fit_transform(X_train_df), columns=feature_names
            )
            X_test_scaled = pd.DataFrame(
                scaler.transform(X_test_df), columns=feature_names
            )

            # joblib.dump does not create missing parent directories, so make
            # sure the target directory exists before writing the preprocessor.
            preprocessor_path = 'final_model/preprocessor.pkl'
            os.makedirs(os.path.dirname(preprocessor_path), exist_ok=True)
            joblib.dump(
                {'scaler': scaler, 'feature_names': feature_names},
                preprocessor_path,
            )

            model_trainer_artifact = self.train_model(
                X_train_scaled, y_train, X_test_scaled, y_test
            )
            return model_trainer_artifact

        except Exception as e:
            raise DeliveryTimeException(e, sys) from e