Inder-26 committed on
Commit
d3b5bd8
·
1 Parent(s): eae2854

Model Trainer And Evaluation With Hyperparameter Tuning

Browse files
main.py CHANGED
@@ -1,6 +1,7 @@
1
  from networksecurity.components.data_ingestion import DataIngestion
2
  from networksecurity.components.data_validation import DataValidation
3
  from networksecurity.components.data_transformation import DataTransformation
 
4
 
5
  from networksecurity.exception.exception import NetworkSecurityException
6
  from networksecurity.logging.logger import logging
@@ -30,5 +31,12 @@ if __name__ == "__main__":
30
  logging.info(f"Data transformation completed {data_transformation_artifact}")
31
  print(data_transformation_artifact)
32
 
 
 
 
 
 
 
 
33
  except Exception as e:
34
  raise NetworkSecurityException(e, sys)
 
1
  from networksecurity.components.data_ingestion import DataIngestion
2
  from networksecurity.components.data_validation import DataValidation
3
  from networksecurity.components.data_transformation import DataTransformation
4
+ from networksecurity.components.model_trainer import ModelTrainer,ModelTrainerConfig
5
 
6
  from networksecurity.exception.exception import NetworkSecurityException
7
  from networksecurity.logging.logger import logging
 
31
  logging.info(f"Data transformation completed {data_transformation_artifact}")
32
  print(data_transformation_artifact)
33
 
34
+ logging.info("Model Trainer Started")
35
+ model_trainer_config=ModelTrainerConfig(traningpipelineconfig)
36
+ model_trainer=ModelTrainer(model_trainer_config=model_trainer_config,
37
+ data_transformation_artifact=data_transformation_artifact)
38
+ model_trainer_artifact=model_trainer.initiate_model_trainer()
39
+ logging.info(f"Model Trainer completed {model_trainer_artifact}")
40
+
41
  except Exception as e:
42
  raise NetworkSecurityException(e, sys)
networksecurity/components/model_trainer.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os,sys
2
+ from networksecurity.exception.exception import NetworkSecurityException
3
+ from networksecurity.logging.logger import logging
4
+
5
+ from networksecurity.entity.config_entity import ModelTrainerConfig
6
+ from networksecurity.entity.artifact_entity import DataTransformationArtifact,ModelTrainerArtifact
7
+
8
+ from networksecurity.utils.ml_utils.model.estimator import NetworkModel
9
+ from networksecurity.utils.main_utils.utils import save_object,load_object,load_numpy_array_data,evaluate_models
10
+ from networksecurity.utils.ml_utils.metric.classfication_metric import get_classification_score
11
+
12
+ from sklearn.linear_model import LogisticRegression
13
+ from sklearn.metrics import r2_score
14
+ from sklearn.tree import DecisionTreeClassifier
15
+ from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier,AdaBoostClassifier
16
+
17
class ModelTrainer:
    """Train several candidate classifiers with hyperparameter tuning and
    persist the best one, bundled with its preprocessor, as a NetworkModel."""

    def __init__(self, model_trainer_config: ModelTrainerConfig,
                 data_transformation_artifact: DataTransformationArtifact):
        """
        Args:
            model_trainer_config: Paths and thresholds for the trainer stage.
            data_transformation_artifact: Locations of the transformed
                train/test arrays and the fitted preprocessing object.
        """
        try:
            logging.info(f"{'>>'*20} Model Trainer {'<<'*20}")
            self.model_trainer_config = model_trainer_config
            self.data_transformation_artifact = data_transformation_artifact
        except Exception as e:
            raise NetworkSecurityException(e, sys)

    def train_model(self, X_train, X_test, y_train, y_test):
        """Tune and fit the candidate models, pick the best test-score one,
        compute train/test classification metrics, and save the bundle.

        Returns:
            ModelTrainerArtifact with the saved model path and both metric
            artifacts.
        Raises:
            NetworkSecurityException: wraps any failure while training or
                saving (was missing in the original, unlike sibling methods).
        """
        try:
            models = {
                "Logistic Regression": LogisticRegression(),
                "Decision Tree": DecisionTreeClassifier(),
                "Random Forest": RandomForestClassifier(),
                "Gradient Boosting": GradientBoostingClassifier(),
                "AdaBoost": AdaBoostClassifier(),
            }
            params = {
                "Decision Tree": {
                    'criterion': ['gini', 'entropy', 'log_loss'],
                    # 'splitter':['best','random'],
                    # 'max_features':['sqrt','log2']
                },
                "Random Forest": {
                    # 'criterion':['gini','entropy','log_loss'],
                    # 'max_features':['sqrt','log2'],
                    'n_estimators': [8, 16, 32, 64, 128, 256]
                },
                "Gradient Boosting": {
                    'learning_rate': [.1, .01, .05, .001],
                    'subsample': [0.6, 0.7, 0.75, 0.8, 0.85, 0.9],
                    'n_estimators': [8, 16, 32, 64, 128, 256]
                },
                "AdaBoost": {
                    'learning_rate': [.1, .01, .05, .001],
                    'n_estimators': [8, 16, 32, 64, 128, 256]
                },
                "Logistic Regression": {},
            }

            model_report: dict = evaluate_models(X_train=X_train, y_train=y_train,
                                                 X_test=X_test, y_test=y_test,
                                                 models=models, params=params)

            # Best model by test score. evaluate_models fits each instance in
            # `models` in place, so the winner is already trained.
            best_model_name = max(model_report, key=model_report.get)
            best_model_score = model_report[best_model_name]
            best_model = models[best_model_name]
            # NOTE(review): evaluate_models scores with r2_score even though
            # these are classifiers — confirm accuracy/f1 wasn't intended.
            logging.info(f"Best model found , Model Name : {best_model_name} , R2 Score : {best_model_score}")

            y_train_pred = best_model.predict(X_train)
            y_test_pred = best_model.predict(X_test)

            classification_train_metric = get_classification_score(y_true=y_train, y_pred=y_train_pred)
            classification_test_metric = get_classification_score(y_true=y_test, y_pred=y_test_pred)

            ## Track with mlflow

            preprocessor = load_object(file_path=self.data_transformation_artifact.transformed_object_file_path)
            model_dir_path = os.path.dirname(self.model_trainer_config.trained_model_file_path)
            os.makedirs(model_dir_path, exist_ok=True)

            Network_model = NetworkModel(preprocessor=preprocessor, model=best_model)
            save_object(file_path=self.model_trainer_config.trained_model_file_path, obj=Network_model)
            logging.info(f"Trained model saved at : {self.model_trainer_config.trained_model_file_path}")

            model_trainer_artifact = ModelTrainerArtifact(
                trained_model_file_path=self.model_trainer_config.trained_model_file_path,
                train_metric_artifact=classification_train_metric,
                test_metric_artifact=classification_test_metric)
            logging.info(f"Model Trainer Artifact : {model_trainer_artifact}")
            return model_trainer_artifact
        except Exception as e:
            raise NetworkSecurityException(e, sys)

    def initiate_model_trainer(self) -> ModelTrainerArtifact:
        """Load the transformed train/test arrays, split features from the
        target (last column), and delegate to train_model."""
        try:
            logging.info("Loading transformed training array and transformed test array")
            train_file_path = self.data_transformation_artifact.transformed_train_file_path
            test_file_path = self.data_transformation_artifact.transformed_test_file_path

            ## Load numpy array
            train_array = load_numpy_array_data(file_path=train_file_path)
            test_array = load_numpy_array_data(file_path=test_file_path)
            logging.info("Splitting training and test input and target feature")
            X_train, y_train = train_array[:, :-1], train_array[:, -1]
            X_test, y_test = test_array[:, :-1], test_array[:, -1]

            model_trainer_artifact = self.train_model(X_train=X_train, X_test=X_test,
                                                      y_train=y_train, y_test=y_test)
            return model_trainer_artifact

        except Exception as e:
            raise NetworkSecurityException(e, sys)
networksecurity/constant/training_pipeline/__init__.py CHANGED
@@ -16,6 +16,9 @@ TEST_FILE_NAME: str = "test.csv"
16
 
17
  SCHEMA_FILE_PATH = os.path.join("data_schema", "schema.yaml")
18
 
 
 
 
19
  """
20
  Data Ingestion related constant start with DATA_INGESTION VAR NAME
21
  """
@@ -48,4 +51,16 @@ DATA_TRANSFORMATION_IMPUTER_PARAMS: dict = {
48
  "n_neighbors": 3,
49
  "weights": "uniform",
50
  }
51
- PREPROCESSING_OBJECT_FILE_NAME: str = "preprocessing_object.pkl"
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
  SCHEMA_FILE_PATH = os.path.join("data_schema", "schema.yaml")
18
 
19
# Directory where the final serving model is exported.
# os.path.join with a single segment was a no-op; value is unchanged.
SAVED_MODEL_DIR_NAME = "saved_models"
# File name used for the serialized model object.
MODEL_FILE_NAME: str = "model.pkl"
21
+
22
  """
23
  Data Ingestion related constant start with DATA_INGESTION VAR NAME
24
  """
 
51
  "n_neighbors": 3,
52
  "weights": "uniform",
53
  }
54
# File names of the numpy arrays written by the data-transformation stage.
DATA_TRANSFORMATION_TRAIN_FILE_PATH: str = "train.npy"
DATA_TRANSFORMATION_TEST_FILE_PATH: str = "test.npy"
# Serialized preprocessing pipeline fitted during transformation.
PREPROCESSING_OBJECT_FILE_NAME: str = "preprocessing_object.pkl"

"""
Model trainer related constants start with MODEL_TRAINER var name
"""

MODEL_TRAINER_DIR_NAME: str = "model_trainer"
MODEL_TRAINER_TRAINED_MODEL_DIR: str = "trained_model"
# NOTE(review): config_entity's ModelTrainerConfig uses MODEL_FILE_NAME
# instead of this constant — confirm which of the two is intended.
MODEL_TRAINER_TRAINED_MODEL_NAME: str = "model.pkl"
# Minimum acceptable score for the trained model.
MODEL_TRAINER_EXPECTED_SCORE: float = 0.6
# Max allowed gap between train and test score before flagging over/under-fit.
MODEL_TRAINER_OVER_FITTING_UNDER_FITTING_THRESHOLD: float = 0.05
networksecurity/entity/artifact_entity.py CHANGED
@@ -18,4 +18,21 @@ class DataValidationArtifact:
18
  class DataTransformationArtifact:
19
  transformed_train_file_path: str
20
  transformed_test_file_path: str
21
- transformed_object_file_path: str
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  class DataTransformationArtifact:
19
  transformed_train_file_path: str
20
  transformed_test_file_path: str
21
+ transformed_object_file_path: str
22
+
23
@dataclass
class ClassificationMetricArtifact:
    # Standard classification metrics for one dataset split.
    f1_score: float
    precision_score: float
    recall_score: float

@dataclass
class ModelTrainerArtifact:
    # Output of the model-trainer stage: saved-model location plus the
    # classification metrics measured on the train and test splits.
    trained_model_file_path: str
    train_metric_artifact: ClassificationMetricArtifact
    test_metric_artifact: ClassificationMetricArtifact

@dataclass
class ModelTrainerConfig:
    # NOTE(review): a class with this same name also exists in
    # entity/config_entity.py and is the one model_trainer.py imports —
    # confirm this dataclass here is actually needed.
    model_trainer_dir: str
    trained_model_file_path: str
networksecurity/entity/config_entity.py CHANGED
@@ -107,4 +107,16 @@ class DataTransformationConfig:
107
  self.data_transformation_dir,
108
  training_pipeline.DATA_TRANSFORMATION_TRANSFORMED_OBJECT_DIR,
109
  training_pipeline.PREPROCESSING_OBJECT_FILE_NAME
110
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  self.data_transformation_dir,
108
  training_pipeline.DATA_TRANSFORMATION_TRANSFORMED_OBJECT_DIR,
109
  training_pipeline.PREPROCESSING_OBJECT_FILE_NAME
110
+ )
111
+
112
class ModelTrainerConfig:
    """Filesystem layout and thresholds for the model-training stage."""

    def __init__(self, training_pipeline_config: TraningPipelineConfig):
        # Root directory for all model-trainer outputs in this run's artifacts.
        self.model_trainer_dir: str = os.path.join(
            training_pipeline_config.artifact_dir,
            training_pipeline.MODEL_TRAINER_DIR_NAME,
        )
        # Full path where the trained model object will be serialized.
        self.trained_model_file_path: str = os.path.join(
            self.model_trainer_dir,
            training_pipeline.MODEL_TRAINER_TRAINED_MODEL_DIR,
            training_pipeline.MODEL_FILE_NAME,
        )
        # Minimum acceptable model score.
        self.expected_accuracy: float = training_pipeline.MODEL_TRAINER_EXPECTED_SCORE
        # Allowed train/test score gap before over/under-fitting is flagged.
        self.overfitting_underfitting_threshold = (
            training_pipeline.MODEL_TRAINER_OVER_FITTING_UNDER_FITTING_THRESHOLD
        )
networksecurity/utils/main_utils/utils.py CHANGED
@@ -1,3 +1,5 @@
 
 
1
  import yaml
2
  from networksecurity.exception.exception import NetworkSecurityException
3
  from networksecurity.logging.logger import logging
@@ -53,5 +55,63 @@ def save_object(file_path: str, obj: object) -> None:
53
  with open(file_path, 'wb') as file_obj:
54
  pickle.dump(obj, file_obj)
55
  logging.info("Exited the save_object method of Main Utils")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  except Exception as e:
57
  raise NetworkSecurityException(e, sys)
 
1
+ from sklearn.metrics import r2_score
2
+ from sklearn.model_selection import GridSearchCV
3
  import yaml
4
  from networksecurity.exception.exception import NetworkSecurityException
5
  from networksecurity.logging.logger import logging
 
55
  with open(file_path, 'wb') as file_obj:
56
  pickle.dump(obj, file_obj)
57
  logging.info("Exited the save_object method of Main Utils")
58
+ except Exception as e:
59
+ raise NetworkSecurityException(e, sys)
60
+
61
def load_object(file_path: str) -> object:
    """Deserialize and return the pickled object stored at ``file_path``.

    Args:
        file_path: Path of the pickle file to read.
    Returns:
        The unpickled object.
    Raises:
        NetworkSecurityException: if the file is missing or unpickling fails.
    """
    try:
        if not os.path.exists(file_path):
            raise Exception(f"The file: {file_path} does not exist")
        # Stray debug `print(file_obj)` removed; it dumped the handle to stdout.
        with open(file_path, 'rb') as file_obj:
            return pickle.load(file_obj)
    except Exception as e:
        raise NetworkSecurityException(e, sys)
70
+
71
def load_numpy_array_data(file_path: str) -> np.array:
    """Read back a numpy array previously saved at ``file_path``.

    Args:
        file_path: Path of the ``.npy`` file to read.
    Returns:
        The deserialized numpy array.
    Raises:
        NetworkSecurityException: if the file cannot be read or parsed.
    """
    try:
        with open(file_path, 'rb') as handle:
            array = np.load(handle)
        return array
    except Exception as e:
        raise NetworkSecurityException(e, sys)
82
+
83
def evaluate_models(X_train, y_train, X_test, y_test, models: dict, params: dict) -> dict:
    """
    Grid-search each model's hyperparameters, refit it on the training data
    with the best ones, and report its score on the test data.

    Note: the estimator instances in ``models`` are fitted in place, so after
    this call each carries its tuned parameters.

    Args:
        X_train: Training feature data.
        y_train: Training target data.
        X_test: Testing feature data.
        y_test: Testing target data.
        models (dict): Model name -> unfitted estimator instance.
        params (dict): Model name -> hyperparameter grid for that model.
    Returns:
        dict: Model name -> test-set score.
    Raises:
        NetworkSecurityException: wraps any failure during tuning/fitting.
    """
    try:
        report = {}
        # Iterate pairs directly instead of the original index-based
        # range(len(list(models))) double lookup.
        for name, model in models.items():
            # A missing grid now means "no tuning" (GridSearchCV accepts {})
            # rather than a KeyError.
            param_grid = params.get(name, {})

            gs = GridSearchCV(model, param_grid, cv=3)
            gs.fit(X_train, y_train)

            # Refit the caller's instance with the winning parameters.
            model.set_params(**gs.best_params_)
            model.fit(X_train, y_train)

            # Train-set predictions/score were computed but unused — dropped.
            # NOTE(review): r2_score is a regression metric; the callers pass
            # classifiers — confirm accuracy/f1 wasn't intended.
            report[name] = r2_score(y_test, model.predict(X_test))
        return report

    except Exception as e:
        raise NetworkSecurityException(e, sys)
networksecurity/utils/ml_utils/__init__.py ADDED
File without changes
networksecurity/utils/ml_utils/metric/__init.py ADDED
File without changes
networksecurity/utils/ml_utils/metric/classfication_metric.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from networksecurity.entity.artifact_entity import ClassificationMetricArtifact
2
+ from networksecurity.exception.exception import NetworkSecurityException
3
+ from sklearn.metrics import precision_score, recall_score, f1_score
4
+ import sys
5
+
6
def get_classification_score(y_true, y_pred) -> ClassificationMetricArtifact:
    """Bundle f1, precision and recall for the given true/predicted labels
    into a ClassificationMetricArtifact.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
    Returns:
        ClassificationMetricArtifact carrying the three scores.
    Raises:
        NetworkSecurityException: wraps any metric-computation failure.
    """
    try:
        return ClassificationMetricArtifact(
            f1_score=f1_score(y_true, y_pred),
            precision_score=precision_score(y_true, y_pred),
            recall_score=recall_score(y_true, y_pred),
        )
    except Exception as e:
        raise NetworkSecurityException(e, sys)
networksecurity/utils/ml_utils/model/__init__.py ADDED
File without changes
networksecurity/utils/ml_utils/model/estimator.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from networksecurity.exception.exception import NetworkSecurityException
2
+ import sys,os
3
+ from networksecurity.constant.training_pipeline import SAVED_MODEL_DIR_NAME,MODEL_FILE_NAME
4
+ from networksecurity.logging.logger import logging
5
+
6
class NetworkModel:
    """Couples a fitted preprocessor with a trained model so raw features can
    be transformed and predicted in a single call."""

    def __init__(self, preprocessor, model):
        """
        Args:
            preprocessor: Fitted transformer applied to raw input features.
            model: Trained estimator applied to the transformed features.
        """
        try:
            self.preprocessor = preprocessor
            self.model = model
        except Exception as e:
            raise NetworkSecurityException(e, sys)

    def predict(self, X):
        """Run ``X`` through the preprocessor, then the model.

        Args:
            X: Raw input data for prediction.
        Returns:
            The model's predictions on the transformed input.
        """
        try:
            transformed = self.preprocessor.transform(X)
            return self.model.predict(transformed)
        except Exception as e:
            raise NetworkSecurityException(e, sys)