Abeshith commited on
Commit
7f1fbee
Β·
1 Parent(s): 7e4b5f8

Add main pipelines and rename stages

Browse files
src/mlpipeline/pipeline/__init__.py CHANGED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ from mlpipeline.pipeline.training_pipeline import TrainingPipeline
2
+ from mlpipeline.pipeline.prediction_pipeline import PredictionPipeline
3
+
4
+ __all__ = ["TrainingPipeline", "PredictionPipeline"]
src/mlpipeline/pipeline/prediction_pipeline.py CHANGED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from pathlib import Path
3
+ from autogluon.tabular import TabularPredictor
4
+ from mlpipeline.logging.logger import get_logger
5
+ from mlpipeline.exception import MLPipelineException
6
+ import sys
7
+
8
+ logger = get_logger(__name__)
9
+
10
+
11
+ class PredictionPipeline:
12
+ def __init__(self, model_path: str = "models/production/model"):
13
+ self.model_path = Path(model_path)
14
+ self.model = None
15
+
16
+ def load_model(self):
17
+ try:
18
+ if not self.model_path.exists():
19
+ raise FileNotFoundError(f"Model not found at {self.model_path}")
20
+
21
+ logger.info(f"Loading model from {self.model_path}")
22
+ self.model = TabularPredictor.load(str(self.model_path))
23
+ logger.info("Model loaded successfully")
24
+
25
+ except Exception as e:
26
+ raise MLPipelineException(f"Failed to load model: {str(e)}", sys)
27
+
28
+ def predict(self, input_data: pd.DataFrame) -> dict:
29
+ try:
30
+ if self.model is None:
31
+ self.load_model()
32
+
33
+ logger.info("Making predictions")
34
+ predictions = self.model.predict(input_data)
35
+ binary_predictions = (predictions > 0).astype(int)
36
+
37
+ try:
38
+ probabilities = self.model.predict_proba(input_data)
39
+ proba_list = probabilities.values.tolist() if hasattr(probabilities, 'values') else probabilities
40
+ except:
41
+ proba_list = None
42
+
43
+ result = {
44
+ "predictions": binary_predictions.tolist(),
45
+ "probabilities": proba_list,
46
+ "num_samples": len(input_data)
47
+ }
48
+
49
+ logger.info(f"Predictions completed for {len(input_data)} samples")
50
+ return result
51
+
52
+ except Exception as e:
53
+ raise MLPipelineException(f"Prediction failed: {str(e)}", sys)
54
+
55
+ def predict_single(self, input_dict: dict) -> dict:
56
+ try:
57
+ df = pd.DataFrame([input_dict])
58
+ result = self.predict(df)
59
+
60
+ return {
61
+ "prediction": result["predictions"][0],
62
+ "probability": result["probabilities"][0] if result["probabilities"] else None
63
+ }
64
+
65
+ except Exception as e:
66
+ raise MLPipelineException(f"Single prediction failed: {str(e)}", sys)
67
+
68
+
69
+ if __name__ == "__main__":
70
+ pipeline = PredictionPipeline()
71
+ test_data = pd.read_csv("artifacts/feature_engineering/test_features.csv")
72
+
73
+ if "Heart Disease" in test_data.columns:
74
+ test_data = test_data.drop(columns=["Heart Disease"])
75
+
76
+ results = pipeline.predict(test_data.head(10))
77
+ logger.info(f"Predictions: {results['predictions']}")
78
+ logger.info(f"Number of samples: {results['num_samples']}")
src/mlpipeline/pipeline/training_pipeline.py CHANGED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from mlpipeline.logging.logger import get_logger
2
+ from mlpipeline.exception import MLPipelineException
3
+ from mlpipeline.config.configuration import ConfigurationManager
4
+ from mlpipeline.components.data_ingestion import DataIngestion
5
+ from mlpipeline.components.data_validation import DataValidation
6
+ from mlpipeline.components.data_transformation import DataTransformation
7
+ from mlpipeline.components.feature_engineering import FeatureEngineering
8
+ from mlpipeline.components.automl_trainer import AutoMLTrainer
9
+ from mlpipeline.components.model_evaluation import ModelEvaluation
10
+ from mlpipeline.components.model_pusher import ModelPusher
11
+ import sys
12
+
13
+ logger = get_logger(__name__)
14
+
15
+
16
+ class TrainingPipeline:
17
+ def __init__(self):
18
+ self.config_manager = ConfigurationManager()
19
+
20
+ def run_data_ingestion(self):
21
+ try:
22
+ logger.info("Stage 1: Data Ingestion started")
23
+ config = self.config_manager.get_data_ingestion_config()
24
+ data_ingestion = DataIngestion(config)
25
+ artifact = data_ingestion.download_data()
26
+ logger.info(f"Stage 1: Data Ingestion completed - {artifact}")
27
+ return artifact
28
+ except Exception as e:
29
+ raise MLPipelineException(str(e), sys)
30
+
31
+ def run_data_validation(self):
32
+ try:
33
+ logger.info("Stage 2: Data Validation started")
34
+ config = self.config_manager.get_data_validation_config()
35
+ data_validation = DataValidation(config)
36
+ artifact = data_validation.validate_schema()
37
+ logger.info(f"Stage 2: Data Validation completed - {artifact}")
38
+ return artifact
39
+ except Exception as e:
40
+ raise MLPipelineException(str(e), sys)
41
+
42
+ def run_data_transformation(self):
43
+ try:
44
+ logger.info("Stage 3: Data Transformation started")
45
+ config = self.config_manager.get_data_transformation_config()
46
+ data_transformation = DataTransformation(config)
47
+ artifact = data_transformation.transform()
48
+ logger.info(f"Stage 3: Data Transformation completed - {artifact}")
49
+ return artifact
50
+ except Exception as e:
51
+ raise MLPipelineException(str(e), sys)
52
+
53
+ def run_feature_engineering(self):
54
+ try:
55
+ logger.info("Stage 4: Feature Engineering started")
56
+ config = self.config_manager.get_feature_engineering_config()
57
+ feature_engineering = FeatureEngineering(config)
58
+ artifact = feature_engineering.engineer_features()
59
+ logger.info(f"Stage 4: Feature Engineering completed - {artifact}")
60
+ return artifact
61
+ except Exception as e:
62
+ raise MLPipelineException(str(e), sys)
63
+
64
+ def run_model_training(self):
65
+ try:
66
+ logger.info("Stage 5: Model Training started")
67
+ config = self.config_manager.get_model_trainer_config()
68
+ model_trainer = AutoMLTrainer(config)
69
+ artifact = model_trainer.train()
70
+ logger.info(f"Stage 5: Model Training completed - {artifact}")
71
+ return artifact
72
+ except Exception as e:
73
+ raise MLPipelineException(str(e), sys)
74
+
75
+ def run_model_evaluation(self):
76
+ try:
77
+ logger.info("Stage 6: Model Evaluation started")
78
+ config = self.config_manager.get_model_evaluation_config()
79
+ model_evaluation = ModelEvaluation(config)
80
+ artifact = model_evaluation.evaluate()
81
+ logger.info(f"Stage 6: Model Evaluation completed - {artifact}")
82
+ return artifact
83
+ except Exception as e:
84
+ raise MLPipelineException(str(e), sys)
85
+
86
+ def run_model_pusher(self):
87
+ try:
88
+ logger.info("Stage 7: Model Pusher started")
89
+ config = self.config_manager.get_model_pusher_config()
90
+ model_pusher = ModelPusher(config)
91
+ artifact = model_pusher.push_model()
92
+ logger.info(f"Stage 7: Model Pusher completed - {artifact}")
93
+ return artifact
94
+ except Exception as e:
95
+ raise MLPipelineException(str(e), sys)
96
+
97
+ def run_pipeline(self):
98
+ try:
99
+ logger.info("=" * 50)
100
+ logger.info("Training Pipeline Started")
101
+ logger.info("=" * 50)
102
+
103
+ self.run_data_ingestion()
104
+ self.run_data_validation()
105
+ self.run_data_transformation()
106
+ self.run_feature_engineering()
107
+ self.run_model_training()
108
+ eval_artifact = self.run_model_evaluation()
109
+ push_artifact = self.run_model_pusher()
110
+
111
+ logger.info("=" * 50)
112
+ logger.info("Training Pipeline Completed Successfully")
113
+ logger.info(f"Model Accuracy: {eval_artifact.evaluation_metrics.get('accuracy', 'N/A')}")
114
+ logger.info(f"Model Location: {push_artifact.pushed_model_path}")
115
+ logger.info("=" * 50)
116
+
117
+ return push_artifact
118
+
119
+ except Exception as e:
120
+ logger.error(f"Training Pipeline Failed: {str(e)}")
121
+ raise MLPipelineException(str(e), sys)
122
+
123
+
124
+ if __name__ == "__main__":
125
+ pipeline = TrainingPipeline()
126
+ pipeline.run_pipeline()
src/mlpipeline/{pipelines β†’ stages}/__init__.py RENAMED
File without changes
src/mlpipeline/{pipelines β†’ stages}/data_ingestion_pipeline.py RENAMED
File without changes
src/mlpipeline/{pipelines β†’ stages}/data_transformation_pipeline.py RENAMED
File without changes
src/mlpipeline/{pipelines β†’ stages}/data_validation_pipeline.py RENAMED
File without changes
src/mlpipeline/{pipelines β†’ stages}/feature_engineering_pipeline.py RENAMED
File without changes
src/mlpipeline/{pipelines β†’ stages}/model_evaluation_pipeline.py RENAMED
File without changes
src/mlpipeline/{pipelines β†’ stages}/model_pusher_pipeline.py RENAMED
File without changes
src/mlpipeline/{pipelines β†’ stages}/model_trainer_pipeline.py RENAMED
File without changes