Spaces:
Running
Running
Add main pipelines and rename stages
Browse files- src/mlpipeline/pipeline/__init__.py +4 -0
- src/mlpipeline/pipeline/prediction_pipeline.py +78 -0
- src/mlpipeline/pipeline/training_pipeline.py +126 -0
- src/mlpipeline/{pipelines β stages}/__init__.py +0 -0
- src/mlpipeline/{pipelines β stages}/data_ingestion_pipeline.py +0 -0
- src/mlpipeline/{pipelines β stages}/data_transformation_pipeline.py +0 -0
- src/mlpipeline/{pipelines β stages}/data_validation_pipeline.py +0 -0
- src/mlpipeline/{pipelines β stages}/feature_engineering_pipeline.py +0 -0
- src/mlpipeline/{pipelines β stages}/model_evaluation_pipeline.py +0 -0
- src/mlpipeline/{pipelines β stages}/model_pusher_pipeline.py +0 -0
- src/mlpipeline/{pipelines β stages}/model_trainer_pipeline.py +0 -0
src/mlpipeline/pipeline/__init__.py
CHANGED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from mlpipeline.pipeline.training_pipeline import TrainingPipeline
|
| 2 |
+
from mlpipeline.pipeline.prediction_pipeline import PredictionPipeline
|
| 3 |
+
|
| 4 |
+
__all__ = ["TrainingPipeline", "PredictionPipeline"]
|
src/mlpipeline/pipeline/prediction_pipeline.py
CHANGED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
from autogluon.tabular import TabularPredictor
|
| 4 |
+
from mlpipeline.logging.logger import get_logger
|
| 5 |
+
from mlpipeline.exception import MLPipelineException
|
| 6 |
+
import sys
|
| 7 |
+
|
| 8 |
+
logger = get_logger(__name__)
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class PredictionPipeline:
|
| 12 |
+
def __init__(self, model_path: str = "models/production/model"):
|
| 13 |
+
self.model_path = Path(model_path)
|
| 14 |
+
self.model = None
|
| 15 |
+
|
| 16 |
+
def load_model(self):
|
| 17 |
+
try:
|
| 18 |
+
if not self.model_path.exists():
|
| 19 |
+
raise FileNotFoundError(f"Model not found at {self.model_path}")
|
| 20 |
+
|
| 21 |
+
logger.info(f"Loading model from {self.model_path}")
|
| 22 |
+
self.model = TabularPredictor.load(str(self.model_path))
|
| 23 |
+
logger.info("Model loaded successfully")
|
| 24 |
+
|
| 25 |
+
except Exception as e:
|
| 26 |
+
raise MLPipelineException(f"Failed to load model: {str(e)}", sys)
|
| 27 |
+
|
| 28 |
+
def predict(self, input_data: pd.DataFrame) -> dict:
|
| 29 |
+
try:
|
| 30 |
+
if self.model is None:
|
| 31 |
+
self.load_model()
|
| 32 |
+
|
| 33 |
+
logger.info("Making predictions")
|
| 34 |
+
predictions = self.model.predict(input_data)
|
| 35 |
+
binary_predictions = (predictions > 0).astype(int)
|
| 36 |
+
|
| 37 |
+
try:
|
| 38 |
+
probabilities = self.model.predict_proba(input_data)
|
| 39 |
+
proba_list = probabilities.values.tolist() if hasattr(probabilities, 'values') else probabilities
|
| 40 |
+
except:
|
| 41 |
+
proba_list = None
|
| 42 |
+
|
| 43 |
+
result = {
|
| 44 |
+
"predictions": binary_predictions.tolist(),
|
| 45 |
+
"probabilities": proba_list,
|
| 46 |
+
"num_samples": len(input_data)
|
| 47 |
+
}
|
| 48 |
+
|
| 49 |
+
logger.info(f"Predictions completed for {len(input_data)} samples")
|
| 50 |
+
return result
|
| 51 |
+
|
| 52 |
+
except Exception as e:
|
| 53 |
+
raise MLPipelineException(f"Prediction failed: {str(e)}", sys)
|
| 54 |
+
|
| 55 |
+
def predict_single(self, input_dict: dict) -> dict:
|
| 56 |
+
try:
|
| 57 |
+
df = pd.DataFrame([input_dict])
|
| 58 |
+
result = self.predict(df)
|
| 59 |
+
|
| 60 |
+
return {
|
| 61 |
+
"prediction": result["predictions"][0],
|
| 62 |
+
"probability": result["probabilities"][0] if result["probabilities"] else None
|
| 63 |
+
}
|
| 64 |
+
|
| 65 |
+
except Exception as e:
|
| 66 |
+
raise MLPipelineException(f"Single prediction failed: {str(e)}", sys)
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
if __name__ == "__main__":
|
| 70 |
+
pipeline = PredictionPipeline()
|
| 71 |
+
test_data = pd.read_csv("artifacts/feature_engineering/test_features.csv")
|
| 72 |
+
|
| 73 |
+
if "Heart Disease" in test_data.columns:
|
| 74 |
+
test_data = test_data.drop(columns=["Heart Disease"])
|
| 75 |
+
|
| 76 |
+
results = pipeline.predict(test_data.head(10))
|
| 77 |
+
logger.info(f"Predictions: {results['predictions']}")
|
| 78 |
+
logger.info(f"Number of samples: {results['num_samples']}")
|
src/mlpipeline/pipeline/training_pipeline.py
CHANGED
|
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from mlpipeline.logging.logger import get_logger
|
| 2 |
+
from mlpipeline.exception import MLPipelineException
|
| 3 |
+
from mlpipeline.config.configuration import ConfigurationManager
|
| 4 |
+
from mlpipeline.components.data_ingestion import DataIngestion
|
| 5 |
+
from mlpipeline.components.data_validation import DataValidation
|
| 6 |
+
from mlpipeline.components.data_transformation import DataTransformation
|
| 7 |
+
from mlpipeline.components.feature_engineering import FeatureEngineering
|
| 8 |
+
from mlpipeline.components.automl_trainer import AutoMLTrainer
|
| 9 |
+
from mlpipeline.components.model_evaluation import ModelEvaluation
|
| 10 |
+
from mlpipeline.components.model_pusher import ModelPusher
|
| 11 |
+
import sys
|
| 12 |
+
|
| 13 |
+
logger = get_logger(__name__)
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class TrainingPipeline:
|
| 17 |
+
def __init__(self):
|
| 18 |
+
self.config_manager = ConfigurationManager()
|
| 19 |
+
|
| 20 |
+
def run_data_ingestion(self):
|
| 21 |
+
try:
|
| 22 |
+
logger.info("Stage 1: Data Ingestion started")
|
| 23 |
+
config = self.config_manager.get_data_ingestion_config()
|
| 24 |
+
data_ingestion = DataIngestion(config)
|
| 25 |
+
artifact = data_ingestion.download_data()
|
| 26 |
+
logger.info(f"Stage 1: Data Ingestion completed - {artifact}")
|
| 27 |
+
return artifact
|
| 28 |
+
except Exception as e:
|
| 29 |
+
raise MLPipelineException(str(e), sys)
|
| 30 |
+
|
| 31 |
+
def run_data_validation(self):
|
| 32 |
+
try:
|
| 33 |
+
logger.info("Stage 2: Data Validation started")
|
| 34 |
+
config = self.config_manager.get_data_validation_config()
|
| 35 |
+
data_validation = DataValidation(config)
|
| 36 |
+
artifact = data_validation.validate_schema()
|
| 37 |
+
logger.info(f"Stage 2: Data Validation completed - {artifact}")
|
| 38 |
+
return artifact
|
| 39 |
+
except Exception as e:
|
| 40 |
+
raise MLPipelineException(str(e), sys)
|
| 41 |
+
|
| 42 |
+
def run_data_transformation(self):
|
| 43 |
+
try:
|
| 44 |
+
logger.info("Stage 3: Data Transformation started")
|
| 45 |
+
config = self.config_manager.get_data_transformation_config()
|
| 46 |
+
data_transformation = DataTransformation(config)
|
| 47 |
+
artifact = data_transformation.transform()
|
| 48 |
+
logger.info(f"Stage 3: Data Transformation completed - {artifact}")
|
| 49 |
+
return artifact
|
| 50 |
+
except Exception as e:
|
| 51 |
+
raise MLPipelineException(str(e), sys)
|
| 52 |
+
|
| 53 |
+
def run_feature_engineering(self):
|
| 54 |
+
try:
|
| 55 |
+
logger.info("Stage 4: Feature Engineering started")
|
| 56 |
+
config = self.config_manager.get_feature_engineering_config()
|
| 57 |
+
feature_engineering = FeatureEngineering(config)
|
| 58 |
+
artifact = feature_engineering.engineer_features()
|
| 59 |
+
logger.info(f"Stage 4: Feature Engineering completed - {artifact}")
|
| 60 |
+
return artifact
|
| 61 |
+
except Exception as e:
|
| 62 |
+
raise MLPipelineException(str(e), sys)
|
| 63 |
+
|
| 64 |
+
def run_model_training(self):
|
| 65 |
+
try:
|
| 66 |
+
logger.info("Stage 5: Model Training started")
|
| 67 |
+
config = self.config_manager.get_model_trainer_config()
|
| 68 |
+
model_trainer = AutoMLTrainer(config)
|
| 69 |
+
artifact = model_trainer.train()
|
| 70 |
+
logger.info(f"Stage 5: Model Training completed - {artifact}")
|
| 71 |
+
return artifact
|
| 72 |
+
except Exception as e:
|
| 73 |
+
raise MLPipelineException(str(e), sys)
|
| 74 |
+
|
| 75 |
+
def run_model_evaluation(self):
|
| 76 |
+
try:
|
| 77 |
+
logger.info("Stage 6: Model Evaluation started")
|
| 78 |
+
config = self.config_manager.get_model_evaluation_config()
|
| 79 |
+
model_evaluation = ModelEvaluation(config)
|
| 80 |
+
artifact = model_evaluation.evaluate()
|
| 81 |
+
logger.info(f"Stage 6: Model Evaluation completed - {artifact}")
|
| 82 |
+
return artifact
|
| 83 |
+
except Exception as e:
|
| 84 |
+
raise MLPipelineException(str(e), sys)
|
| 85 |
+
|
| 86 |
+
def run_model_pusher(self):
|
| 87 |
+
try:
|
| 88 |
+
logger.info("Stage 7: Model Pusher started")
|
| 89 |
+
config = self.config_manager.get_model_pusher_config()
|
| 90 |
+
model_pusher = ModelPusher(config)
|
| 91 |
+
artifact = model_pusher.push_model()
|
| 92 |
+
logger.info(f"Stage 7: Model Pusher completed - {artifact}")
|
| 93 |
+
return artifact
|
| 94 |
+
except Exception as e:
|
| 95 |
+
raise MLPipelineException(str(e), sys)
|
| 96 |
+
|
| 97 |
+
def run_pipeline(self):
|
| 98 |
+
try:
|
| 99 |
+
logger.info("=" * 50)
|
| 100 |
+
logger.info("Training Pipeline Started")
|
| 101 |
+
logger.info("=" * 50)
|
| 102 |
+
|
| 103 |
+
self.run_data_ingestion()
|
| 104 |
+
self.run_data_validation()
|
| 105 |
+
self.run_data_transformation()
|
| 106 |
+
self.run_feature_engineering()
|
| 107 |
+
self.run_model_training()
|
| 108 |
+
eval_artifact = self.run_model_evaluation()
|
| 109 |
+
push_artifact = self.run_model_pusher()
|
| 110 |
+
|
| 111 |
+
logger.info("=" * 50)
|
| 112 |
+
logger.info("Training Pipeline Completed Successfully")
|
| 113 |
+
logger.info(f"Model Accuracy: {eval_artifact.evaluation_metrics.get('accuracy', 'N/A')}")
|
| 114 |
+
logger.info(f"Model Location: {push_artifact.pushed_model_path}")
|
| 115 |
+
logger.info("=" * 50)
|
| 116 |
+
|
| 117 |
+
return push_artifact
|
| 118 |
+
|
| 119 |
+
except Exception as e:
|
| 120 |
+
logger.error(f"Training Pipeline Failed: {str(e)}")
|
| 121 |
+
raise MLPipelineException(str(e), sys)
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
if __name__ == "__main__":
|
| 125 |
+
pipeline = TrainingPipeline()
|
| 126 |
+
pipeline.run_pipeline()
|
src/mlpipeline/{pipelines β stages}/__init__.py
RENAMED
|
File without changes
|
src/mlpipeline/{pipelines β stages}/data_ingestion_pipeline.py
RENAMED
|
File without changes
|
src/mlpipeline/{pipelines β stages}/data_transformation_pipeline.py
RENAMED
|
File without changes
|
src/mlpipeline/{pipelines β stages}/data_validation_pipeline.py
RENAMED
|
File without changes
|
src/mlpipeline/{pipelines β stages}/feature_engineering_pipeline.py
RENAMED
|
File without changes
|
src/mlpipeline/{pipelines β stages}/model_evaluation_pipeline.py
RENAMED
|
File without changes
|
src/mlpipeline/{pipelines β stages}/model_pusher_pipeline.py
RENAMED
|
File without changes
|
src/mlpipeline/{pipelines β stages}/model_trainer_pipeline.py
RENAMED
|
File without changes
|