Abeshith committed on
Commit
d463732
·
1 Parent(s): 475d366

Add MLflow tracking integration

Browse files
.env.example ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ # DagsHub MLflow Tracking Configuration
2
+ # Get your token from: https://dagshub.com/user/settings/tokens
3
+
4
+ DAGSHUB_USERNAME=your_dagshub_username
5
+ DAGSHUB_TOKEN=your_dagshub_token_here
6
+ MLFLOW_TRACKING_URI=https://dagshub.com/your_username/your_repo.mlflow/
config/config.yaml CHANGED
@@ -47,7 +47,7 @@ model_pusher:
47
  model_registry_path: models/production
48
 
49
  mlflow:
50
- tracking_uri: http://localhost:5000
51
  experiment_name: automl_experiment
52
  run_name: null
53
  registry_uri: null
 
47
  model_registry_path: models/production
48
 
49
  mlflow:
50
+ tracking_uri: https://dagshub.com/abheshith7/AutoML-MLOps-PipeLine.mlflow/
51
  experiment_name: automl_experiment
52
  run_name: null
53
  registry_uri: null
src/mlpipeline/automl/autogluon_trainer.py CHANGED
@@ -1,5 +1,5 @@
1
  from pathlib import Path
2
- from typing import Dict, Any, Optional
3
  import pandas as pd
4
  from autogluon.tabular import TabularPredictor
5
 
@@ -34,13 +34,24 @@ class AutoGluonTrainer:
34
  leaderboard = self.predictor.leaderboard(silent=True)
35
  best_model = leaderboard.iloc[0]
36
 
 
 
 
 
 
 
37
  metrics = {
38
- 'score': float(best_model['score_val']),
 
39
  'score_test': float(best_model.get('score_test', 0.0)),
 
 
 
 
40
  }
41
 
42
  logger.info(f"AutoGluon training completed. Best score: {metrics['score']}")
43
- return metrics
44
 
45
  def predict(self, data: pd.DataFrame) -> pd.Series:
46
  if self.predictor is None:
 
1
  from pathlib import Path
2
+ from typing import Dict, Any, Optional, Tuple
3
  import pandas as pd
4
  from autogluon.tabular import TabularPredictor
5
 
 
34
  leaderboard = self.predictor.leaderboard(silent=True)
35
  best_model = leaderboard.iloc[0]
36
 
37
+ # Get feature importance if available
38
+ try:
39
+ feature_importance = self.predictor.feature_importance(data=train_data)
40
+ except:
41
+ feature_importance = None
42
+
43
  metrics = {
44
+ 'validation_accuracy': float(best_model['score_val']),
45
+ 'score': float(best_model['score_val']), # Keep for backward compatibility
46
  'score_test': float(best_model.get('score_test', 0.0)),
47
+ 'fit_time': float(best_model.get('fit_time', 0.0)),
48
+ 'pred_time_val': float(best_model.get('pred_time_val', 0.0)),
49
+ 'num_models_trained': len(leaderboard),
50
+ 'best_model_name': str(best_model['model']),
51
  }
52
 
53
  logger.info(f"AutoGluon training completed. Best score: {metrics['score']}")
54
+ return metrics, feature_importance
55
 
56
  def predict(self, data: pd.DataFrame) -> pd.Series:
57
  if self.predictor is None:
src/mlpipeline/components/automl_trainer.py CHANGED
@@ -1,4 +1,6 @@
1
  import pandas as pd
 
 
2
  from mlpipeline.entity import ModelTrainerConfig, ModelTrainerArtifact
3
  from mlpipeline.automl import AutoMLFactory
4
  from mlpipeline.logging.logger import get_logger
@@ -7,7 +9,6 @@ from mlpipeline.constants import AUTOML_CONFIG_FILE_PATH
7
  from mlpipeline.utils.common import read_yaml
8
  from pathlib import Path
9
  import sys
10
- import os
11
 
12
  logger = get_logger(__name__)
13
 
@@ -20,11 +21,38 @@ class AutoMLTrainer:
20
  try:
21
  logger.info("Starting model training")
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  train_df = pd.read_csv(self.config.train_data_path)
24
 
25
  automl_config = read_yaml(Path(AUTOML_CONFIG_FILE_PATH))
26
  library_config = automl_config[self.config.automl_library]
27
 
 
 
 
 
 
 
28
  trainer = AutoMLFactory.create_trainer(
29
  self.config.automl_library,
30
  library_config
@@ -33,11 +61,64 @@ class AutoMLTrainer:
33
  os.makedirs(self.config.root_dir, exist_ok=True)
34
 
35
  if self.config.automl_library == 'autogluon':
36
- metrics = trainer.train(train_df, self.config.target_column, self.config.model_path)
 
 
 
 
 
37
  else:
38
  X_train = train_df.drop(columns=[self.config.target_column])
39
  y_train = train_df[self.config.target_column]
40
  metrics = trainer.train(X_train, y_train, self.config.model_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
  logger.info(f"Model trained with metrics: {metrics}")
43
 
@@ -48,4 +129,6 @@ class AutoMLTrainer:
48
  message=f"Model trained successfully with score: {metrics.get('score', 0.0):.4f}"
49
  )
50
  except Exception as e:
 
 
51
  raise ModelTrainingException(str(e), sys)
 
1
  import pandas as pd
2
+ import mlflow
3
+ import os
4
  from mlpipeline.entity import ModelTrainerConfig, ModelTrainerArtifact
5
  from mlpipeline.automl import AutoMLFactory
6
  from mlpipeline.logging.logger import get_logger
 
9
  from mlpipeline.utils.common import read_yaml
10
  from pathlib import Path
11
  import sys
 
12
 
13
  logger = get_logger(__name__)
14
 
 
21
  try:
22
  logger.info("Starting model training")
23
 
24
+ tracking_uri = os.getenv("MLFLOW_TRACKING_URI", "https://dagshub.com/abheshith7/AutoML-MLOps-PipeLine.mlflow/")
25
+ dagshub_token = os.getenv("DAGSHUB_TOKEN")
26
+
27
+ mlflow_enabled = False
28
+ if dagshub_token and "dagshub.com" in tracking_uri:
29
+ try:
30
+ os.environ["MLFLOW_TRACKING_USERNAME"] = os.getenv("DAGSHUB_USERNAME", "abheshith7")
31
+ os.environ["MLFLOW_TRACKING_PASSWORD"] = dagshub_token
32
+ mlflow.set_tracking_uri(tracking_uri)
33
+ mlflow.set_experiment("automl_experiment")
34
+ mlflow_enabled = True
35
+ logger.info(f"MLflow tracking enabled: {tracking_uri}")
36
+ except Exception as e:
37
+ logger.warning(f"MLflow tracking disabled: {str(e)}")
38
+ mlflow_enabled = False
39
+ else:
40
+ logger.warning("MLflow tracking disabled: DAGSHUB_TOKEN not set")
41
+
42
+ if mlflow_enabled:
43
+ mlflow.start_run()
44
+
45
  train_df = pd.read_csv(self.config.train_data_path)
46
 
47
  automl_config = read_yaml(Path(AUTOML_CONFIG_FILE_PATH))
48
  library_config = automl_config[self.config.automl_library]
49
 
50
+ if mlflow_enabled:
51
+ mlflow.log_param("automl_library", self.config.automl_library)
52
+ mlflow.log_param("target_column", self.config.target_column)
53
+ mlflow.log_param("train_samples", len(train_df))
54
+ mlflow.log_params(library_config)
55
+
56
  trainer = AutoMLFactory.create_trainer(
57
  self.config.automl_library,
58
  library_config
 
61
  os.makedirs(self.config.root_dir, exist_ok=True)
62
 
63
  if self.config.automl_library == 'autogluon':
64
+ result = trainer.train(train_df, self.config.target_column, self.config.model_path)
65
+ if isinstance(result, tuple):
66
+ metrics, feature_importance = result
67
+ else:
68
+ metrics = result
69
+ feature_importance = None
70
  else:
71
  X_train = train_df.drop(columns=[self.config.target_column])
72
  y_train = train_df[self.config.target_column]
73
  metrics = trainer.train(X_train, y_train, self.config.model_path)
74
+ feature_importance = None
75
+
76
+ if mlflow_enabled:
77
+ # Separate numeric metrics from string values
78
+ numeric_metrics = {}
79
+ string_values = {}
80
+
81
+ for key, value in metrics.items():
82
+ if isinstance(value, (int, float)):
83
+ numeric_metrics[key] = value
84
+ else:
85
+ string_values[key] = str(value)
86
+
87
+ # Log numeric metrics only
88
+ if numeric_metrics:
89
+ mlflow.log_metrics(numeric_metrics)
90
+
91
+ # Log string values as tags
92
+ for key, value in string_values.items():
93
+ mlflow.set_tag(key, value)
94
+
95
+ # Log feature importance as artifact
96
+ if feature_importance is not None:
97
+ import json
98
+ fi_dict = feature_importance.to_dict() if hasattr(feature_importance, 'to_dict') else {}
99
+ fi_path = Path(self.config.root_dir) / "feature_importance.json"
100
+ with open(fi_path, 'w') as f:
101
+ json.dump(fi_dict, f, indent=2)
102
+ mlflow.log_artifact(str(fi_path))
103
+
104
+ # Log model leaderboard
105
+ try:
106
+ from autogluon.tabular import TabularPredictor
107
+ predictor = TabularPredictor.load(str(self.config.model_path))
108
+ leaderboard = predictor.leaderboard(silent=True)
109
+ lb_path = Path(self.config.root_dir) / "leaderboard.csv"
110
+ leaderboard.to_csv(lb_path, index=False)
111
+ mlflow.log_artifact(str(lb_path))
112
+ except:
113
+ pass
114
+
115
+ # Set additional tags
116
+ mlflow.set_tag("model_type", "AutoML")
117
+ mlflow.set_tag("framework", self.config.automl_library)
118
+
119
+ run_id = mlflow.active_run().info.run_id
120
+ logger.info(f"MLflow run logged: {run_id}")
121
+ mlflow.end_run()
122
 
123
  logger.info(f"Model trained with metrics: {metrics}")
124
 
 
129
  message=f"Model trained successfully with score: {metrics.get('score', 0.0):.4f}"
130
  )
131
  except Exception as e:
132
+ if mlflow.active_run():
133
+ mlflow.end_run()
134
  raise ModelTrainingException(str(e), sys)
src/mlpipeline/components/model_evaluation.py CHANGED
@@ -1,12 +1,13 @@
1
  import pandas as pd
2
  import json
 
 
3
  from sklearn.metrics import accuracy_score, f1_score, classification_report
4
  from mlpipeline.entity import ModelEvaluationConfig, ModelEvaluationArtifact
5
  from autogluon.tabular import TabularPredictor
6
  from mlpipeline.logging.logger import get_logger
7
  from mlpipeline.exception import ModelEvaluationException
8
  import sys
9
- import os
10
 
11
  logger = get_logger(__name__)
12
 
@@ -19,8 +20,32 @@ class ModelEvaluation:
19
  try:
20
  logger.info("Starting model evaluation")
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  test_df = pd.read_csv(self.config.test_data_path)
23
 
 
 
 
24
  predictor = TabularPredictor.load(str(self.config.model_path))
25
 
26
  predictions = predictor.predict(test_df)
@@ -29,24 +54,63 @@ class ModelEvaluation:
29
  predictions_binary = (predictions > 0).astype(int)
30
  y_test_binary = (y_test > 0).astype(int)
31
 
 
 
 
32
  accuracy = float(accuracy_score(y_test_binary, predictions_binary))
33
  f1 = float(f1_score(y_test_binary, predictions_binary, average='weighted'))
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
  metrics = {
36
- "accuracy": accuracy,
37
- "f1_score": f1
 
 
 
 
 
 
 
 
 
 
 
 
38
  }
39
 
 
 
 
 
 
40
  os.makedirs(self.config.root_dir, exist_ok=True)
41
 
42
  with open(self.config.metrics_file, "w") as f:
43
  json.dump(metrics, f, indent=2)
44
 
 
 
 
 
 
 
45
  logger.info(f"Evaluation metrics: {metrics}")
46
 
47
  return ModelEvaluationArtifact(
48
  is_model_accepted=True,
49
- evaluation_metrics=metrics,
50
  message=f"Model evaluation completed with accuracy: {accuracy:.4f}"
51
  )
52
  except Exception as e:
 
1
  import pandas as pd
2
  import json
3
+ import mlflow
4
+ import os
5
  from sklearn.metrics import accuracy_score, f1_score, classification_report
6
  from mlpipeline.entity import ModelEvaluationConfig, ModelEvaluationArtifact
7
  from autogluon.tabular import TabularPredictor
8
  from mlpipeline.logging.logger import get_logger
9
  from mlpipeline.exception import ModelEvaluationException
10
  import sys
 
11
 
12
  logger = get_logger(__name__)
13
 
 
20
  try:
21
  logger.info("Starting model evaluation")
22
 
23
+ tracking_uri = os.getenv("MLFLOW_TRACKING_URI", "https://dagshub.com/abheshith7/AutoML-MLOps-PipeLine.mlflow/")
24
+ dagshub_token = os.getenv("DAGSHUB_TOKEN")
25
+
26
+ mlflow_enabled = False
27
+ if dagshub_token and "dagshub.com" in tracking_uri:
28
+ try:
29
+ os.environ["MLFLOW_TRACKING_USERNAME"] = os.getenv("DAGSHUB_USERNAME", "abheshith7")
30
+ os.environ["MLFLOW_TRACKING_PASSWORD"] = dagshub_token
31
+ mlflow.set_tracking_uri(tracking_uri)
32
+ mlflow.set_experiment("automl_experiment")
33
+ mlflow_enabled = True
34
+ logger.info(f"MLflow tracking enabled: {tracking_uri}")
35
+ except Exception as e:
36
+ logger.warning(f"MLflow tracking disabled: {str(e)}")
37
+ mlflow_enabled = False
38
+ else:
39
+ logger.warning("MLflow tracking disabled: DAGSHUB_TOKEN not set")
40
+
41
+ if mlflow_enabled:
42
+ mlflow.start_run()
43
+
44
  test_df = pd.read_csv(self.config.test_data_path)
45
 
46
+ if mlflow_enabled:
47
+ mlflow.log_param("test_samples", len(test_df))
48
+
49
  predictor = TabularPredictor.load(str(self.config.model_path))
50
 
51
  predictions = predictor.predict(test_df)
 
54
  predictions_binary = (predictions > 0).astype(int)
55
  y_test_binary = (y_test > 0).astype(int)
56
 
57
+ # Calculate comprehensive metrics
58
+ from sklearn.metrics import precision_score, recall_score, roc_auc_score, confusion_matrix
59
+
60
  accuracy = float(accuracy_score(y_test_binary, predictions_binary))
61
  f1 = float(f1_score(y_test_binary, predictions_binary, average='weighted'))
62
+ precision = float(precision_score(y_test_binary, predictions_binary, average='weighted'))
63
+ recall = float(recall_score(y_test_binary, predictions_binary, average='weighted'))
64
+
65
+ # Get prediction probabilities for ROC-AUC
66
+ try:
67
+ pred_proba = predictor.predict_proba(test_df)
68
+ if hasattr(pred_proba, 'iloc'):
69
+ pred_proba_positive = pred_proba.iloc[:, 1]
70
+ else:
71
+ pred_proba_positive = pred_proba[:, 1]
72
+ auc = float(roc_auc_score(y_test_binary, pred_proba_positive))
73
+ except:
74
+ auc = 0.0
75
 
76
  metrics = {
77
+ "test_accuracy": accuracy,
78
+ "test_f1_score": f1,
79
+ "test_precision": precision,
80
+ "test_recall": recall,
81
+ "test_roc_auc": auc
82
+ }
83
+
84
+ # Save confusion matrix
85
+ cm = confusion_matrix(y_test_binary, predictions_binary)
86
+ cm_dict = {
87
+ "true_negatives": int(cm[0][0]),
88
+ "false_positives": int(cm[0][1]),
89
+ "false_negatives": int(cm[1][0]),
90
+ "true_positives": int(cm[1][1])
91
  }
92
 
93
+ if mlflow_enabled:
94
+ mlflow.log_metrics(metrics)
95
+ mlflow.log_metrics(cm_dict)
96
+ mlflow.set_tag("evaluation_stage", "test")
97
+
98
  os.makedirs(self.config.root_dir, exist_ok=True)
99
 
100
  with open(self.config.metrics_file, "w") as f:
101
  json.dump(metrics, f, indent=2)
102
 
103
+ if mlflow_enabled:
104
+ mlflow.log_artifact(str(self.config.metrics_file))
105
+ run_id = mlflow.active_run().info.run_id
106
+ logger.info(f"MLflow run logged: {run_id}")
107
+ mlflow.end_run()
108
+
109
  logger.info(f"Evaluation metrics: {metrics}")
110
 
111
  return ModelEvaluationArtifact(
112
  is_model_accepted=True,
113
+ evaluation_metrics={"accuracy": accuracy, "f1_score": f1},
114
  message=f"Model evaluation completed with accuracy: {accuracy:.4f}"
115
  )
116
  except Exception as e:
src/mlpipeline/constants.py CHANGED
@@ -11,5 +11,5 @@ MODELS_DIR = Path("artifacts/models")
11
  DATA_DIR = Path("artifacts/data")
12
  REPORTS_DIR = Path("artifacts/reports")
13
 
14
- MLFLOW_TRACKING_URI = "http://localhost:5000"
15
  MLFLOW_REGISTRY_URI = None
 
11
  DATA_DIR = Path("artifacts/data")
12
  REPORTS_DIR = Path("artifacts/reports")
13
 
14
+ MLFLOW_TRACKING_URI = "https://dagshub.com/abheshith7/AutoML-MLOps-PipeLine.mlflow/"
15
  MLFLOW_REGISTRY_URI = None