File size: 4,919 Bytes
a7d80f2
 
d463732
 
a7d80f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d463732
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a7d80f2
 
d463732
 
 
a7d80f2
 
 
 
 
 
 
 
d463732
 
 
a7d80f2
 
d463732
 
 
 
 
 
 
 
 
 
 
 
 
a7d80f2
 
d463732
 
 
 
 
 
 
 
 
 
 
 
 
 
a7d80f2
 
d463732
 
 
 
 
a7d80f2
 
 
 
 
d463732
 
 
 
 
 
a7d80f2
 
 
 
d463732
a7d80f2
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import pandas as pd
import json
import mlflow
import os
from sklearn.metrics import accuracy_score, f1_score, classification_report
from mlpipeline.entity import ModelEvaluationConfig, ModelEvaluationArtifact
from autogluon.tabular import TabularPredictor
from mlpipeline.logging.logger import get_logger
from mlpipeline.exception import ModelEvaluationException
import sys

# Module-level logger obtained from the project's get_logger helper, keyed by
# this module's import name; used for all evaluation progress/warning output.
logger = get_logger(__name__)


class ModelEvaluation:
    """Evaluate a saved AutoGluon ``TabularPredictor`` on a test CSV.

    Labels and predictions are collapsed to a binary problem (``value > 0`` is
    the positive class). Accuracy, weighted F1/precision/recall and ROC-AUC are
    written to ``config.metrics_file`` as JSON and, when DagsHub credentials
    are available, logged to MLflow together with the confusion-matrix counts.
    """

    def __init__(self, config: ModelEvaluationConfig):
        """Store the evaluation configuration.

        Args:
            config: Provides ``test_data_path``, ``model_path``,
                ``target_column``, ``root_dir`` and ``metrics_file``.
        """
        self.config = config

    @staticmethod
    def _configure_mlflow() -> bool:
        """Point MLflow at the DagsHub tracking server on a best-effort basis.

        Returns:
            True when tracking was configured successfully, False when it is
            unavailable (missing token, non-DagsHub URI, or a setup error).
        """
        tracking_uri = os.getenv("MLFLOW_TRACKING_URI", "https://dagshub.com/abheshith7/AutoML-MLOps-PipeLine.mlflow/")
        dagshub_token = os.getenv("DAGSHUB_TOKEN")

        if not dagshub_token:
            logger.warning("MLflow tracking disabled: DAGSHUB_TOKEN not set")
            return False
        if "dagshub.com" not in tracking_uri:
            # Report the real reason instead of blaming the token.
            logger.warning("MLflow tracking disabled: MLFLOW_TRACKING_URI is not a dagshub.com endpoint")
            return False

        try:
            # MLflow reads basic-auth credentials from these variables.
            os.environ["MLFLOW_TRACKING_USERNAME"] = os.getenv("DAGSHUB_USERNAME", "abheshith7")
            os.environ["MLFLOW_TRACKING_PASSWORD"] = dagshub_token
            mlflow.set_tracking_uri(tracking_uri)
            mlflow.set_experiment("automl_experiment")
            logger.info(f"MLflow tracking enabled: {tracking_uri}")
        except Exception as e:
            # Tracking is optional: a setup failure must not abort evaluation.
            logger.warning(f"MLflow tracking disabled: {str(e)}")
            return False
        return True

    @staticmethod
    def _positive_class_scores(pred_proba):
        """Return the per-row probability of the positive (``label > 0``) class.

        For a DataFrame the probabilities of every column whose label is
        greater than zero are summed, which matches the ``> 0`` binarisation
        applied to the labels; with plain 0/1 columns this is exactly column 1.
        Array-like input without labelled columns falls back to column 1.
        """
        if hasattr(pred_proba, "iloc"):
            positive_columns = [label for label in pred_proba.columns if label > 0]
            return pred_proba[positive_columns].sum(axis=1)
        return pred_proba[:, 1]

    def _evaluate_and_log(self, mlflow_enabled: bool) -> dict:
        """Compute the test metrics, persist them and optionally log to MLflow.

        Args:
            mlflow_enabled: Whether an MLflow run is active and should receive
                params, metrics, tags and the metrics artifact.

        Returns:
            The metrics dictionary that was written to ``config.metrics_file``.
        """
        from sklearn.metrics import precision_score, recall_score, roc_auc_score, confusion_matrix

        test_df = pd.read_csv(self.config.test_data_path)
        if mlflow_enabled:
            mlflow.log_param("test_samples", len(test_df))

        predictor = TabularPredictor.load(str(self.config.model_path))
        predictions = predictor.predict(test_df)
        y_test = test_df[self.config.target_column]

        # Collapse to a binary task: any value above zero is the positive class.
        predictions_binary = (predictions > 0).astype(int)
        y_test_binary = (y_test > 0).astype(int)

        accuracy = float(accuracy_score(y_test_binary, predictions_binary))
        f1 = float(f1_score(y_test_binary, predictions_binary, average='weighted'))
        precision = float(precision_score(y_test_binary, predictions_binary, average='weighted'))
        recall = float(recall_score(y_test_binary, predictions_binary, average='weighted'))

        # ROC-AUC is best-effort (e.g. the model may not expose probabilities,
        # or the test split may contain a single class); record 0.0 but say why.
        try:
            pred_proba = predictor.predict_proba(test_df)
            auc = float(roc_auc_score(y_test_binary, self._positive_class_scores(pred_proba)))
        except Exception as e:
            logger.warning(f"ROC-AUC could not be computed, recording 0.0: {str(e)}")
            auc = 0.0

        metrics = {
            "test_accuracy": accuracy,
            "test_f1_score": f1,
            "test_precision": precision,
            "test_recall": recall,
            "test_roc_auc": auc
        }

        # Pin labels so the matrix is always 2x2, even when only one class
        # appears in the labels/predictions.
        cm = confusion_matrix(y_test_binary, predictions_binary, labels=[0, 1])
        tn, fp, fn, tp = (int(count) for count in cm.ravel())
        cm_dict = {
            "true_negatives": tn,
            "false_positives": fp,
            "false_negatives": fn,
            "true_positives": tp
        }

        if mlflow_enabled:
            mlflow.log_metrics(metrics)
            mlflow.log_metrics(cm_dict)
            mlflow.set_tag("evaluation_stage", "test")

        os.makedirs(self.config.root_dir, exist_ok=True)
        with open(self.config.metrics_file, "w", encoding="utf-8") as f:
            json.dump(metrics, f, indent=2)

        if mlflow_enabled:
            mlflow.log_artifact(str(self.config.metrics_file))
            run_id = mlflow.active_run().info.run_id
            logger.info(f"MLflow run logged: {run_id}")

        return metrics

    def evaluate(self) -> ModelEvaluationArtifact:
        """Run the evaluation and return its artifact.

        Returns:
            A ``ModelEvaluationArtifact`` carrying the accuracy and weighted
            F1 score; ``is_model_accepted`` is always True.

        Raises:
            ModelEvaluationException: Wraps any error raised while loading the
                data/model, computing metrics, or logging results.
        """
        try:
            logger.info("Starting model evaluation")

            mlflow_enabled = self._configure_mlflow()
            if mlflow_enabled:
                mlflow.start_run()

            # Always close the run, marking it FAILED when evaluation raises,
            # so no run is left active after an error.
            run_status = "FAILED"
            try:
                metrics = self._evaluate_and_log(mlflow_enabled)
                run_status = "FINISHED"
            finally:
                if mlflow_enabled:
                    mlflow.end_run(status=run_status)

            logger.info(f"Evaluation metrics: {metrics}")

            accuracy = metrics["test_accuracy"]
            return ModelEvaluationArtifact(
                is_model_accepted=True,
                evaluation_metrics={"accuracy": accuracy, "f1_score": metrics["test_f1_score"]},
                message=f"Model evaluation completed with accuracy: {accuracy:.4f}"
            )
        except Exception as e:
            # Chain the original exception so its traceback is preserved.
            raise ModelEvaluationException(str(e), sys) from e