Spaces:

Abeshith
/

AutoML_MLOps_PipeLine

Running

File size: 4,919 Bytes

import pandas as pd
import json
import mlflow
import os
from sklearn.metrics import accuracy_score, f1_score, classification_report
from mlpipeline.entity import ModelEvaluationConfig, ModelEvaluationArtifact
from autogluon.tabular import TabularPredictor
from mlpipeline.logging.logger import get_logger
from mlpipeline.exception import ModelEvaluationException
import sys

# Module-level logger, obtained from the project's logging helper and keyed
# by this module's name.
logger = get_logger(__name__)


class ModelEvaluation:
    """Evaluate a saved AutoGluon ``TabularPredictor`` on a test CSV.

    Metrics are written as JSON to ``config.metrics_file`` and, when a
    DagsHub token is present in the environment, also logged to MLflow.
    """

    def __init__(self, config: ModelEvaluationConfig):
        """Store the evaluation configuration.

        Args:
            config: Provides ``test_data_path``, ``model_path``,
                ``target_column``, ``root_dir`` and ``metrics_file``.
        """
        self.config = config

    def _configure_mlflow(self) -> bool:
        """Point MLflow at the tracking server; return True when usable.

        Tracking is best-effort: a missing ``DAGSHUB_TOKEN``, a non-DagsHub
        tracking URI, or any error during setup only disables tracking and
        logs a warning.
        """
        tracking_uri = os.getenv("MLFLOW_TRACKING_URI", "https://dagshub.com/abheshith7/AutoML-MLOps-PipeLine.mlflow/")
        dagshub_token = os.getenv("DAGSHUB_TOKEN")

        if not (dagshub_token and "dagshub.com" in tracking_uri):
            logger.warning("MLflow tracking disabled: DAGSHUB_TOKEN not set")
            return False

        try:
            # MLflow reads HTTP basic-auth credentials from these variables.
            os.environ["MLFLOW_TRACKING_USERNAME"] = os.getenv("DAGSHUB_USERNAME", "abheshith7")
            os.environ["MLFLOW_TRACKING_PASSWORD"] = dagshub_token
            mlflow.set_tracking_uri(tracking_uri)
            mlflow.set_experiment("automl_experiment")
        except Exception as e:
            logger.warning(f"MLflow tracking disabled: {str(e)}")
            return False

        logger.info(f"MLflow tracking enabled: {tracking_uri}")
        return True

    def _compute_roc_auc(self, predictor, test_df, y_true_binary) -> float:
        """Return ROC-AUC from the second probability column, or 0.0 on failure.

        The 0.0 fallback is kept for backward compatibility with existing
        metric consumers; the failure reason is logged instead of being
        silently discarded.
        """
        from sklearn.metrics import roc_auc_score

        try:
            pred_proba = predictor.predict_proba(test_df)
            if hasattr(pred_proba, 'iloc'):
                pred_proba_positive = pred_proba.iloc[:, 1]
            else:
                pred_proba_positive = pred_proba[:, 1]
            return float(roc_auc_score(y_true_binary, pred_proba_positive))
        except Exception as e:
            logger.warning(f"ROC-AUC could not be computed, reporting 0.0: {str(e)}")
            return 0.0

    def evaluate(self) -> ModelEvaluationArtifact:
        """Score the saved model on the test set and persist the metrics.

        Returns:
            A ``ModelEvaluationArtifact`` with ``is_model_accepted=True`` and
            the accuracy / weighted F1 of the binarized predictions.

        Raises:
            ModelEvaluationException: Wraps any error raised during
                evaluation; the original exception is chained as the cause.
        """
        run = None  # Active MLflow run, if one was started.
        try:
            logger.info("Starting model evaluation")

            if self._configure_mlflow():
                run = mlflow.start_run()

            test_df = pd.read_csv(self.config.test_data_path)

            if run is not None:
                mlflow.log_param("test_samples", len(test_df))

            predictor = TabularPredictor.load(str(self.config.model_path))

            predictions = predictor.predict(test_df)
            y_test = test_df[self.config.target_column]

            # Collapse labels and predictions to binary: any value > 0 is
            # treated as the positive class.
            predictions_binary = (predictions > 0).astype(int)
            y_test_binary = (y_test > 0).astype(int)

            # Calculate comprehensive metrics
            from sklearn.metrics import precision_score, recall_score, confusion_matrix

            accuracy = float(accuracy_score(y_test_binary, predictions_binary))
            f1 = float(f1_score(y_test_binary, predictions_binary, average='weighted'))
            precision = float(precision_score(y_test_binary, predictions_binary, average='weighted'))
            recall = float(recall_score(y_test_binary, predictions_binary, average='weighted'))
            auc = self._compute_roc_auc(predictor, test_df, y_test_binary)

            metrics = {
                "test_accuracy": accuracy,
                "test_f1_score": f1,
                "test_precision": precision,
                "test_recall": recall,
                "test_roc_auc": auc
            }

            # Pin labels so the matrix is always 2x2, even when only one
            # class appears in the test data and predictions.
            cm = confusion_matrix(y_test_binary, predictions_binary, labels=[0, 1])
            tn, fp, fn, tp = cm.ravel()
            cm_dict = {
                "true_negatives": int(tn),
                "false_positives": int(fp),
                "false_negatives": int(fn),
                "true_positives": int(tp)
            }

            if run is not None:
                mlflow.log_metrics(metrics)
                mlflow.log_metrics(cm_dict)
                mlflow.set_tag("evaluation_stage", "test")

            os.makedirs(self.config.root_dir, exist_ok=True)

            with open(self.config.metrics_file, "w", encoding="utf-8") as f:
                json.dump(metrics, f, indent=2)

            if run is not None:
                mlflow.log_artifact(str(self.config.metrics_file))
                logger.info(f"MLflow run logged: {run.info.run_id}")
                mlflow.end_run()
                run = None

            logger.info(f"Evaluation metrics: {metrics}")

            return ModelEvaluationArtifact(
                is_model_accepted=True,
                evaluation_metrics={"accuracy": accuracy, "f1_score": f1},
                message=f"Model evaluation completed with accuracy: {accuracy:.4f}"
            )
        except Exception as e:
            # Close a run that is still open so it is not left dangling as
            # RUNNING; cleanup problems must not mask the original error.
            if run is not None:
                try:
                    mlflow.end_run(status="FAILED")
                except Exception as end_err:
                    logger.warning(f"Failed to close MLflow run: {str(end_err)}")
            raise ModelEvaluationException(str(e), sys) from e