Spaces:

Abeshith
/

AutoML_MLOps_PipeLine

Sleeping

File size: 5,833 Bytes

import pandas as pd
import mlflow
import os
from mlpipeline.entity import ModelTrainerConfig, ModelTrainerArtifact
from mlpipeline.automl import AutoMLFactory
from mlpipeline.logging.logger import get_logger
from mlpipeline.exception import ModelTrainingException
from mlpipeline.constants import AUTOML_CONFIG_FILE_PATH
from mlpipeline.utils.common import read_yaml
from pathlib import Path
import sys

logger = get_logger(__name__)


class AutoMLTrainer:
    """Train a model with the configured AutoML library.

    The run is optionally tracked in MLflow. Tracking is switched on only
    when ``DAGSHUB_TOKEN`` is set and the tracking URI
    (``MLFLOW_TRACKING_URI`` or the built-in default) contains
    ``dagshub.com``; otherwise training proceeds without tracking.
    """

    def __init__(self, config: ModelTrainerConfig):
        """Store the trainer configuration.

        Args:
            config: Supplies ``train_data_path``, ``automl_library``,
                ``target_column``, ``root_dir`` and ``model_path``.
        """
        self.config = config

    def train(self) -> ModelTrainerArtifact:
        """Run the full training flow and return the resulting artifact.

        Reads the training CSV, looks up the library section of the AutoML
        YAML config, builds a trainer through ``AutoMLFactory``, fits it and,
        when tracking is enabled, logs parameters, metrics and artifacts to
        MLflow.

        Returns:
            ModelTrainerArtifact holding the model path, the training metrics
            and a success message.

        Raises:
            ModelTrainingException: Wraps any exception raised during the
                flow. An MLflow run that is still active at that point is
                closed with status ``FAILED``.
        """
        try:
            logger.info("Starting model training")

            mlflow_enabled = self._configure_mlflow()
            if mlflow_enabled:
                mlflow.start_run()

            train_df = pd.read_csv(self.config.train_data_path)

            automl_config = read_yaml(Path(AUTOML_CONFIG_FILE_PATH))
            library_config = automl_config[self.config.automl_library]

            if mlflow_enabled:
                mlflow.log_param("automl_library", self.config.automl_library)
                mlflow.log_param("target_column", self.config.target_column)
                mlflow.log_param("train_samples", len(train_df))
                mlflow.log_params(library_config)

            trainer = AutoMLFactory.create_trainer(
                self.config.automl_library,
                library_config
            )

            os.makedirs(self.config.root_dir, exist_ok=True)

            metrics, feature_importance = self._fit(trainer, train_df)

            if mlflow_enabled:
                self._log_results(metrics, feature_importance)

                run_id = mlflow.active_run().info.run_id
                logger.info(f"MLflow run logged: {run_id}")
                mlflow.end_run()

            logger.info(f"Model trained with metrics: {metrics}")

            return ModelTrainerArtifact(
                model_path=self.config.model_path,
                train_metrics=metrics,
                is_trained=True,
                message=f"Model trained successfully with score: {metrics.get('score', 0.0):.4f}"
            )
        except Exception as e:
            # Close a dangling run as FAILED so a broken training run is not
            # recorded with the default FINISHED status.
            if mlflow.active_run():
                mlflow.end_run(status="FAILED")
            raise ModelTrainingException(str(e), sys) from e

    @staticmethod
    def _configure_mlflow() -> bool:
        """Point MLflow at DagsHub; return True when tracking is usable.

        Setup is best-effort: any failure is logged as a warning and
        tracking is reported as disabled instead of aborting training.
        """
        tracking_uri = os.getenv("MLFLOW_TRACKING_URI", "https://dagshub.com/abheshith7/AutoML-MLOps-PipeLine.mlflow/")
        dagshub_token = os.getenv("DAGSHUB_TOKEN")

        if not dagshub_token:
            logger.warning("MLflow tracking disabled: DAGSHUB_TOKEN not set")
            return False
        if "dagshub.com" not in tracking_uri:
            # Report the actual cause rather than blaming the token.
            logger.warning("MLflow tracking disabled: MLFLOW_TRACKING_URI is not a dagshub.com URI")
            return False

        try:
            # MLflow reads its HTTP basic-auth credentials from these variables.
            os.environ["MLFLOW_TRACKING_USERNAME"] = os.getenv("DAGSHUB_USERNAME", "abheshith7")
            os.environ["MLFLOW_TRACKING_PASSWORD"] = dagshub_token
            mlflow.set_tracking_uri(tracking_uri)
            mlflow.set_experiment("automl_experiment")
            logger.info(f"MLflow tracking enabled: {tracking_uri}")
        except Exception as e:
            logger.warning(f"MLflow tracking disabled: {str(e)}")
            return False
        return True

    def _fit(self, trainer, train_df: pd.DataFrame):
        """Fit ``trainer`` and return ``(metrics, feature_importance)``.

        The 'autogluon' trainer is given the whole frame plus the target
        column name and may return either metrics alone or a
        ``(metrics, feature_importance)`` tuple. Every other trainer is given
        separate features and target and returns metrics only, so
        ``feature_importance`` is None for them.
        """
        target_column = self.config.target_column

        if self.config.automl_library == 'autogluon':
            result = trainer.train(train_df, target_column, self.config.model_path)
            if isinstance(result, tuple):
                metrics, feature_importance = result
                return metrics, feature_importance
            return result, None

        X_train = train_df.drop(columns=[target_column])
        y_train = train_df[target_column]
        return trainer.train(X_train, y_train, self.config.model_path), None

    def _log_results(self, metrics, feature_importance) -> None:
        """Log metrics, tags and artifacts of a finished fit to the active MLflow run."""
        # MLflow metrics must be numeric; everything else is recorded as a tag.
        numeric_metrics = {}
        string_values = {}
        for key, value in metrics.items():
            if isinstance(value, (int, float)):
                numeric_metrics[key] = value
            else:
                string_values[key] = str(value)

        if numeric_metrics:
            mlflow.log_metrics(numeric_metrics)

        for key, value in string_values.items():
            mlflow.set_tag(key, value)

        # Log feature importance as a JSON artifact.
        if feature_importance is not None:
            import json
            fi_dict = feature_importance.to_dict() if hasattr(feature_importance, 'to_dict') else {}
            fi_path = Path(self.config.root_dir) / "feature_importance.json"
            with open(fi_path, 'w', encoding="utf-8") as f:
                json.dump(fi_dict, f, indent=2)
            mlflow.log_artifact(str(fi_path))

        # The leaderboard is loaded through AutoGluon's TabularPredictor, so
        # it is only attempted for that library. It stays best-effort, but a
        # failure is reported instead of being silently swallowed.
        if self.config.automl_library == 'autogluon':
            try:
                from autogluon.tabular import TabularPredictor
                predictor = TabularPredictor.load(str(self.config.model_path))
                leaderboard = predictor.leaderboard(silent=True)
                lb_path = Path(self.config.root_dir) / "leaderboard.csv"
                leaderboard.to_csv(lb_path, index=False)
                mlflow.log_artifact(str(lb_path))
            except Exception as e:
                logger.warning(f"Skipping leaderboard logging: {e}")

        # Set additional tags.
        mlflow.set_tag("model_type", "AutoML")
        mlflow.set_tag("framework", self.config.automl_library)