Spaces:
Sleeping
Sleeping
| import os | |
| import logging | |
| from typing import Annotated | |
| import mlflow | |
| import joblib | |
| import pandas as pd | |
| from sklearn.pipeline import Pipeline | |
| from zenml.integrations.mlflow.mlflow_utils import get_tracking_uri | |
| from zenml import ArtifactConfig, step | |
| from zenml.client import Client | |
| from zenml import Model | |
| # Import ModelBuilding class | |
| from src.model_building import ModelBuilding | |
# Resolve the experiment tracker configured on the currently active ZenML stack.
_active_stack = Client().active_stack
experiment_tracker = _active_stack.experiment_tracker

# Metadata for the model produced by this module; the version is left unset.
model_metadata = Model(
    name="customer_churn_prediction",
    description="Customer churn prediction model for Telecom company.",
    license="Apache-2.0",
    version=None,
)

# Configure root logging at INFO and create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
| # # file:///home/sarath_kumar/.config/zenml/local_stores/b878ca30-c25c-4712-9a5a-a299384dcb87/mlruns/649008275814095771/a4672e3f3d6840cd8f5114939de29272/artifacts/model/model.pkl | |
| # | |
| # Adjusted model_builder_step function | |
# NOTE(review): no @step decorator is visible on this function in this view,
# although `step` is imported by the file -- confirm it is applied as intended.
def model_builder_step(
    model_name: str, X_train: pd.DataFrame, y_train: pd.Series
) -> Annotated[
    Pipeline, ArtifactConfig(name="sklearn_pipeline", is_model_artifact=True)
]:
    """Create the requested model, fit it, and return it wrapped in a Pipeline.

    The estimator comes from ``ModelBuilding.get_model`` and is placed in a
    single-step scikit-learn ``Pipeline`` (no preprocessing step is added
    here). Fitting happens with MLflow scikit-learn autologging enabled.

    If no MLflow run is active, one is started for the duration of the call
    and closed afterwards with status ``FINISHED`` on success or ``FAILED``
    on error. A run that was already active on entry is left open for
    whoever started it.

    Parameters
    ----------
    model_name : str
        Name of the model to create.
    X_train : pd.DataFrame
        Training data features.
    y_train : pd.Series
        Training data labels/target.

    Returns
    -------
    Pipeline
        The fitted pipeline containing the model under the step name "model".

    Raises
    ------
    ValueError
        Logged and re-raised when model creation or fitting raises it. Any
        other exception propagates without being logged here.
    """
    # Column groups are computed for logging only; they are not used further.
    categorical_cols = X_train.select_dtypes(include=["object", "category"]).columns
    numerical_cols = X_train.select_dtypes(exclude=["object", "category"]).columns
    logger.info("Categorical columns: %s", categorical_cols.tolist())
    logger.info("Numerical columns: %s", numerical_cols.tolist())
    logger.info("Starting model building step...")

    # Only a run opened here is closed here, so an enclosing run keeps logging.
    owns_run = mlflow.active_run() is None
    if owns_run:
        mlflow.start_run()

    # Assume failure until training completes so the run status is accurate.
    run_status = "FAILED"
    try:
        # Constructed inside the try so a failure here cannot leak the run.
        model_builder = ModelBuilding()
        mlflow.sklearn.autolog()
        model = model_builder.get_model(model_name, X_train, y_train)
        logger.info("Model '%s' has been successfully created.", model_name)

        # The pipeline holds only the model; no preprocessing is added here.
        pipeline = Pipeline(steps=[("model", model)])
        pipeline.fit(X_train, y_train)
        logger.info("Model training completed")
        run_status = "FINISHED"
    except ValueError as e:
        logger.error("An error occurred: %s", e)
        raise
    finally:
        if owns_run:
            mlflow.end_run(status=run_status)
    return pipeline
| # # Save the model pipeline locally after evaluation | |
| # model_dir = "models" | |
| # os.makedirs(model_dir, exist_ok=True) # Ensure the models directory exists | |
| # model_path = os.path.join(model_dir, "model.pkl") | |
| # joblib.dump(pipeline, model_path) # Save model pipeline as 'model.pkl' | |
| # logger.info(f"Model saved at {model_path}") | |
| # zenml stack register mlflow_stack_customer_churn_prediction -a default -o default -d mlflow -e mlflow_tracker_customer_churn_prediction --set |