from src.utils.mlflow_utils import setup_mlflow
import mlflow.sklearn
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, r2_score


def train_final_model(
    X_train,
    X_test,
    y_train,
    y_test,
    preprocessor,
    best_params: dict,
):
    """
    Fit the final random-forest pipeline and record the result in MLflow.

    The preprocessor and a ``RandomForestRegressor`` built from
    ``best_params`` are chained into a single ``Pipeline``, fitted on the
    training split and scored on the test split (RMSE and R2). Parameters,
    both metrics and the fitted pipeline are then logged inside an MLflow
    run named "Champion_RF_Model".

    Args:
        X_train: Training features passed to ``Pipeline.fit``.
        X_test: Held-out features passed to ``Pipeline.predict``.
        y_train: Training targets.
        y_test: Held-out targets used for scoring.
        preprocessor: First pipeline step; presumably a scikit-learn
            transformer -- confirm against the caller.
        best_params: Keyword arguments forwarded unchanged to
            ``RandomForestRegressor`` and logged as run parameters.

    Returns:
        The fitted ``Pipeline`` (preprocessor + model).
    """
    # NOTE(review): setup_mlflow is defined elsewhere; presumably it selects
    # or creates the "AQI_Prediction" experiment -- confirm.
    setup_mlflow("AQI_Prediction")

    # Assemble preprocessing and the regressor into one estimator.
    regressor = RandomForestRegressor(**best_params)
    steps = [
        ("preprocessor", preprocessor),
        ("model", regressor),
    ]
    fitted_pipeline = Pipeline(steps)
    fitted_pipeline.fit(X_train, y_train)

    # Score on the held-out split; RMSE is the square root of the MSE.
    predictions = fitted_pipeline.predict(X_test)
    mse = mean_squared_error(y_test, predictions)
    rmse = mse ** 0.5
    r2 = r2_score(y_test, predictions)

    # Record hyperparameters, metrics and the model artifact in one run.
    metrics = (
        ("rmse", rmse),
        ("r2_score", r2),
    )
    with mlflow.start_run(run_name="Champion_RF_Model"):
        mlflow.log_params(best_params)
        for metric_name, metric_value in metrics:
            mlflow.log_metric(metric_name, metric_value)
        mlflow.sklearn.log_model(fitted_pipeline, "model")

    print(f"Final Model → RMSE: {rmse:.2f}, R2: {r2:.4f}")
    print("Model logged to MLflow")

    return fitted_pipeline