File size: 1,270 Bytes
3d6943b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
from src.utils.mlflow_utils import setup_mlflow
import mlflow.sklearn

from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, r2_score


def train_final_model(X_train, X_test, y_train, y_test, preprocessor, best_params: dict):
    """Fit the final random-forest pipeline, score it, and record it in MLflow.

    The function chains ``preprocessor`` with a ``RandomForestRegressor``
    built from ``best_params``, fits that pipeline on the training split,
    and computes RMSE and R2 on the test split. The hyperparameters, both
    metrics, and the fitted pipeline are then logged inside an MLflow run
    named "Champion_RF_Model", and a short summary is printed.

    Args:
        X_train: Training features passed to ``Pipeline.fit``.
        X_test: Held-out features passed to ``Pipeline.predict``.
        y_train: Training targets.
        y_test: Held-out targets used for the RMSE and R2 metrics.
        preprocessor: First pipeline step, applied before the regressor.
        best_params: Keyword arguments forwarded to ``RandomForestRegressor``
            and logged as MLflow params.

    Returns:
        The fitted ``Pipeline`` (preprocessor + random-forest regressor).
    """
    # NOTE(review): setup_mlflow lives in src.utils.mlflow_utils and is not
    # visible here; presumably it selects the tracking experiment - confirm.
    setup_mlflow("AQI_Prediction")

    # Assemble preprocessing and the regressor into one estimator.
    fitted_pipeline = Pipeline(
        steps=[
            ("preprocessor", preprocessor),
            ("model", RandomForestRegressor(**best_params)),
        ]
    )
    fitted_pipeline.fit(X_train, y_train)

    # Score on the held-out split; RMSE is the square root of the MSE.
    predictions = fitted_pipeline.predict(X_test)
    mse = mean_squared_error(y_test, predictions)
    rmse = mse ** 0.5
    r2 = r2_score(y_test, predictions)

    # Training and scoring happen before the run opens, so the run itself
    # only holds the logging calls below.
    with mlflow.start_run(run_name="Champion_RF_Model"):
        mlflow.log_params(best_params)
        mlflow.log_metric("rmse", rmse)
        mlflow.log_metric("r2_score", r2)
        mlflow.sklearn.log_model(fitted_pipeline, "model")

        print(f"Final Model → RMSE: {rmse:.2f}, R2: {r2:.4f}")
        print("Model logged to MLflow")

    return fitted_pipeline