# Source: AQI_Predictor/src/models/train.py (uploaded by SparshSG; commit 3d6943b "Upload 18 files", verified)
import mlflow
import mlflow.sklearn
import pandas as pd
from src.utils.mlflow_utils import setup_mlflow
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from src.models.prepare import prepare_data
def train_models(df: pd.DataFrame) -> dict:
    """Train several regressors on *df* and log each one to MLflow.

    The data is split and a preprocessor obtained via ``prepare_data``; each
    candidate estimator is then wrapped with that preprocessor in a
    ``Pipeline``, fitted on the training split, scored on the test split,
    and recorded in its own MLflow run (parameter ``model``, metrics
    ``rmse`` and ``r2_score``, plus the fitted pipeline as an artifact
    named ``"model"``).

    Args:
        df: Input frame handed unchanged to ``prepare_data``.
            NOTE(review): the expected columns are defined by
            ``prepare_data`` and are not visible here.

    Returns:
        A dict mapping model name to ``{"RMSE": <float>, "R2": <float>}``
        computed on the test split.
    """
    X_train, X_test, y_train, y_test, preprocessor = prepare_data(df)

    models = {
        "LinearRegression": LinearRegression(),
        "Ridge": Ridge(),
        "Lasso": Lasso(),
        "RandomForest": RandomForestRegressor(random_state=42, n_jobs=-1),
    }

    # Presumably selects/creates the MLflow experiment — confirm in
    # src.utils.mlflow_utils.
    setup_mlflow("AQI_Prediction")

    results = {}
    for name, model in models.items():
        # One MLflow run per estimator so their metrics can be compared.
        with mlflow.start_run(run_name=name):
            pipeline = Pipeline([
                ("preprocessor", preprocessor),
                ("model", model),
            ])
            pipeline.fit(X_train, y_train)
            y_pred = pipeline.predict(X_test)

            # mean_squared_error returns the MSE; take the square root so
            # the value reported as "RMSE" really is the root-mean-squared
            # error. ``** 0.5`` is used instead of ``squared=False`` /
            # ``root_mean_squared_error`` to stay independent of the
            # installed scikit-learn version.
            rmse = mean_squared_error(y_test, y_pred) ** 0.5
            r2 = r2_score(y_test, y_pred)

            mlflow.log_param("model", name)
            mlflow.log_metric("rmse", rmse)
            mlflow.log_metric("r2_score", r2)
            mlflow.sklearn.log_model(pipeline, "model")

            results[name] = {"RMSE": rmse, "R2": r2}
            print(f"{name} → RMSE: {rmse:.2f}, R2: {r2:.4f}")

    return results