# Preprocess mixed numerical/categorical features, fit a random forest
# regressor on the training split, and report mean absolute error on the
# validation split.
#
# NOTE(review): numerical_cols, categorical_cols, X_train, y_train, X_valid
# and y_valid are not defined in this section; they are expected to be in
# scope already -- confirm against the preceding code.

from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error

# Numerical columns: replace missing values with a constant. No fill_value
# is passed, so the library default applies (documented as 0 for numeric
# data).
numerical_transformer = SimpleImputer(strategy='constant')

# Categorical columns: fill gaps with the most frequent value, then one-hot
# encode. handle_unknown='ignore' keeps transform from raising on categories
# that were not present when the encoder was fitted.
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

# Route each group of columns through its own transformer.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_cols),
        ('cat', categorical_transformer, categorical_cols),
    ])

# Fixed random_state so repeated runs build the same forest.
model = RandomForestRegressor(n_estimators=100, random_state=0)

# Bundle preprocessing and the model so both are fitted and applied together.
clf = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('model', model),
])

# Fit the preprocessing steps and the model on the training data.
clf.fit(X_train, y_train)

# The fitted preprocessing is applied to the validation features before
# predicting.
preds = clf.predict(X_valid)

print('MAE:', mean_absolute_error(y_valid, preds))