|
|
from sklearn.base import BaseEstimator, TransformerMixin |
|
|
import numpy as np |
|
|
from catboost import CatBoostRegressor |
|
|
import pandas as pd |
|
|
|
|
|
|
|
|
|
|
|
class CatBoostWrapper(BaseEstimator):
    """Scikit-learn style wrapper around ``CatBoostRegressor``.

    The constructor only stores hyper-parameters. The underlying CatBoost
    model is created and trained in :meth:`fit` and kept in ``self.model``,
    which is ``None`` until then.

    Parameters
    ----------
    iterations, learning_rate, depth, l2_leaf_reg, random_seed
        Stored as attributes of the same name and forwarded unchanged to
        ``CatBoostRegressor`` when :meth:`fit` is called.
    """

    def __init__(
        self,
        iterations=2000,
        learning_rate=0.03,
        depth=6,
        l2_leaf_reg=5,
        random_seed=42,
    ):
        self.iterations = iterations
        self.learning_rate = learning_rate
        self.depth = depth
        self.l2_leaf_reg = l2_leaf_reg
        self.random_seed = random_seed
        # Populated by fit(); None marks the wrapper as not yet fitted.
        self.model = None

    def fit(self, X, y, eval_set=None):
        """Build a fresh ``CatBoostRegressor`` and train it on ``X`` / ``y``.

        Parameters
        ----------
        X, y
            Training features and target, passed straight to
            ``CatBoostRegressor.fit``.
        eval_set : optional
            Validation data forwarded to ``CatBoostRegressor.fit``. The
            default ``None`` reproduces the previous behaviour of training
            without any validation data.
            NOTE(review): the regressor is configured with
            ``early_stopping_rounds=100``; CatBoost documents its overfitting
            detector as working on validation data, so early stopping
            presumably only takes effect when ``eval_set`` is supplied --
            confirm against the CatBoost version in use.

        Returns
        -------
        self
            The fitted wrapper, to allow call chaining.
        """
        self.model = CatBoostRegressor(
            iterations=self.iterations,
            learning_rate=self.learning_rate,
            depth=self.depth,
            l2_leaf_reg=self.l2_leaf_reg,
            eval_metric='RMSE',
            random_seed=self.random_seed,
            early_stopping_rounds=100,
            verbose=100,
        )
        self.model.fit(X, y, eval_set=eval_set)
        return self

    def _require_fitted(self):
        """Return the trained model, or raise if :meth:`fit` has not run."""
        model = getattr(self, "model", None)
        if model is None:
            # Local import: keeps this check self-contained. NotFittedError
            # subclasses both AttributeError and ValueError, so callers that
            # caught the old AttributeError on ``None`` keep working.
            from sklearn.exceptions import NotFittedError

            raise NotFittedError(
                "This {} instance is not fitted yet; call 'fit' before "
                "using this method.".format(type(self).__name__)
            )
        return model

    def predict(self, X):
        """Return the trained model's predictions for ``X``.

        Raises
        ------
        sklearn.exceptions.NotFittedError
            If called before :meth:`fit`.
        """
        return self._require_fitted().predict(X)

    def feature_importances_(self, feature_names):
        """Return feature importances as a DataFrame, most important first.

        NOTE(review): this is a method taking ``feature_names``, not the
        array attribute that scikit-learn tooling reads under the same name;
        it is kept as a method for backward compatibility.

        Parameters
        ----------
        feature_names
            Labels for the ``'Feature'`` column; paired positionally with the
            values returned by the model's ``get_feature_importance()``.

        Returns
        -------
        pandas.DataFrame
            Columns ``'Feature'`` and ``'Importance'``, sorted by
            ``'Importance'`` in descending order.

        Raises
        ------
        sklearn.exceptions.NotFittedError
            If called before :meth:`fit`.
        """
        model = self._require_fitted()
        importance_table = pd.DataFrame({
            'Feature': feature_names,
            'Importance': model.get_feature_importance(),
        })
        return importance_table.sort_values(by='Importance', ascending=False)
|
|
|