Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import numpy as np | |
| from sklearn.base import BaseEstimator, TransformerMixin | |
| from sklearn.preprocessing import OneHotEncoder | |
| class CustomEncoder(BaseEstimator, TransformerMixin): | |
| def __init__(self, bool_cols=None, cat_onehot_cols=None, num_cols=None): | |
| self.bool_cols = bool_cols or [] | |
| self.cat_onehot_cols = cat_onehot_cols or [] | |
| self.num_cols = num_cols or [] | |
| def fit(self, X, y=None): | |
| # Stockage des colonnes | |
| self.bool_cols_ = list(self.bool_cols) | |
| self.cat_onehot_cols_ = list(self.cat_onehot_cols) | |
| self.num_cols_ = list(self.num_cols) | |
| # OneHot | |
| self.ohe_ = OneHotEncoder(handle_unknown="ignore", sparse_output=False) | |
| if self.cat_onehot_cols_: | |
| self.ohe_.fit(X[self.cat_onehot_cols_]) | |
| return self | |
| def transform(self, X): | |
| parts = [] | |
| # Booléens | |
| if self.bool_cols_: | |
| df_bool = X[self.bool_cols_].astype(int) | |
| parts.append(df_bool) | |
| # Numériques | |
| if self.num_cols_: | |
| df_num = X[self.num_cols_] | |
| parts.append(df_num) | |
| # OneHot | |
| if self.cat_onehot_cols_: | |
| ohe_data = self.ohe_.transform(X[self.cat_onehot_cols_]) | |
| ohe_df = pd.DataFrame( | |
| ohe_data, | |
| columns=self.ohe_.get_feature_names_out(self.cat_onehot_cols_), | |
| index=X.index | |
| ) | |
| parts.append(ohe_df) | |
| # Fusion | |
| df_final = pd.concat(parts, axis=1) | |
| # Stockage des colonnes finales (utile pour FI) | |
| self.feature_names_ = df_final.columns.tolist() | |
| return df_final |