Spaces:
Sleeping
Sleeping
File size: 1,672 Bytes
5fa8558 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
import pandas as pd
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import OneHotEncoder
class CustomEncoder(BaseEstimator, TransformerMixin):
    """Assemble boolean, numeric and one-hot encoded columns into one DataFrame.

    The three column groups are handled as follows:

    * ``bool_cols`` are cast to ``int``;
    * ``num_cols`` are passed through unchanged;
    * ``cat_onehot_cols`` are encoded with a ``OneHotEncoder`` configured with
      ``handle_unknown="ignore"`` and dense output.

    The output columns are always ordered boolean, numeric, one-hot.

    Parameters
    ----------
    bool_cols : sequence of column labels or None, default=None
        Columns cast to ``int``. ``None`` means no such column.
    cat_onehot_cols : sequence of column labels or None, default=None
        Columns to one-hot encode. ``None`` means no such column.
    num_cols : sequence of column labels or None, default=None
        Columns passed through as-is. ``None`` means no such column.

    Attributes
    ----------
    bool_cols_, cat_onehot_cols_, num_cols_ : list
        Normalised copies of the constructor arguments, set by ``fit``.
    ohe_ : OneHotEncoder
        Encoder created by ``fit``; only fitted when ``cat_onehot_cols_`` is
        non-empty.
    feature_names_ : list
        Column names of the most recent ``transform`` output.
    """

    def __init__(self, bool_cols=None, cat_onehot_cols=None, num_cols=None):
        # Keep the arguments exactly as received. ``sklearn.base.clone``
        # rebuilds the estimator from ``get_params()`` and checks that each
        # stored parameter *is* the object that was passed in; replacing
        # ``None`` or an empty list with a fresh ``[]`` here makes that check
        # fail. Normalisation is done in ``fit`` instead.
        self.bool_cols = bool_cols
        self.cat_onehot_cols = cat_onehot_cols
        self.num_cols = num_cols

    @staticmethod
    def _as_list(columns):
        """Return ``columns`` as a new list, mapping ``None`` to ``[]``."""
        # An explicit ``is None`` test (rather than ``columns or []``) keeps
        # array-like inputs working, whose truth value may be ambiguous.
        return [] if columns is None else list(columns)

    def fit(self, X, y=None):
        """Record the column groups and fit the one-hot encoder.

        Parameters
        ----------
        X : object indexable by a list of column labels
            Training data; assumed to be a pandas DataFrame, since
            ``transform`` also reads ``X.index``.
        y : ignored
            Accepted only so the method matches the usual ``fit(X, y)`` call.

        Returns
        -------
        self
        """
        # Store the column groups as independent lists.
        self.bool_cols_ = self._as_list(self.bool_cols)
        self.cat_onehot_cols_ = self._as_list(self.cat_onehot_cols)
        self.num_cols_ = self._as_list(self.num_cols)

        # One-hot encoder: always created, fitted only if there is something
        # to encode.
        self.ohe_ = OneHotEncoder(handle_unknown="ignore", sparse_output=False)
        if self.cat_onehot_cols_:
            self.ohe_.fit(X[self.cat_onehot_cols_])
        return self

    def transform(self, X):
        """Build the encoded DataFrame for ``X``.

        Parameters
        ----------
        X : pandas.DataFrame
            Data containing every column recorded by ``fit``.

        Returns
        -------
        pandas.DataFrame
            Boolean columns (as ``int``), numeric columns and one-hot columns
            concatenated side by side. When no column group is configured, an
            empty frame sharing ``X``'s index is returned.
        """
        parts = []

        # Booleans -> 0/1 integers.
        if self.bool_cols_:
            parts.append(X[self.bool_cols_].astype(int))

        # Numeric columns are passed through untouched.
        if self.num_cols_:
            parts.append(X[self.num_cols_])

        # One-hot encoded categoricals, re-wrapped in a frame that keeps
        # ``X``'s index so the concatenation below aligns row by row.
        if self.cat_onehot_cols_:
            ohe_data = self.ohe_.transform(X[self.cat_onehot_cols_])
            ohe_df = pd.DataFrame(
                ohe_data,
                columns=self.ohe_.get_feature_names_out(self.cat_onehot_cols_),
                index=X.index,
            )
            parts.append(ohe_df)

        # Merge the pieces. ``pd.concat`` raises on an empty list, so an
        # encoder with no configured columns yields an empty frame instead.
        if parts:
            df_final = pd.concat(parts, axis=1)
        else:
            df_final = pd.DataFrame(index=X.index)

        # Remember the final column names (useful for feature importances).
        self.feature_names_ = df_final.columns.tolist()
        return df_final