File size: 1,672 Bytes
5fa8558
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import pandas as pd
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import OneHotEncoder

class CustomEncoder(BaseEstimator, TransformerMixin):
    """Build one feature frame from boolean, numeric and categorical columns.

    Boolean columns are cast to ``int``, numeric columns are passed through
    unchanged, and categorical columns are one-hot encoded with a
    ``OneHotEncoder`` configured with ``handle_unknown="ignore"`` and dense
    output. The three groups are concatenated column-wise, in that order.

    ``X`` is indexed with lists of column labels and must expose ``.index``,
    i.e. it is expected to be a pandas ``DataFrame``.

    Parameters
    ----------
    bool_cols : sequence of column labels, optional
        Columns cast to ``int``.
    cat_onehot_cols : sequence of column labels, optional
        Columns to one-hot encode.
    num_cols : sequence of column labels, optional
        Columns copied through unchanged.

    Attributes
    ----------
    bool_cols_, cat_onehot_cols_, num_cols_ : list
        Column lists resolved in ``fit`` (``None`` becomes ``[]``).
    ohe_ : OneHotEncoder
        Encoder created in ``fit``; it is only fitted when
        ``cat_onehot_cols`` is non-empty.
    feature_names_ : list
        Output column names, refreshed on every ``transform`` call.
    """

    def __init__(self, bool_cols=None, cat_onehot_cols=None, num_cols=None):
        # Store the arguments untouched. scikit-learn's clone() rebuilds the
        # estimator from get_params() and checks that each stored parameter
        # *is* the object that was passed in; replacing None / an empty list
        # with a fresh [] here makes clone() raise RuntimeError.
        self.bool_cols = bool_cols
        self.cat_onehot_cols = cat_onehot_cols
        self.num_cols = num_cols

    @staticmethod
    def _as_list(cols):
        """Return ``cols`` as a new list, mapping ``None`` to an empty list."""
        return [] if cols is None else list(cols)

    def fit(self, X, y=None):
        """Resolve the column lists and fit the one-hot encoder.

        Parameters
        ----------
        X : DataFrame
            Training data containing the configured columns.
        y : ignored
            Present for scikit-learn API compatibility.

        Returns
        -------
        CustomEncoder
            ``self``, to allow call chaining.
        """
        # Resolved copies of the column lists (fitted state).
        self.bool_cols_ = self._as_list(self.bool_cols)
        self.cat_onehot_cols_ = self._as_list(self.cat_onehot_cols)
        self.num_cols_ = self._as_list(self.num_cols)

        # One-hot encoder: always created, fitted only if there is something
        # to encode.
        self.ohe_ = OneHotEncoder(handle_unknown="ignore", sparse_output=False)
        if self.cat_onehot_cols_:
            self.ohe_.fit(X[self.cat_onehot_cols_])

        return self

    def transform(self, X):
        """Encode ``X`` and return the concatenated feature frame.

        Parameters
        ----------
        X : DataFrame
            Data containing the columns configured at construction time.

        Returns
        -------
        DataFrame
            Boolean (as ``int``), numeric and one-hot columns, in that order.
            When no column group is configured the result has zero columns
            and the index of ``X``.
        """
        parts = []

        # Booleans -> 0/1 integers.
        if self.bool_cols_:
            parts.append(X[self.bool_cols_].astype(int))

        # Numeric columns are passed through as-is.
        if self.num_cols_:
            parts.append(X[self.num_cols_])

        # One-hot encoded categoricals, re-wrapped in a frame so they keep
        # the index of X and line up with the other parts on concat.
        if self.cat_onehot_cols_:
            encoded = self.ohe_.transform(X[self.cat_onehot_cols_])
            parts.append(
                pd.DataFrame(
                    encoded,
                    columns=self.ohe_.get_feature_names_out(
                        self.cat_onehot_cols_
                    ),
                    index=X.index,
                )
            )

        # Merge. pd.concat raises on an empty list, so fall back to an
        # empty-column frame when no column group was configured.
        if parts:
            df_final = pd.concat(parts, axis=1)
        else:
            df_final = pd.DataFrame(index=X.index)

        # Keep the final column names (handy for feature-importance reporting).
        self.feature_names_ = df_final.columns.tolist()

        return df_final