# technova-ml-api / encoder / custom_encoder.py
# github-actions
# deploy: snapshot
# 5fa8558
import pandas as pd
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import OneHotEncoder
class CustomEncoder(BaseEstimator, TransformerMixin):
    """Column-wise encoder combining boolean, numeric and one-hot features.

    The transformer selects three groups of columns from the input and
    concatenates them side by side, in this order:

    1. ``bool_cols`` cast to ``int`` via ``astype(int)``;
    2. ``num_cols`` passed through unchanged;
    3. ``cat_onehot_cols`` expanded by a dense ``OneHotEncoder`` configured
       with ``handle_unknown="ignore"``.

    Parameters
    ----------
    bool_cols : list of column labels, optional
        Columns to cast to ``int``. ``None`` means no such columns.
    cat_onehot_cols : list of column labels, optional
        Columns to one-hot encode. ``None`` means no such columns.
    num_cols : list of column labels, optional
        Columns copied through as-is. ``None`` means no such columns.

    Attributes
    ----------
    bool_cols_, cat_onehot_cols_, num_cols_ : list
        Copies of the configured column lists, taken in ``fit``.
    ohe_ : OneHotEncoder
        Encoder created in ``fit``; only fitted when ``cat_onehot_cols_``
        is non-empty.
    feature_names_ : list
        Column labels of the frame returned by the most recent
        ``transform`` call (useful for feature-importance reporting).

    Notes
    -----
    ``X`` is indexed with lists of labels and its ``.index`` is reused, so
    it is presumably a ``pandas.DataFrame`` -- confirm against callers.
    """

    def __init__(self, bool_cols=None, cat_onehot_cols=None, num_cols=None):
        # Only replace a parameter when it is None. Using ``x or []`` would
        # also swap a caller-supplied empty list for a *different* list
        # object, and ``sklearn.base.clone`` rejects estimators whose
        # constructor does not store the exact object it was given.
        self.bool_cols = [] if bool_cols is None else bool_cols
        self.cat_onehot_cols = (
            [] if cat_onehot_cols is None else cat_onehot_cols
        )
        self.num_cols = [] if num_cols is None else num_cols

    def fit(self, X, y=None):
        """Record the column groups and fit the one-hot encoder.

        Parameters
        ----------
        X : table supporting ``X[list_of_labels]``
            Training data.
        y : ignored
            Present for scikit-learn API compatibility.

        Returns
        -------
        self
        """
        # Snapshot the column lists; ``or []`` keeps fit working if a
        # parameter was reset to None through ``set_params``.
        self.bool_cols_ = list(self.bool_cols or [])
        self.cat_onehot_cols_ = list(self.cat_onehot_cols or [])
        self.num_cols_ = list(self.num_cols or [])

        # One-hot encoder: created unconditionally, fitted only when there
        # are categorical columns to learn from.
        self.ohe_ = OneHotEncoder(handle_unknown="ignore", sparse_output=False)
        if self.cat_onehot_cols_:
            self.ohe_.fit(X[self.cat_onehot_cols_])

        return self

    def transform(self, X):
        """Encode ``X`` and return the concatenated feature frame.

        Parameters
        ----------
        X : table supporting ``X[list_of_labels]`` and ``X.index``
            Data to encode; must contain every configured column.

        Returns
        -------
        pandas.DataFrame
            Boolean columns (as ``int``), then numeric columns, then the
            one-hot columns, all sharing ``X.index``. When no column group
            is configured, an empty frame with ``X.index`` is returned.
        """
        parts = []

        # Booleans -> 0/1 integers.
        if self.bool_cols_:
            parts.append(X[self.bool_cols_].astype(int))

        # Numeric columns are passed through untouched.
        if self.num_cols_:
            parts.append(X[self.num_cols_])

        # One-hot block, rebuilt as a DataFrame so that it aligns with the
        # other parts on X's index during concatenation.
        if self.cat_onehot_cols_:
            ohe_data = self.ohe_.transform(X[self.cat_onehot_cols_])
            ohe_df = pd.DataFrame(
                ohe_data,
                columns=self.ohe_.get_feature_names_out(self.cat_onehot_cols_),
                index=X.index,
            )
            parts.append(ohe_df)

        # Merge. ``pd.concat`` raises on an empty list, so an encoder with
        # no configured columns yields an empty frame instead of crashing.
        if parts:
            df_final = pd.concat(parts, axis=1)
        else:
            df_final = pd.DataFrame(index=X.index)

        # Remember the output column labels (useful for feature importance).
        self.feature_names_ = df_final.columns.tolist()

        return df_final