|
|
class FeatureEngineer(BaseEstimator, TransformerMixin):
    """Derive helper columns and label-encode the categorical features.

    ``fit`` learns one ``LabelEncoder`` per categorical column plus one for
    ``Store_Id``.  ``transform`` returns a new frame in which:

    * ``Product_Id_Cd`` holds the encoded first two characters of
      ``Product_Id``;
    * ``Product_Sugar_Content_Corr`` holds the encoded ``Product_Sugar_Content``
      after the substring ``'reg'`` has been replaced by ``'Regular'``;
    * ``Store_Size``, ``Store_Location_City_Type`` and ``Store_Type`` are
      replaced by their integer codes;
    * ``Operation_Years`` is ``2013 - Store_Establishment_Year``;
    * ``Store`` holds the encoded ``Store_Id``;
    * the raw columns listed in ``_DROPPED_FEATURES`` are removed.

    Values that were not seen during ``fit`` are encoded as ``-1`` row by row.
    Neither ``fit`` nor ``transform`` modifies the frame passed in.

    Attributes:
        le_prod: encoder fitted on ``Product_Id_Cd``.
        le_store: encoder fitted on ``Store_Id``.
        feature_encoders_: dict mapping each categorical column name to its
            fitted encoder (set by ``fit``; ``Product_Id_Cd`` maps to
            ``le_prod``).
    """

    # Columns that ``transform`` replaces with integer codes.
    _CATEGORICAL_FEATURES = (
        'Product_Sugar_Content_Corr',
        'Store_Size',
        'Store_Location_City_Type',
        'Store_Type',
        'Product_Id_Cd',
    )

    # Raw columns removed from the output of ``transform``.
    _DROPPED_FEATURES = (
        'Product_Id',
        'Store_Id',
        'Product_Sugar_Content',
        'Product_Type',
        'Store_Establishment_Year',
    )

    # Year that ``Store_Establishment_Year`` is subtracted from.
    # NOTE(review): an earlier revision computed an unused 2025-based value in
    # ``fit`` while ``transform`` used 2013; 2013 is kept because it is what
    # the transformer actually emitted -- confirm the intended year.
    _REFERENCE_YEAR = 2013

    # Code assigned to labels that were not present during ``fit``.
    _UNKNOWN_CODE = -1

    def __init__(self):
        self.le_prod = LabelEncoder()
        self.le_store = LabelEncoder()

    @staticmethod
    def _with_derived_categories(X):
        """Return a copy of ``X`` with the two derived categorical columns added."""
        frame = X.copy()
        frame['Product_Id_Cd'] = frame['Product_Id'].str[:2]
        # Case-sensitive substring replacement: every 'reg' becomes 'Regular'.
        frame['Product_Sugar_Content_Corr'] = frame['Product_Sugar_Content'].str.replace(
            'reg', 'Regular', regex=True
        )
        return frame

    def _encode(self, encoder, values):
        """Map ``values`` to the codes of ``encoder``; unseen labels get -1.

        The code of a label is its position in ``encoder.classes_``, which is
        the same integer ``LabelEncoder.transform`` returns for known labels.
        """
        codes = {label: code for code, label in enumerate(encoder.classes_)}
        # Cast to object first so that ``map``/``fillna`` behave the same for
        # plain and categorical columns.
        mapped = values.astype(object).map(codes)
        return mapped.fillna(self._UNKNOWN_CODE).astype('int64')

    def fit(self, X, y=None):
        """Fit one label encoder per categorical column and one for ``Store_Id``.

        Args:
            X: DataFrame containing ``Product_Id``, ``Product_Sugar_Content``,
                ``Store_Size``, ``Store_Location_City_Type``, ``Store_Type``
                and ``Store_Id``.  It is not modified.
            y: Ignored; accepted for scikit-learn API compatibility.

        Returns:
            self
        """
        frame = self._with_derived_categories(X)

        encoders = {}
        for column in self._CATEGORICAL_FEATURES:
            # A separate encoder per column: re-fitting one shared encoder
            # would keep only the classes of the last column fitted.
            # ``le_prod`` stays the ``Product_Id_Cd`` encoder so existing
            # users of that attribute see the same fitted state as before.
            encoder = self.le_prod if column == 'Product_Id_Cd' else LabelEncoder()
            encoders[column] = encoder.fit(frame[column])
        self.feature_encoders_ = encoders

        self.le_store.fit(frame['Store_Id'])
        return self

    def transform(self, X):
        """Return an engineered copy of ``X``.

        Args:
            X: DataFrame with the columns required by ``fit`` plus
                ``Product_Type`` and ``Store_Establishment_Year``.  It is not
                modified.

        Returns:
            A new DataFrame with the derived and encoded columns added and
            the columns in ``_DROPPED_FEATURES`` removed.

        Raises:
            AttributeError: if ``fit`` has not been called.
            KeyError: if a required column is missing.
        """
        frame = self._with_derived_categories(X)
        frame['Operation_Years'] = self._REFERENCE_YEAR - frame['Store_Establishment_Year']

        for column in self._CATEGORICAL_FEATURES:
            frame[column] = self._encode(self.feature_encoders_[column], frame[column])

        frame['Store'] = self._encode(self.le_store, frame['Store_Id'])

        return frame.drop(columns=list(self._DROPPED_FEATURES))