class FeatureEngineer(BaseEstimator, TransformerMixin):
    """Derive and label-encode product/store features.

    ``transform`` adds three derived columns:

    * ``Product_Id_Cd`` - the first two characters of ``Product_Id``.
    * ``Product_Sugar_Content_Corr`` - ``Product_Sugar_Content`` with the
      literal text ``'reg'`` replaced by ``'Regular'``.
    * ``Operation_Years`` - ``reference_year - Store_Establishment_Year``.

    It then label-encodes the categorical columns in place, adds an encoded
    ``Store`` column built from ``Store_Id``, and drops the raw columns
    listed in ``_DROPPED_FEATURES``.

    Every categorical column gets its own ``LabelEncoder`` fitted in
    ``fit``. Categories that were not present at fit time are encoded as
    ``-1`` on a per-row basis.

    Parameters
    ----------
    reference_year : int, default=2013
        Year from which ``Store_Establishment_Year`` is subtracted. The
        default is the value the transform step has always applied.
    """

    # Columns that are label-encoded in place (one encoder per column).
    _CATEGORICAL_FEATURES = (
        'Product_Sugar_Content_Corr',
        'Store_Size',
        'Store_Location_City_Type',
        'Store_Type',
        'Product_Id_Cd',
    )

    # Raw columns removed from the transformed output.
    _DROPPED_FEATURES = (
        'Product_Id',
        'Store_Id',
        'Product_Sugar_Content',
        'Product_Type',
        'Store_Establishment_Year',
    )

    # Code assigned to categories that were not seen during fit.
    _UNKNOWN_CODE = -1

    def __init__(self, reference_year=2013):
        self.reference_year = reference_year
        # Kept as public attributes for backward compatibility: after
        # ``fit``, ``le_prod`` is the encoder for 'Product_Id_Cd' and
        # ``le_store`` is the encoder for 'Store_Id'.
        self.le_prod = LabelEncoder()
        self.le_store = LabelEncoder()

    def _add_derived_features(self, X):
        """Return a copy of ``X`` with the derived columns added."""
        # Work on a copy so the caller's DataFrame is never modified.
        derived = X.copy()
        derived['Product_Id_Cd'] = derived['Product_Id'].str[:2]
        derived['Product_Sugar_Content_Corr'] = (
            derived['Product_Sugar_Content']
            .str.replace('reg', 'Regular', regex=False)
        )
        derived['Operation_Years'] = (
            self.reference_year - derived['Store_Establishment_Year']
        )
        return derived

    @staticmethod
    def _encode(values, encoder, unknown_code):
        """Encode ``values`` with a fitted encoder; unseen labels get ``unknown_code``."""
        # The position of a label in ``classes_`` is the code that
        # ``LabelEncoder.transform`` would return for it.
        codes = {label: code for code, label in enumerate(encoder.classes_)}
        return values.map(codes).fillna(unknown_code).astype('int64')

    def fit(self, X, y=None):
        """Fit one label encoder per categorical column and one for ``Store_Id``.

        Parameters
        ----------
        X : pandas.DataFrame
            Training data. It is not modified.
        y : ignored
            Present for scikit-learn API compatibility.

        Returns
        -------
        self
        """
        derived = self._add_derived_features(X)

        # A separate encoder per column: re-fitting a single shared encoder
        # would keep only the classes of the last column it was fitted on.
        self.encoders_ = {
            column: LabelEncoder().fit(derived[column])
            for column in self._CATEGORICAL_FEATURES
        }
        self.le_prod = self.encoders_['Product_Id_Cd']
        self.le_store = LabelEncoder().fit(derived['Store_Id'])
        return self

    def transform(self, X):
        """Return a transformed copy of ``X``.

        Parameters
        ----------
        X : pandas.DataFrame
            Data containing the same raw columns that were used in ``fit``.

        Returns
        -------
        pandas.DataFrame
            Copy of ``X`` with the derived columns added, the categorical
            columns replaced by integer codes, an encoded ``Store`` column
            added and the raw columns in ``_DROPPED_FEATURES`` removed.

        Raises
        ------
        AttributeError
            If ``transform`` is called before ``fit``.
        """
        encoders = getattr(self, 'encoders_', None)
        if encoders is None:
            raise AttributeError(
                'FeatureEngineer is not fitted yet; call fit() before transform().'
            )

        result = self._add_derived_features(X)

        for column in self._CATEGORICAL_FEATURES:
            result[column] = self._encode(
                result[column], encoders[column], self._UNKNOWN_CODE
            )

        result['Store'] = self._encode(
            result['Store_Id'], self.le_store, self._UNKNOWN_CODE
        )

        # Drop the raw columns that are not part of the output.
        return result.drop(columns=list(self._DROPPED_FEATURES))