Spaces:

SandeepMM
/

Backend

Sleeping

App Files Files Community

SandeepMM committed on Aug 17, 2025

Commit

a648b7a

verified ·

1 Parent(s): 7c4aac7

Upload folder using huggingface_hub

Browse files

Files changed (1) hide show

FeatureEngineer.py +59 -0

FeatureEngineer.py ADDED Viewed

	@@ -0,0 +1,59 @@

class FeatureEngineer(BaseEstimator, TransformerMixin):
    """Derive helper features and integer-encode the categorical columns.

    ``transform`` returns a copy of the input frame in which:

    * ``Product_Id_Cd`` holds the first two characters of ``Product_Id``;
    * ``Product_Sugar_Content_Corr`` is ``Product_Sugar_Content`` with the
      value ``'reg'`` rewritten to ``'Regular'``;
    * ``Operation_Years`` is ``REFERENCE_YEAR - Store_Establishment_Year``;
    * every column in ``ENCODED_COLUMNS`` is replaced by integer codes
      learned during ``fit``;
    * ``Store`` holds the integer code of ``Store_Id``;
    * the columns in ``DROPPED_COLUMNS`` are removed.

    Labels that were not present during ``fit`` are encoded as
    ``UNKNOWN_CODE`` on a per-row basis.

    Attributes:
        le_prod: ``LabelEncoder`` fitted on ``Product_Id_Cd``.
        le_store: ``LabelEncoder`` fitted on ``Store_Id``.
        encoders_: dict mapping each column of ``ENCODED_COLUMNS`` to its
            fitted ``LabelEncoder`` (set by ``fit``).
    """

    # Year the store age is measured against. ``fit`` used to compute the
    # age with 2025 while ``transform`` used 2013; only the ``transform``
    # value ever reached the output, so it is kept to leave the emitted
    # feature unchanged.
    # NOTE(review): confirm 2013 is the intended reference year.
    REFERENCE_YEAR = 2013

    # Categorical columns that are replaced in place by integer codes.
    ENCODED_COLUMNS = (
        'Product_Sugar_Content_Corr',
        'Store_Size',
        'Store_Location_City_Type',
        'Store_Type',
        'Product_Id_Cd',
    )

    # Columns removed from the output. 'Product_Type' is dropped without
    # being turned into a derived feature.
    DROPPED_COLUMNS = (
        'Product_Id',
        'Store_Id',
        'Product_Sugar_Content',
        'Product_Type',
        'Store_Establishment_Year',
    )

    # Code assigned to labels that were not seen during ``fit``.
    UNKNOWN_CODE = -1

    def __init__(self):
        # The encoders are kept on the instance so the mapping learned in
        # ``fit`` can be applied consistently to new data.
        self.le_prod = LabelEncoder()
        self.le_store = LabelEncoder()

    def _derive_features(self, X):
        """Return a copy of *X* with the three derived columns added."""
        derived = X.copy()
        derived['Product_Id_Cd'] = derived['Product_Id'].str[:2]
        # Exact-value replacement: a regex substring replace would also
        # rewrite the 'reg' inside longer values such as 'regular'.
        derived['Product_Sugar_Content_Corr'] = (
            derived['Product_Sugar_Content'].replace({'reg': 'Regular'})
        )
        derived['Operation_Years'] = (
            self.REFERENCE_YEAR - derived['Store_Establishment_Year']
        )
        return derived

    def _encode(self, encoder, values):
        """Map *values* to the integer codes of a fitted *encoder*.

        Labels missing from ``encoder.classes_`` become ``UNKNOWN_CODE``
        instead of raising, so one unseen label does not affect other rows.
        """
        codes = {label: code for code, label in enumerate(encoder.classes_)}
        return values.map(codes).fillna(self.UNKNOWN_CODE).astype(int)

    def fit(self, X, y=None):
        """Learn the label-to-code mappings from *X*.

        *X* is not modified; the derived columns are computed on a copy.

        Args:
            X: DataFrame containing ``Product_Id``, ``Product_Sugar_Content``,
                ``Store_Establishment_Year``, ``Store_Id`` and the raw
                categorical columns listed in ``ENCODED_COLUMNS``.
            y: Ignored; accepted for scikit-learn API compatibility.

        Returns:
            The fitted transformer (``self``).
        """
        derived = self._derive_features(X)
        # One encoder per column: re-fitting a single shared encoder keeps
        # only the classes of the column fitted last.
        self.encoders_ = {}
        for column in self.ENCODED_COLUMNS:
            if column == 'Product_Id_Cd':
                encoder = self.le_prod
            else:
                encoder = LabelEncoder()
            self.encoders_[column] = encoder.fit(derived[column])
        self.le_store.fit(derived['Store_Id'])
        return self

    def transform(self, X):
        """Return the engineered copy of *X*; *X* itself is left untouched.

        Args:
            X: DataFrame with the same columns that ``fit`` expects, plus
                ``Product_Type`` (which is dropped).

        Returns:
            A new DataFrame with the derived and encoded columns added and
            the columns in ``DROPPED_COLUMNS`` removed.
        """
        out = self._derive_features(X)
        for column, encoder in self.encoders_.items():
            out[column] = self._encode(encoder, out[column])
        out['Store'] = self._encode(self.le_store, out['Store_Id'])
        return out.drop(columns=list(self.DROPPED_COLUMNS))