SandeepMM committed on
Commit
a648b7a
·
verified ·
1 Parent(s): 7c4aac7

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. FeatureEngineer.py +59 -0
FeatureEngineer.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class FeatureEngineer(BaseEstimator, TransformerMixin):
    """Derive and label-encode features of a product/store DataFrame.

    ``transform`` adds three derived columns:

    * ``Product_Id_Cd`` -- the first two characters of ``Product_Id``.
    * ``Product_Sugar_Content_Corr`` -- ``Product_Sugar_Content`` with the
      text ``'reg'`` replaced by ``'Regular'``.
    * ``Operation_Years`` -- ``reference_year - Store_Establishment_Year``.

    It then label-encodes the columns listed in ``ENCODED_FEATURES`` in
    place, writes the encoded ``Store_Id`` to a new ``Store`` column, and
    drops the columns listed in ``DROPPED_FEATURES``.

    Each encoded column gets its own encoder.  Values that were not seen
    during ``fit`` are encoded as ``UNKNOWN_CODE`` (-1) row by row, so one
    unseen value no longer affects the rest of the column.

    Parameters
    ----------
    reference_year : int, default=2013
        Year from which ``Store_Establishment_Year`` is subtracted.  The
        default is the value ``transform`` has always used, so existing
        ``Operation_Years`` output is unchanged.

    Attributes
    ----------
    encoders_ : dict
        Maps each name in ``ENCODED_FEATURES`` to the ``LabelEncoder``
        fitted on that column.  Set by ``fit``.
    le_prod : LabelEncoder
        Encoder for ``Product_Id_Cd``; after ``fit`` it is the same object
        as ``encoders_['Product_Id_Cd']``.  Kept for backward compatibility.
    le_store : LabelEncoder
        Encoder for ``Store_Id``.
    """

    # Categorical columns that are label-encoded in place.
    ENCODED_FEATURES = (
        'Product_Sugar_Content_Corr',
        'Store_Size',
        'Store_Location_City_Type',
        'Store_Type',
        'Product_Id_Cd',
    )

    # Columns removed from the transformed output.
    DROPPED_FEATURES = (
        'Product_Id',
        'Store_Id',
        'Product_Sugar_Content',
        'Product_Type',
        'Store_Establishment_Year',
    )

    # Code assigned to any category that was not present during fit().
    UNKNOWN_CODE = -1

    def __init__(self, reference_year=2013):
        self.reference_year = reference_year
        # The encoders are stored on the instance so that the mapping
        # learned in fit() is applied consistently to new data.
        self.le_prod = LabelEncoder()
        self.le_store = LabelEncoder()

    def _add_derived_features(self, X):
        """Return a copy of ``X`` with the three derived columns added."""
        # Work on a copy so the caller's DataFrame is never modified.
        X_new = X.copy()
        X_new['Product_Id_Cd'] = X_new['Product_Id'].str[:2]
        X_new['Product_Sugar_Content_Corr'] = X_new['Product_Sugar_Content'].str.replace('reg', 'Regular', regex=True)
        X_new['Operation_Years'] = self.reference_year - X_new['Store_Establishment_Year']
        return X_new

    @classmethod
    def _encode(cls, encoder, values):
        """Encode ``values`` with a fitted ``encoder``.

        Categories missing from ``encoder.classes_`` become
        ``UNKNOWN_CODE`` instead of raising ``ValueError``.
        """
        codes = {label: code for code, label in enumerate(encoder.classes_)}
        # Cast to object first so filling with -1 also works when the
        # column has a categorical dtype.
        encoded = values.astype(object).map(codes)
        return encoded.fillna(cls.UNKNOWN_CODE).astype(int)

    def fit(self, X, y=None):
        """Learn the label encodings from ``X``.

        Parameters
        ----------
        X : pandas.DataFrame
            Must contain ``Product_Id``, ``Product_Sugar_Content``,
            ``Store_Establishment_Year``, ``Store_Size``,
            ``Store_Location_City_Type``, ``Store_Type`` and ``Store_Id``.
            It is not modified.
        y : ignored
            Accepted for scikit-learn API compatibility.

        Returns
        -------
        FeatureEngineer
            The fitted transformer (``self``).
        """
        X_new = self._add_derived_features(X)

        # One encoder per column: re-fitting a single shared encoder would
        # keep only the classes of the last column it was fitted on.
        self.encoders_ = {
            feature: LabelEncoder().fit(X_new[feature])
            for feature in self.ENCODED_FEATURES
        }
        self.le_prod = self.encoders_['Product_Id_Cd']

        # Fit LabelEncoder for 'Store_Id'.
        self.le_store.fit(X_new['Store_Id'])
        return self

    def transform(self, X):
        """Return an engineered, label-encoded copy of ``X``.

        Parameters
        ----------
        X : pandas.DataFrame
            Must contain the same columns as required by ``fit`` plus
            ``Product_Type`` (which is dropped).  It is not modified.

        Returns
        -------
        pandas.DataFrame
            Copy of ``X`` with the derived columns added, the columns in
            ``ENCODED_FEATURES`` and ``Store`` holding integer codes
            (``UNKNOWN_CODE`` for unseen categories), and the columns in
            ``DROPPED_FEATURES`` removed.
        """
        X_copy = self._add_derived_features(X)

        for feature in self.ENCODED_FEATURES:
            X_copy[feature] = self._encode(self.encoders_[feature], X_copy[feature])

        # Apply the store encoder to 'Store_Id'.
        X_copy['Store'] = self._encode(self.le_store, X_copy['Store_Id'])

        # Drop the features that have already been processed into new ones.
        X_copy.drop(list(self.DROPPED_FEATURES), axis=1, inplace=True)

        return X_copy