sudhirpgcmma02 committed on
Commit
d4a02b1
·
verified ·
1 Parent(s): a82e8ef

Upload outliercapper.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. outliercapper.py +35 -0
outliercapper.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class OutlierCapper(BaseEstimator, TransformerMixin):
    """Cap every feature column to its interquartile-range (IQR) fences.

    ``fit`` learns, for each column, the interval
    ``[Q1 - 1.5 * IQR, Q3 + 1.5 * IQR]`` where ``Q1``/``Q3`` are the 25th and
    75th percentiles and ``IQR = Q3 - Q1``.  ``transform`` clips each column
    into the interval learned for the column at the same position.

    Attributes
    ----------
    bounds : list of (low, high) tuples
        Set by ``fit``; one entry per column, in column order.

    Notes
    -----
    NOTE(review): ``np``, ``pd``, ``BaseEstimator`` and ``TransformerMixin``
    are used as module globals here -- confirm they are imported in the module
    that defines this class.
    """

    @staticmethod
    def _to_float_array(X, copy=False):
        """Return ``X`` as a floating-point ndarray.

        Floating input keeps its dtype; anything else (ints, bools, object
        columns holding numbers) is converted to float64 so that fractional
        caps are not truncated when written back.  With ``copy=True`` the
        result is always a fresh, writable array that shares no memory with
        ``X``.
        """
        raw = X.to_numpy() if isinstance(X, pd.DataFrame) else np.asarray(X)
        dtype = raw.dtype if np.issubdtype(raw.dtype, np.floating) else np.float64
        if copy:
            return np.array(raw, dtype=dtype, copy=True)
        return np.asarray(raw, dtype=dtype)

    def fit(self, X, y=None):
        """Learn the per-column capping interval from ``X``.

        Parameters
        ----------
        X : pandas.DataFrame or 2-D array-like
            Training data; columns are treated as features.
        y : ignored
            Accepted only so the signature matches ``fit(X, y)`` callers.

        Returns
        -------
        self
        """
        data = self._to_float_array(X)

        learned = []
        for col in range(data.shape[1]):
            # nanpercentile ignores missing values; plain percentile would
            # return NaN for the whole column as soon as one NaN is present.
            q1, q3 = np.nanpercentile(data[:, col], [25, 75])
            reach = 1.5 * (q3 - q1)
            learned.append((q1 - reach, q3 + reach))

        # Assign only after every column succeeded, so a failed refit does
        # not leave a half-built list behind.
        self.bounds = learned
        return self

    def transform(self, X):
        """Clip each column of ``X`` into the interval learned by ``fit``.

        Parameters
        ----------
        X : pandas.DataFrame or 2-D array-like
            Data with the same column order as the data passed to ``fit``.

        Returns
        -------
        pandas.DataFrame or numpy.ndarray
            A DataFrame (same columns and index as ``X``) when ``X`` is a
            DataFrame, otherwise an ndarray.  ``X`` itself is not modified.
        """
        # Work on an explicit writable copy: the array behind a DataFrame may
        # be a read-only view, and the caller's data must stay untouched.
        capped = self._to_float_array(X, copy=True)

        for col, (low, high) in enumerate(self.bounds):
            capped[:, col] = np.clip(capped[:, col], low, high)

        if isinstance(X, pd.DataFrame):
            # Carry the index over as well as the column labels so the result
            # stays aligned with the input rows.
            return pd.DataFrame(capped, columns=X.columns, index=X.index)
        return capped