Spaces:
Sleeping
Sleeping
Upload outliercapper.py with huggingface_hub
Browse files- outliercapper.py +35 -0
outliercapper.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
class OutlierCapper(BaseEstimator, TransformerMixin):
    """Cap every feature column to its 1.5 x IQR fences.

    ``fit`` stores, for each column of the training data, the pair
    ``(Q1 - 1.5 * IQR, Q3 + 1.5 * IQR)`` in ``self.bounds`` (a list with one
    tuple per column, in column order).  ``transform`` clips each column of
    its input to the corresponding stored pair.
    """

    # NOTE(review): np, pd, BaseEstimator and TransformerMixin are referenced
    # here but no import lines are visible in this view of the file --
    # confirm they are imported at module level.

    def fit(self, X, y=None):
        """Compute the per-column clipping bounds from ``X``.

        Args:
            X: 2-D data, either a ``pandas.DataFrame`` or anything
                ``numpy.asarray`` can turn into a 2-D array.
            y: Ignored; accepted so the method matches the usual
                ``fit(X, y)`` call signature.

        Returns:
            ``self``, with ``self.bounds`` populated.
        """
        # Work on a plain ndarray: positional column slicing is then uniform
        # for DataFrame and array input, and percentile computation on the
        # raw array avoids the pandas FutureWarning the original code noted.
        values = X.to_numpy() if isinstance(X, pd.DataFrame) else np.asarray(X)

        self.bounds = []
        for col in range(values.shape[1]):
            # One call yields both quartiles for the column.
            q1, q3 = np.percentile(values[:, col], [25, 75])
            iqr = q3 - q1
            self.bounds.append((q1 - 1.5 * iqr, q3 + 1.5 * iqr))

        return self

    def transform(self, X):
        """Return a copy of ``X`` with each column clipped to its bounds.

        Args:
            X: 2-D data, either a ``pandas.DataFrame`` or anything
                ``numpy.asarray`` can turn into a 2-D array.  ``X`` itself
                is never modified.

        Returns:
            A ``pandas.DataFrame`` carrying the same columns and index as
            ``X`` when ``X`` is a DataFrame, otherwise a ``numpy.ndarray``.
            Integer input is returned as ``float64`` so that fractional
            bounds are not truncated.
        """
        is_frame = isinstance(X, pd.DataFrame)
        raw = X.to_numpy() if is_frame else np.asarray(X)

        # Always clip into a fresh, writable buffer: the array handed back by
        # pandas may be a read-only view (Copy-on-Write), and the caller's
        # data must stay untouched.
        if np.issubdtype(raw.dtype, np.integer):
            # The bounds are floats; writing them into an integer array would
            # silently truncate the capped values.
            capped = raw.astype(np.float64)
        else:
            capped = raw.copy()

        for col, (low, high) in enumerate(self.bounds):
            capped[:, col] = np.clip(capped[:, col], low, high)

        if is_frame:
            # Keep the original index as well as the column labels so the
            # result stays row-aligned with the input.
            return pd.DataFrame(capped, columns=X.columns, index=X.index)
        return capped
|