Spaces:
Sleeping
Sleeping
File size: 1,386 Bytes
c8e6478 8d0def9 9a2ba50 d4a02b1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 | import pandas as pd
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
class OutlierCapper(BaseEstimator, TransformerMixin):
    """Cap every feature column to its 1.5 * IQR interval.

    ``fit`` learns, for each column, the interval
    ``[Q1 - 1.5 * IQR, Q3 + 1.5 * IQR]`` (Q1/Q3 are the 25th/75th
    percentiles, IQR = Q3 - Q1).  ``transform`` clips each value into the
    interval learned for its column.

    Attributes set by ``fit``:
        bounds: list of ``(low, high)`` tuples, one per column, in column
            order.
    """

    def fit(self, X, y=None):
        """Learn the per-column capping bounds from ``X``.

        Args:
            X: 2-D data, either a ``pandas.DataFrame`` or anything
                ``numpy.asarray`` can turn into a 2-D array
                (rows = samples, columns = features).
            y: Ignored; accepted so the estimator can be used where a
                ``fit(X, y)`` signature is expected.

        Returns:
            ``self``, to allow call chaining.
        """
        # Work on a plain NumPy array: percentiles are computed positionally
        # and this avoids pandas FutureWarnings when reducing a DataFrame.
        data = X.to_numpy() if isinstance(X, pd.DataFrame) else np.asarray(X)

        # One vectorised call yields both quartiles for every column.
        q1, q3 = np.percentile(data, [25, 75], axis=0)
        iqr = q3 - q1
        self.bounds = list(zip(q1 - 1.5 * iqr, q3 + 1.5 * iqr))
        return self

    def transform(self, X):
        """Return a copy of ``X`` with each column clipped to its bounds.

        The input is never modified.  Columns are matched to ``self.bounds``
        by position.

        Args:
            X: 2-D data, either a ``pandas.DataFrame`` or anything
                ``numpy.array`` can turn into a 2-D array.

        Returns:
            A ``pandas.DataFrame`` with the same index and column labels as
            ``X`` when ``X`` is a DataFrame, otherwise a ``numpy.ndarray``.
            Integer input is returned as ``float64`` because the bounds are
            generally fractional.
        """
        # np.array always allocates a fresh, writable array.  That keeps the
        # caller's data untouched and avoids writing into the read-only view
        # that DataFrame.values returns under pandas Copy-on-Write.
        values = np.array(X)

        # Writing fractional bounds into an integer array would silently
        # truncate them (possibly to a value outside the bound), so widen
        # integer data to float first.
        if values.dtype.kind in "iu":
            values = values.astype(np.float64)

        for col, (low, high) in enumerate(self.bounds):
            values[:, col] = np.clip(values[:, col], low, high)

        if isinstance(X, pd.DataFrame):
            # Carry over the index as well as the column labels so the
            # result stays row-aligned with the input.
            return pd.DataFrame(values, index=X.index, columns=X.columns)
        return values
|