saherPervaiz committed on
Commit
14f03aa
·
verified ·
1 Parent(s): d935af2

Update utils/data_cleaning.py

Browse files
Files changed (1) hide show
  1. utils/data_cleaning.py +10 -9
utils/data_cleaning.py CHANGED
@@ -22,23 +22,24 @@ def handle_missing_values(df, method='Drop rows'):
22
  df[col].fillna(df[col].mode()[0], inplace=True)
23
  return df
24
 
25
def remove_outliers_iqr(df, col):
    """
    Remove outliers using the IQR (Interquartile Range) method for a specific column.

    A row is kept when its value in `col` lies within
    [Q1 - 1.5 * IQR, Q3 + 1.5 * IQR] (bounds inclusive). Rows whose value in
    `col` is missing are kept as well: a missing value is not an outlier, and
    dropping it here would silently discard data.

    Parameters:
    - df: The input DataFrame.
    - col: The specific column to remove outliers from.

    Returns:
    - df: The DataFrame after removing outliers.
    """
    values = df[col]
    q1 = values.quantile(0.25)
    q3 = values.quantile(0.75)
    iqr = q3 - q1
    lower_bound = q1 - 1.5 * iqr
    upper_bound = q3 + 1.5 * iqr

    # `between` is inclusive on both ends; NaN compares False, so missing
    # values are re-admitted explicitly instead of being treated as outliers.
    in_range = values.between(lower_bound, upper_bound)
    return df[in_range | values.isna()]
43
 
44
  def cap_extreme_values(df):
 
22
  df[col].fillna(df[col].mode()[0], inplace=True)
23
  return df
24
 
25
def remove_outliers_iqr(df):
    """
    Remove outliers using the IQR (Interquartile Range) method for all numerical columns.

    For every numeric column the bounds [Q1 - 1.5 * IQR, Q3 + 1.5 * IQR]
    (inclusive) are computed once from the input frame, so the result does not
    depend on column order. A row is kept only if each of its numeric values
    is inside the bounds of its column. Missing values are not treated as
    outliers, so they never cause a row to be dropped.

    Parameters:
    - df: The input DataFrame.

    Returns:
    - df: The DataFrame after removing outliers. The input is returned
      unchanged when it has no rows or no numeric columns.
    """
    # 'number' covers every numeric dtype (int32, float32, unsigned, ...),
    # not only int64/float64; boolean columns are not included.
    numeric = df.select_dtypes(include="number")
    if numeric.empty:
        return df

    q1 = numeric.quantile(0.25)
    q3 = numeric.quantile(0.75)
    iqr = q3 - q1
    lower_bound = q1 - 1.5 * iqr
    upper_bound = q3 + 1.5 * iqr

    # Compare each column against its own bounds (Series aligned on columns).
    within = numeric.ge(lower_bound, axis="columns") & numeric.le(upper_bound, axis="columns")

    # NaN compares False against any bound; re-admit missing cells so rows are
    # dropped for being outliers only, never for being incomplete.
    keep = (within | numeric.isna()).all(axis=1)
    return df[keep]
44
 
45
  def cap_extreme_values(df):