saherPervaiz committed on
Commit
df63133
·
verified ·
1 Parent(s): 14f03aa

Update utils/data_cleaning.py

Browse files
Files changed (1) hide show
  1. utils/data_cleaning.py +3 -0
utils/data_cleaning.py CHANGED
@@ -34,12 +34,15 @@ def remove_outliers_iqr(df):
34
  """
35
  numerical_cols = df.select_dtypes(include=['int64', 'float64']).columns
36
  for col in numerical_cols:
 
37
  Q1 = df[col].quantile(0.25)
38
  Q3 = df[col].quantile(0.75)
39
  IQR = Q3 - Q1
40
  lower_bound = Q1 - 1.5 * IQR
41
  upper_bound = Q3 + 1.5 * IQR
42
  df = df[(df[col] >= lower_bound) & (df[col] <= upper_bound)]
 
 
43
  return df
44
 
45
  def cap_extreme_values(df):
 
34
  """
35
  numerical_cols = df.select_dtypes(include=['int64', 'float64']).columns
36
  for col in numerical_cols:
37
+ original_count = len(df)
38
  Q1 = df[col].quantile(0.25)
39
  Q3 = df[col].quantile(0.75)
40
  IQR = Q3 - Q1
41
  lower_bound = Q1 - 1.5 * IQR
42
  upper_bound = Q3 + 1.5 * IQR
43
  df = df[(df[col] >= lower_bound) & (df[col] <= upper_bound)]
44
+ removed_rows = original_count - len(df)
45
+ print(f"Removed outliers from **{col}**: {removed_rows} rows removed.")
46
  return df
47
 
48
  def cap_extreme_values(df):