Aswini-Kumar committed on
Commit
b76a099
·
verified ·
1 Parent(s): a0be0f2

Upload server/specialists/cleaner.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. server/specialists/cleaner.py +25 -0
server/specialists/cleaner.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+
3
+
4
def run(df: pd.DataFrame, action: dict) -> dict:
    """Reduce missing values in ``df`` as described by ``action``.

    Args:
        df: Input frame. It is never modified; all work happens on a copy.
        action: Options for this step. Recognised keys:
            ``"target"`` - a single column name, or ``"all"`` (default) to
            process every column except the last one.
            ``"strategy"`` - ``"median_impute"`` (default), ``"mean_impute"``
            or ``"drop_rows"``. Any other value leaves the data untouched.

    Returns:
        A dict with ``"df"`` (the cleaned copy) and ``"log"`` (a one-line
        summary naming the strategy, the columns it was actually applied to,
        and how many missing cells were removed).
    """
    target_col = action.get("target", "all")
    strategy = action.get("strategy", "median_impute")
    df_out = df.copy()

    # "all" leaves out the last column -- presumably the label/target column;
    # TODO confirm against the caller's frame layout.
    cols = [target_col] if target_col != "all" else df.columns[:-1].tolist()

    # Unknown columns and the "label" column are never cleaned.
    candidates = [c for c in cols if c in df_out.columns and c != "label"]

    applied = []
    if strategy in ("median_impute", "mean_impute"):
        for col in candidates:
            series = df_out[col]
            # median()/mean() raise on text columns; skip those instead of
            # aborting the whole cleaning step.
            if not pd.api.types.is_numeric_dtype(series):
                continue
            if strategy == "median_impute":
                fill_value = series.median()
            else:
                fill_value = series.mean()
            df_out[col] = series.fillna(fill_value)
            applied.append(col)
    elif strategy == "drop_rows":
        # One dropna over all candidates removes the same rows as dropping
        # column by column, without re-scanning the frame each time.
        if candidates:
            df_out = df_out.dropna(subset=candidates)
        applied = candidates

    reduced = int(df.isnull().sum().sum() - df_out.isnull().sum().sum())
    return {
        "df": df_out,
        # Report only the columns that were really processed so the log does
        # not claim work on skipped or non-existent columns.
        "log": f"Cleaner applied {strategy} to {applied}. Missing reduced by {reduced} cells.",
    }