Spaces:
Sleeping
Sleeping
Upload server/specialists/cleaner.py with huggingface_hub
Browse files
server/specialists/cleaner.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
def run(df: pd.DataFrame, action: dict) -> dict:
    """Reduce missing values in ``df`` as directed by ``action``.

    The input frame is never modified; all work happens on a copy.

    Args:
        df: Frame to clean.
        action: Options dict. Recognised keys:
            ``"target"``: a single column name, or ``"all"`` (the default)
            to process every column except the last one (presumably the
            label column; confirm against the caller).
            ``"strategy"``: ``"median_impute"`` (default), ``"mean_impute"``
            or ``"drop_rows"``. Any other value leaves the data untouched.

    Returns:
        A dict with ``"df"`` (the cleaned copy) and ``"log"`` (a one-line
        summary containing the number of missing cells removed).
    """
    target_col = action.get("target", "all")
    strategy = action.get("strategy", "median_impute")
    df_out = df.copy()

    cols = [target_col] if target_col != "all" else df.columns[:-1].tolist()

    for col in cols:
        # Unknown columns and the label column are never touched.
        if col not in df_out.columns or col == "label":
            continue

        if strategy in ("median_impute", "mean_impute"):
            series = df_out[col]
            try:
                if strategy == "median_impute":
                    fill_value = series.median()
                else:
                    fill_value = series.mean()
            except (TypeError, ValueError):
                # Text/categorical columns have no median or mean. Skip them
                # instead of aborting the whole cleaning step.
                continue
            df_out[col] = series.fillna(fill_value)
        elif strategy == "drop_rows":
            df_out = df_out.dropna(subset=[col])

    # Counted over the whole frame, so rows removed by drop_rows also take
    # their missing cells in other columns out of the total.
    reduced = df.isnull().sum().sum() - df_out.isnull().sum().sum()
    return {
        "df": df_out,
        "log": f"Cleaner applied {strategy} to {cols}. Missing reduced by {reduced} cells."
    }
|