Spaces:
Sleeping
Sleeping
Upload server/specialists/augmenter.py with huggingface_hub
Browse files
server/specialists/augmenter.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from imblearn.over_sampling import SMOTE
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import numpy as np
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def run(df: pd.DataFrame, action: dict) -> dict:
    """Oversample the classes in ``df`` with SMOTE and return the result.

    Rows containing missing values are dropped before resampling. The
    ``"label"`` column is used as the class target and every other column
    is treated as a feature.

    Args:
        df: Input frame. Expected to contain a ``"label"`` column; it may
            sit at any position among the columns.
        action: Not read by this function (presumably kept so all
            specialists share one call signature — confirm with caller).

    Returns:
        A dict with two keys:
        ``"df"`` — the resampled frame (feature columns followed by
        ``"label"``) on success, or the original ``df`` unchanged when the
        step is skipped or fails;
        ``"log"`` — a human-readable description of what happened.
    """
    # Without a target column there is nothing to balance; report it the
    # same way as every other non-fatal outcome instead of raising KeyError.
    if "label" not in df.columns:
        return {"df": df, "log": "Augmenter skipped — no 'label' column."}

    df_clean = df.dropna()
    if len(df_clean) < 20 or df_clean["label"].nunique() < 2:
        return {"df": df, "log": "Augmenter skipped — insufficient clean data."}

    # Keep the feature frame around so its column names can be reused
    # below; this stays correct even when "label" is not the last column.
    features = df_clean.drop(columns="label")
    y = df_clean["label"].values
    try:
        sm = SMOTE(random_state=42)
        X_res, y_res = sm.fit_resample(features.values, y)
        df_out = pd.DataFrame(X_res, columns=features.columns)
        df_out["label"] = y_res
        added = len(df_out) - len(df_clean)
        return {"df": df_out, "log": f"Augmenter added {added} synthetic samples."}
    except Exception as e:
        # Best-effort step: any resampling failure is reported in the log
        # and the caller gets the original data back.
        return {"df": df, "log": f"Augmenter failed: {str(e)}"}
|