File size: 623 Bytes
9e1d7a5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
import pandas as pd


def run(df: pd.DataFrame, action: dict) -> dict:
    """Run basic data-quality checks on *df* and return a cleaned frame plus a report.

    Checks, in order:

    1. Overall missing-value rate (mean of the per-column null fractions);
       flagged when above 5%.
    2. Class balance of the ``"label"`` column; flagged when the rarest
       class makes up less than 20% of the labelled rows. If the frame has
       no ``"label"`` column, this is reported as an issue instead of
       raising ``KeyError``.
    3. Fully duplicated rows; these are dropped and the count is reported.

    Args:
        df: The frame to validate. It is not modified in place;
            de-duplication produces a new frame.
        action: Accepted for interface compatibility; not read by this
            function.

    Returns:
        A dict with two keys: ``"df"`` (the input frame, or a de-duplicated
        copy if duplicates were found) and ``"log"`` (a one-line,
        human-readable report).
    """
    issues = []

    # All columns share the same row count, so the mean of the column-wise
    # null fractions equals the overall fraction of missing cells.
    missing_pct = df.isnull().mean().mean()
    if missing_pct > 0.05:
        issues.append(f"High missing rate: {missing_pct:.1%}")

    if "label" in df.columns:
        # value_counts(normalize=True) yields class shares; on an empty or
        # all-null column .min() is NaN, and NaN < 0.2 is False, so no
        # imbalance is reported in that case.
        if df["label"].value_counts(normalize=True).min() < 0.2:
            issues.append("Class imbalance detected")
    else:
        # A validator should report a missing column, not crash on it.
        issues.append("Missing 'label' column; class balance not checked")

    duplicates = df.duplicated().sum()
    if duplicates > 0:
        df = df.drop_duplicates()
        issues.append(f"Removed {duplicates} duplicate rows")

    summary = "; ".join(issues) if issues else "No major issues found."
    return {
        "df": df,
        "log": f"Validator report: {summary}",
    }