File size: 841 Bytes
4013eed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
def profile_dataframe(df):
    """Profile each column of a DataFrame and suggest clustering algorithms.

    Args:
        df: A pandas DataFrame. It may be empty and may contain duplicate
            column labels.

    Returns:
        A dict with two keys:

        * ``"columns"``: one dict per column, in column order, holding
          ``"name"`` (the column label), ``"inferred_type"`` (the dtype as
          a string), ``"missing_pct"`` (fraction of missing values in the
          range 0.0-1.0 -- a fraction, despite the key name; 0.0 when the
          frame has no rows) and ``"cardinality"`` (number of distinct
          non-missing values).
        * ``"recommended_algorithms"``: algorithm names, numeric-oriented
          ones first (added when any integer/float column exists), then
          the ones added when any object/bool/string/category column
          exists.
    """
    # Local imports keep the function self-contained; it needs pandas only
    # for dtype classification.
    import pandas as pd
    from pandas.api import types as pdt

    def _is_numeric(dtype):
        # Any integer or float width (signed, unsigned, pandas nullable).
        # bool, complex, datetime and timedelta are intentionally excluded.
        return pdt.is_integer_dtype(dtype) or pdt.is_float_dtype(dtype)

    def _is_categorical(dtype):
        # Object and bool as before, plus dedicated string and category
        # dtypes, which a fixed ["object", "bool"] list would miss.
        return (
            pdt.is_object_dtype(dtype)
            or pdt.is_bool_dtype(dtype)
            or pdt.is_string_dtype(dtype)
            or isinstance(dtype, pd.CategoricalDtype)
        )

    dtypes = list(df.dtypes)

    recommended = []
    if any(_is_numeric(dtype) for dtype in dtypes):
        recommended.extend(
            ["kmeans", "mini_batch_kmeans", "birch", "bisecting_kmeans", "gaussian_mixture"]
        )
    if any(_is_categorical(dtype) for dtype in dtypes):
        recommended.extend(["agglomerative", "dbscan", "optics", "spectral", "hdbscan"])

    has_rows = len(df) > 0
    cols = []
    # df.items() yields one Series per physical column, so duplicated column
    # labels are profiled individually instead of df[label] returning a
    # DataFrame and breaking the scalar conversions below.
    for name, series in df.items():
        # The mean of an empty boolean mask is NaN; report 0.0 instead so
        # the result stays JSON-serialisable for zero-row frames.
        missing = float(series.isna().mean()) if has_rows else 0.0
        cols.append({
            "name": name,
            "inferred_type": str(series.dtype),
            "missing_pct": missing,
            "cardinality": int(series.nunique(dropna=True)),
        })

    return {
        "columns": cols,
        "recommended_algorithms": recommended,
    }