Spaces:
Sleeping
Sleeping
File size: 841 Bytes
def profile_dataframe(df):
    """Summarize each column of ``df`` and suggest clustering algorithms.

    Args:
        df: A pandas DataFrame. Column labels do not need to be unique.

    Returns:
        A dict with two keys:

        * ``"columns"``: one dict per column, in column order, holding the
          column ``name``, its dtype as a string (``inferred_type``), the
          fraction of missing values in the range 0-1 (``missing_pct``), and
          the number of distinct non-missing values (``cardinality``).
        * ``"recommended_algorithms"``: algorithm names, extended with one
          group when any int64/float64/int32/float32 column is present and
          with a second group when any object/bool column is present.
    """
    numeric_cols = df.select_dtypes(
        include=["int64", "float64", "int32", "float32"]
    ).columns.tolist()
    categorical_cols = df.select_dtypes(include=["object", "bool"]).columns.tolist()

    recommended = []
    if numeric_cols:
        recommended.extend(
            ["kmeans", "mini_batch_kmeans", "birch", "bisecting_kmeans", "gaussian_mixture"]
        )
    if categorical_cols:
        recommended.extend(["agglomerative", "dbscan", "optics", "spectral", "hdbscan"])

    # Walk the columns positionally with items() instead of indexing by label:
    # with repeated labels, df[label] returns a DataFrame (no .dtype attribute)
    # and the profile would fail with AttributeError.
    cols = [
        {
            "name": name,
            "inferred_type": str(series.dtype),
            "missing_pct": float(series.isna().mean()),
            "cardinality": int(series.nunique(dropna=True)),
        }
        for name, series in df.items()
    ]

    return {
        "columns": cols,
        "recommended_algorithms": recommended,
    }
|