Spaces:
Sleeping
Sleeping
def profile_dataframe(df):
    """Summarise a DataFrame's columns and suggest clustering algorithms.

    Args:
        df: The pandas DataFrame to profile.

    Returns:
        A dict with two keys:

        * ``"columns"``: one dict per column, in column order, holding
          ``"name"`` (the column label), ``"inferred_type"`` (the dtype as a
          string), ``"missing_pct"`` (the *fraction* of missing values in
          the range 0.0-1.0, despite the key name) and ``"cardinality"``
          (number of distinct non-missing values).
        * ``"recommended_algorithms"``: algorithm identifiers, numeric
          suggestions first, followed by categorical suggestions.
    """
    # Explicit fixed-width dtypes rather than "number": the latter would also
    # pull in timedelta and complex columns.
    numeric_dtypes = [
        "int8", "int16", "int32", "int64",
        "uint8", "uint16", "uint32", "uint64",
        "float16", "float32", "float64",
    ]
    categorical_dtypes = ["object", "bool", "category"]

    has_numeric = df.select_dtypes(include=numeric_dtypes).shape[1] > 0
    has_categorical = df.select_dtypes(include=categorical_dtypes).shape[1] > 0

    recommended = []
    if has_numeric:
        recommended.extend(
            ["kmeans", "mini_batch_kmeans", "birch", "bisecting_kmeans", "gaussian_mixture"]
        )
    if has_categorical:
        recommended.extend(
            ["agglomerative", "dbscan", "optics", "spectral", "hdbscan"]
        )

    n_rows = len(df)
    # df.items() yields one Series per column even when labels are duplicated;
    # df[label] would return a DataFrame in that case and break `.dtype`.
    cols = [
        {
            "name": label,
            "inferred_type": str(series.dtype),
            # The mean of an empty Series is NaN; report 0.0 for empty frames
            # so the result stays JSON-friendly.
            "missing_pct": float(series.isna().mean()) if n_rows else 0.0,
            "cardinality": int(series.nunique(dropna=True)),
        }
        for label, series in df.items()
    ]

    return {
        "columns": cols,
        "recommended_algorithms": recommended,
    }