def profile_dataframe(df) -> dict:
    """Summarize the columns of *df* and suggest clustering algorithms.

    Args:
        df: A pandas DataFrame. Column labels need not be unique; every
            physical column gets its own entry in the result.

    Returns:
        A dict with two keys:

        ``"columns"``
            One dict per column, in column order, with keys ``"name"`` (the
            column label), ``"inferred_type"`` (``str`` of the column dtype),
            ``"missing_pct"`` (fraction of missing values, between 0 and 1 --
            not scaled to a percentage; NaN for a zero-row frame) and
            ``"cardinality"`` (number of distinct non-missing values).
        ``"recommended_algorithms"``
            Algorithm names: the numeric family if at least one numeric
            column is present, followed by the categorical family if at
            least one object/bool column is present.
    """
    # NOTE: only these four dtypes count as numeric here; narrower ints,
    # unsigned ints and nullable extension dtypes are not matched.
    numeric_cols = df.select_dtypes(
        include=["int64", "float64", "int32", "float32"]
    ).columns.tolist()
    categorical_cols = df.select_dtypes(include=["object", "bool"]).columns.tolist()

    numeric_algorithms = [
        "kmeans",
        "mini_batch_kmeans",
        "birch",
        "bisecting_kmeans",
        "gaussian_mixture",
    ]
    categorical_algorithms = [
        "agglomerative",
        "dbscan",
        "optics",
        "spectral",
        "hdbscan",
    ]

    recommended = []
    if numeric_cols:
        recommended.extend(numeric_algorithms)
    if categorical_cols:
        recommended.extend(categorical_algorithms)

    # Iterate (label, Series) pairs rather than indexing with df[label]:
    # with duplicate labels df[label] is a DataFrame, which has no .dtype
    # and whose isna().mean() cannot be converted to a single float.
    cols = [
        {
            "name": label,
            "inferred_type": str(series.dtype),
            "missing_pct": float(series.isna().mean()),
            "cardinality": int(series.nunique(dropna=True)),
        }
        for label, series in df.items()
    ]

    return {
        "columns": cols,
        "recommended_algorithms": recommended,
    }