ClusterBuster-API / backend /app /services /profiling_service.py
Adisri99's picture
Upload 26 files
4013eed verified
raw
history blame contribute delete
841 Bytes
def profile_dataframe(df):
    """Profile the columns of a DataFrame and suggest clustering algorithms.

    Args:
        df: A pandas DataFrame. It may have zero rows, zero columns, or
            repeated column labels.

    Returns:
        A dict with two keys:

        * ``"columns"``: one dict per physical column, in column order, with
          ``name`` (the column label), ``inferred_type`` (the dtype as a
          string), ``missing_pct`` (the *fraction* of missing values in the
          range 0.0-1.0, despite the name; 0.0 when the frame has no rows)
          and ``cardinality`` (number of distinct non-missing values).
        * ``"recommended_algorithms"``: algorithm names; the centroid/model
          based group is added when any numeric column exists, followed by
          the density/graph based group when any categorical column exists.
    """
    numeric_algorithms = (
        "kmeans",
        "mini_batch_kmeans",
        "birch",
        "bisecting_kmeans",
        "gaussian_mixture",
    )
    categorical_algorithms = (
        "agglomerative",
        "dbscan",
        "optics",
        "spectral",
        "hdbscan",
    )

    # "number" covers every integer/float width (int8, uint16, float16, ...),
    # not just the 32/64-bit ones. numpy's timedelta64 is a subclass of
    # numpy.number, so it is excluded explicitly to keep durations out.
    numeric_cols = df.select_dtypes(
        include=["number"], exclude=["timedelta"]
    ).columns.tolist()
    categorical_cols = df.select_dtypes(
        include=["object", "bool", "category"]
    ).columns.tolist()

    recommended = []
    if numeric_cols:
        recommended.extend(numeric_algorithms)
    if categorical_cols:
        recommended.extend(categorical_algorithms)

    # Iterate with items() rather than df[col]: with duplicate labels df[col]
    # returns a DataFrame, which has no single dtype or scalar mean.
    cols = [
        {
            "name": label,
            "inferred_type": str(series.dtype),
            # The mean of an empty boolean Series is NaN, which is not valid
            # JSON; report "nothing missing" for a zero-row frame instead.
            "missing_pct": float(series.isna().mean()) if len(series) else 0.0,
            "cardinality": int(series.nunique(dropna=True)),
        }
        for label, series in df.items()
    ]

    return {
        "columns": cols,
        "recommended_algorithms": recommended,
    }