# Page header from the hosting site (not part of the module):
# VicGerardoPR - "Update services/profiler.py" - commit 260fb79 (verified)
import pandas as pd
import numpy as np
class ProfilerService:
    """Compute summary statistics ("profiles") for pandas DataFrames."""

    def generate_profile(self, df: pd.DataFrame) -> dict:
        """Return a profile of *df* as a plain dictionary.

        Args:
            df: The DataFrame to profile.

        Returns:
            On success, a dict with two keys:

            * ``"general"``: ``row_count``, ``col_count``, ``memory_usage``
              (deep memory usage in MiB) and ``duplicate_rows``.
            * ``"columns"``: one dict per column, in column order, with
              ``name``, ``type``, ``null_count``, ``unique_count`` and
              ``null_percentage``; numeric (non-boolean) columns also get
              ``min``, ``max``, ``mean`` and ``std``, each a float or
              ``None`` when the statistic is undefined.

            If any step raises, ``{"error": "<message>"}`` is returned
            instead of propagating the exception.
        """
        try:
            row_count = len(df)
            bytes_used = float(df.memory_usage(deep=True).sum())
            profile = {
                "general": {
                    "row_count": row_count,
                    "col_count": len(df.columns),
                    "memory_usage": bytes_used / (1024 * 1024),
                    "duplicate_rows": int(df.duplicated().sum()),
                },
                "columns": [],
            }
            # items() walks the columns by position, so every column yields
            # a Series even when two columns share the same label.
            for name, col_data in df.items():
                profile["columns"].append(
                    self._profile_column(name, col_data, row_count)
                )
            return profile
        except Exception as e:
            # Callers receive failures as data rather than as an exception;
            # this is the method's existing contract, so it is kept.
            return {"error": str(e)}

    @staticmethod
    def _profile_column(name, col_data: pd.Series, row_count: int) -> dict:
        """Build the statistics dict for a single column."""
        null_count = int(col_data.isnull().sum())
        stats = {
            "name": name,
            "type": str(col_data.dtype),
            "null_count": null_count,
            "unique_count": int(col_data.nunique()),
            # A frame with columns but no rows has no nulls; avoid 0 / 0.
            "null_percentage": (
                (null_count / row_count) * 100 if row_count else 0.0
            ),
        }

        # is_numeric_dtype understands pandas extension dtypes (category,
        # nullable integers, ...) for which np.issubdtype raises TypeError.
        # It also reports bool as numeric, so bool is excluded explicitly.
        dtype = col_data.dtype
        is_numeric = pd.api.types.is_numeric_dtype(
            dtype
        ) and not pd.api.types.is_bool_dtype(dtype)
        if is_numeric:
            if col_data.empty:
                stats.update(dict.fromkeys(("min", "max", "mean", "std")))
            else:
                to_float = ProfilerService._to_float_or_none
                stats.update({
                    "min": to_float(col_data.min()),
                    "max": to_float(col_data.max()),
                    "mean": to_float(col_data.mean()),
                    "std": to_float(col_data.std()),
                })
        return stats

    @staticmethod
    def _to_float_or_none(value):
        """Convert a scalar statistic to float, mapping NaN/NA to None."""
        # NaN/NA arises for all-null columns and for std of a single value.
        if pd.isna(value):
            return None
        return float(value)
# Module-level ProfilerService instance, created when this module is imported.
# NOTE(review): presumably imported elsewhere as a shared instance - confirm
# against callers.
profiler_service = ProfilerService()