Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import numpy as np | |
class ProfilerService:
    """Build a dictionary summary ("profile") of a pandas DataFrame."""

    @staticmethod
    def _float_or_none(value):
        """Return ``value`` as a built-in float, or None when it is NaN/NA."""
        # min/max/mean/std yield NaN (or pd.NA for nullable dtypes) on empty
        # or all-null data; None is the JSON-safe way to say "undefined".
        if pd.isna(value):
            return None
        return float(value)

    def generate_profile(self, df: pd.DataFrame):
        """Summarise ``df`` with frame-level and per-column statistics.

        Args:
            df: The DataFrame to profile.

        Returns:
            On success, a dict with two keys:

            * ``"general"``: ``row_count``, ``col_count``, ``memory_usage``
              (deep memory usage in bytes divided by 1024 * 1024) and
              ``duplicate_rows``.
            * ``"columns"``: one dict per column, in column order, holding
              ``name``, ``type`` (dtype as a string), ``null_count``,
              ``unique_count`` and ``null_percentage`` (0.0 for a frame with
              no rows). Numeric, non-boolean columns additionally carry
              ``min``, ``max``, ``mean`` and ``std``; each is None when the
              statistic is undefined (no non-null values, or fewer than two
              values for ``std``).

            On any failure, ``{"error": <exception message>}`` is returned
            instead; exceptions are not propagated to the caller.
        """
        try:
            row_count = len(df)
            profile = {
                "general": {
                    "row_count": row_count,
                    "col_count": len(df.columns),
                    "memory_usage": float(
                        df.memory_usage(deep=True).sum() / (1024 * 1024)
                    ),
                    "duplicate_rows": int(df.duplicated().sum()),
                },
                "columns": [],
            }

            # items() yields each column as a Series even when labels repeat,
            # whereas df[label] would return a DataFrame for duplicated labels.
            for col, col_data in df.items():
                null_count = int(col_data.isnull().sum())
                stats = {
                    "name": col,
                    "type": str(col_data.dtype),
                    "null_count": null_count,
                    "unique_count": int(col_data.nunique()),
                    # Guard against dividing by zero on a frame with no rows.
                    "null_percentage": (
                        (null_count / row_count) * 100 if row_count else 0.0
                    ),
                }

                # is_numeric_dtype understands pandas extension dtypes
                # (category, nullable ints, strings) that np.issubdtype
                # rejects with a TypeError. Booleans stay excluded, matching
                # the previous np.number-based check.
                dtype = col_data.dtype
                is_numeric = pd.api.types.is_numeric_dtype(
                    dtype
                ) and not pd.api.types.is_bool_dtype(dtype)
                if is_numeric:
                    stats.update({
                        "min": self._float_or_none(col_data.min()),
                        "max": self._float_or_none(col_data.max()),
                        "mean": self._float_or_none(col_data.mean()),
                        "std": self._float_or_none(col_data.std()),
                    })

                profile["columns"].append(stats)
            return profile
        except Exception as e:
            # Boundary handler: callers receive an error payload rather than
            # an exception, so the broad catch is kept deliberately.
            return {"error": str(e)}
# Module-level ProfilerService instance, created when this module is imported.
profiler_service = ProfilerService()