File size: 1,135 Bytes
import pandas as pd
def run_profile(df: pd.DataFrame) -> dict:
    """Build a summary profile of a DataFrame.

    Args:
        df: The DataFrame to profile.

    Returns:
        A dict with these keys:

        * ``"rows"`` / ``"columns"``: the frame's dimensions as plain ints.
        * ``"column_name"``: list of the column labels.
        * ``"column_details"``: one dict per column, in column order. Each
          holds the dtype string stored under the column's own label, plus
          ``"null_count"``, ``"null_percentage"`` (rounded to 2 decimals) and
          ``"high_null_warning"`` (True when more than 50% of values are
          null). Numeric columns also get ``"mean"``, ``"median"``, ``"min"``
          and ``"max"``, each rounded to 2 decimals.
        * ``"numeric_column_count"``: how many columns have a numeric dtype.

    A frame with zero rows reports a null percentage of 0.0 for every column
    instead of raising ``ZeroDivisionError``.
    """
    rows, cols = (int(n) for n in df.shape)
    columns = df.columns.to_list()
    column_details = []
    numeric_count = 0

    for col in columns:
        series = df[col]
        null_count = int(series.isnull().sum())
        # An empty frame has no rows to divide by; treat it as 0% null.
        null_percentage = round((null_count / rows) * 100, 2) if rows else 0.0

        # NOTE: the dtype is keyed by the column label itself, so a column
        # literally named e.g. "null_count" is overwritten by the fixed keys
        # that follow. Kept as-is because callers rely on this shape.
        column = {
            col: str(series.dtype),
            "null_count": null_count,
            "null_percentage": null_percentage,
            "high_null_warning": null_percentage > 50,
        }

        if pd.api.types.is_numeric_dtype(series):
            numeric_count += 1
            column["mean"] = round(float(series.mean()), 2)
            column["median"] = round(float(series.median()), 2)
            column["min"] = round(float(series.min()), 2)
            column["max"] = round(float(series.max()), 2)

        column_details.append(column)

    return {
        "rows": rows,
        "columns": cols,
        "column_name": columns,
        "column_details": column_details,
        "numeric_column_count": numeric_count,
    }