File size: 579 Bytes
9eecab5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import pandas as pd


def extract_schema(df: pd.DataFrame) -> dict:
    """Summarize the structure of *df* as a plain dictionary.

    Args:
        df: The DataFrame to describe.

    Returns:
        A dict with the following keys:

        * ``"rows"`` -- number of rows (``len(df)``).
        * ``"columns"`` -- list of all column labels, in frame order.
        * ``"numeric_columns"`` -- labels of columns selected by the
          ``"number"`` dtype filter.
        * ``"categorical_columns"`` -- labels of columns selected by the
          ``"object"`` / ``"category"`` dtype filters.
        * ``"missing_values"`` -- mapping of column label to the fraction
          of null entries, rounded to 4 decimals. A frame with zero rows
          reports ``0.0`` for every column instead of NaN.
        * ``"column_types"`` -- mapping of column label to ``str(dtype)``.
    """
    row_count = len(df)

    # Divide the null counts by the row count ourselves rather than calling
    # ``.mean()``: the mean of an empty column is NaN, which would leak into
    # the schema for zero-row frames. Clamping the divisor to 1 yields 0.0
    # in that case and leaves results for non-empty frames unchanged.
    null_fraction = df.isnull().sum() / max(row_count, 1)

    return {
        "rows": row_count,
        "columns": list(df.columns),
        "numeric_columns": list(
            df.select_dtypes(include=["number"]).columns
        ),
        "categorical_columns": list(
            df.select_dtypes(include=["object", "category"]).columns
        ),
        "missing_values": null_fraction.round(4).to_dict(),
        # Dtypes are stored by their string name (e.g. "float64").
        "column_types": {
            col: str(dtype) for col, dtype in df.dtypes.items()
        },
    }