|
|
|
|
|
"""Schema Detector Plugin""" |
|
|
import pandas as pd |
|
|
from typing import Dict, Any |
|
|
|
|
|
class SchemaDetector:
    """Detects and reports data schema."""

    def get_schema(self, df: pd.DataFrame) -> Dict[str, Any]:
        """Return a per-column schema summary of ``df``.

        Every column is assigned one inferred type; the first matching
        rule wins:

        1. ``"Numeric"`` if ``pd.api.types.is_numeric_dtype`` accepts it.
        2. ``"Datetime"`` if ``pd.api.types.is_datetime64_any_dtype``
           accepts it.
        3. ``"Categorical"`` if its number of distinct values is strictly
           below ``min(10, len(df) / 5)``.
        4. ``"Text/Object"`` otherwise.

        Args:
            df: The frame to inspect.

        Returns:
            A dict keyed by column label, in ``df.columns`` order. Each
            value is a dict with the keys ``inferred_type``,
            ``pandas_dtype`` (the dtype rendered with ``str``),
            ``non_null_count`` and ``unique_values`` (both plain ``int``).
        """
        schema: Dict[str, Any] = {}

        # The categorical cut-off depends only on the row count, so it is
        # computed once instead of once per column.
        categorical_limit = min(10, len(df) / 5)

        for col in df.columns:
            # NOTE(review): assumes column labels are unique, so df[col]
            # is a single Series -- confirm against callers.
            series = df[col]

            # Needed for the report on every column and also for the
            # categorical test, so count distinct values a single time.
            unique_count = series.nunique()

            if pd.api.types.is_numeric_dtype(series):
                base_type = "Numeric"
            elif pd.api.types.is_datetime64_any_dtype(series):
                base_type = "Datetime"
            elif unique_count < categorical_limit:
                base_type = "Categorical"
            else:
                base_type = "Text/Object"

            schema[col] = {
                "inferred_type": base_type,
                "pandas_dtype": str(series.dtype),
                # int(...) turns the numpy integers pandas returns into
                # plain Python ints.
                "non_null_count": int(series.count()),
                "unique_values": int(unique_count),
            }

        return schema
|
|
|