File size: 935 Bytes
9d08bab |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 |
#!/usr/bin/env python3
"""Schema Detector Plugin"""
import pandas as pd
from typing import Dict, Any
class SchemaDetector:
    """Detects and reports data schema.

    The detector inspects each column of a pandas DataFrame and assigns it
    one of four coarse labels: ``"Numeric"``, ``"Datetime"``,
    ``"Categorical"`` or ``"Text/Object"``.
    """

    def get_schema(self, df: pd.DataFrame) -> Dict[str, Any]:
        """Return a per-column schema summary for ``df``.

        Classification rules, applied in order:

        1. ``"Numeric"`` if pandas reports the column dtype as numeric
           (pandas counts ``bool`` as numeric, so boolean columns land here).
        2. ``"Datetime"`` if the dtype is any datetime64 variant.
        3. ``"Categorical"`` if the column has the pandas ``category`` dtype,
           or if its number of distinct non-null values is below
           ``min(10, len(df) / 5)``.
        4. ``"Text/Object"`` otherwise.

        Args:
            df: The frame to describe.

        Returns:
            A dict keyed by column label. Each value is a dict with the keys
            ``"inferred_type"`` (one of the labels above), ``"pandas_dtype"``
            (``str`` of the column dtype), ``"non_null_count"`` and
            ``"unique_values"`` (both plain ``int``). If a column label
            occurs more than once, the entry for the last such column wins.
        """
        # Loop-invariant: a column is "low cardinality" when it has fewer
        # than 10 distinct values AND fewer than one distinct value per
        # five rows.
        categorical_limit = min(10, len(df) / 5)

        schema: Dict[str, Any] = {}
        # ``items()`` yields one Series per column even when labels repeat;
        # ``df[label]`` would hand back a DataFrame in that case and break
        # the ``.dtype`` access below.
        for col, series in df.items():
            # Computed once and reused for both the heuristic and the report.
            unique_count = int(series.nunique())

            if pd.api.types.is_numeric_dtype(series):
                base_type = "Numeric"
            elif pd.api.types.is_datetime64_any_dtype(series):
                base_type = "Datetime"
            elif (
                # An explicit category dtype is categorical regardless of
                # how many distinct values it holds.
                isinstance(series.dtype, pd.CategoricalDtype)
                or unique_count < categorical_limit
            ):
                base_type = "Categorical"
            else:
                base_type = "Text/Object"

            schema[col] = {
                "inferred_type": base_type,
                "pandas_dtype": str(series.dtype),
                "non_null_count": int(series.count()),
                "unique_values": unique_count,
            }
        return schema
|