JatinAutonomousLabs's picture
Upload 4 files
9d08bab verified
raw
history blame contribute delete
935 Bytes
#!/usr/bin/env python3
"""Schema Detector Plugin"""
import pandas as pd
from typing import Dict, Any
class SchemaDetector:
    """Detects and reports the schema of a pandas DataFrame."""

    def get_schema(self, df: pd.DataFrame) -> Dict[str, Any]:
        """Return a per-column schema summary for *df*.

        Each column is classified in this order:

        1. ``"Numeric"`` if ``pd.api.types.is_numeric_dtype`` accepts it.
        2. ``"Datetime"`` if ``pd.api.types.is_datetime64_any_dtype`` accepts it.
        3. ``"Categorical"`` if its distinct-value count is below
           ``min(10, len(df) / 5)``.
        4. ``"Text/Object"`` otherwise.

        Args:
            df: The frame to inspect.

        Returns:
            A dict mapping each column label to a dict with the keys
            ``"inferred_type"``, ``"pandas_dtype"``, ``"non_null_count"``
            and ``"unique_values"``. If the frame contains duplicate column
            labels, the entry for the last such column wins.
        """
        # The cut-off depends only on the row count, so compute it once.
        categorical_limit = min(10, len(df) / 5)

        schema: Dict[str, Any] = {}
        # ``items()`` yields one Series per positional column, so duplicate
        # labels do not turn the lookup into a DataFrame (which has no
        # ``.dtype``) as ``df[col]`` would.
        for col, series in df.items():
            # ``nunique`` scans the whole column; reuse the result for both
            # the categorical test and the report.
            unique_count = int(series.nunique())

            if pd.api.types.is_numeric_dtype(series):
                base_type = "Numeric"
            elif pd.api.types.is_datetime64_any_dtype(series):
                base_type = "Datetime"
            elif unique_count < categorical_limit:
                base_type = "Categorical"
            else:
                base_type = "Text/Object"

            schema[col] = {
                "inferred_type": base_type,
                "pandas_dtype": str(series.dtype),
                "non_null_count": int(series.count()),
                "unique_values": unique_count,
            }
        return schema