|
|
|
|
|
"""Statistical Analysis Plugin""" |
|
|
import pandas as pd |
|
|
from typing import Dict, Any |
|
|
|
|
|
class StatisticalAnalyzer:
    """Perform statistical analysis on data."""

    @staticmethod
    def _as_float(value: Any) -> float:
        """Convert a scalar reduction result to ``float``, mapping missing to NaN."""
        # Nullable extension dtypes (e.g. ``Int64``) reduce to ``pd.NA`` when a
        # column is empty or entirely missing, and ``float(pd.NA)`` raises
        # ``TypeError``. NumPy-backed columns already yield NaN in that case,
        # so NaN is used as the uniform "no value" marker.
        if pd.isna(value):
            return float("nan")
        return float(value)

    def analyze(self, df: pd.DataFrame) -> Dict[str, Any]:
        """Generate comprehensive statistical summary.

        Args:
            df: The frame to summarise.

        Returns:
            A dict with two keys:

            * ``"shape"``: ``{"rows": <row count>, "columns": <column count>}``.
            * ``"columns"``: a mapping from column label to a per-column dict
              that always holds ``"name"``, ``"dtype"`` (as a string) and
              ``"missing_percent"`` (0-100), plus, depending on the dtype:

              - numeric: ``"mean"``, ``"std"``, ``"min"``, ``"max"`` and
                ``"median"`` as floats (NaN when the statistic is undefined);
              - datetime: ``"min_date"`` and ``"max_date"`` as strings;
              - anything else: ``"unique_values"`` as an int.

            Columns that share a label share one entry in ``"columns"``; the
            last such column wins.
        """
        analysis: Dict[str, Any] = {
            "shape": {"rows": len(df), "columns": len(df.columns)},
            "columns": {},
        }

        # ``items()`` yields one Series per positional column, so duplicated
        # labels do not turn the lookup into a DataFrame (which has no
        # ``.dtype``) the way ``df[col]`` would.
        for col, series in df.items():
            col_analysis: Dict[str, Any] = {
                "name": col,
                "dtype": str(series.dtype),
                "missing_percent": float(series.isna().mean() * 100),
            }

            if pd.api.types.is_numeric_dtype(series):
                col_analysis.update({
                    "mean": self._as_float(series.mean()),
                    "std": self._as_float(series.std()),
                    "min": self._as_float(series.min()),
                    "max": self._as_float(series.max()),
                    "median": self._as_float(series.median()),
                })
            elif pd.api.types.is_datetime64_any_dtype(series):
                col_analysis.update({
                    "min_date": str(series.min()),
                    "max_date": str(series.max()),
                })
            else:
                col_analysis.update({"unique_values": int(series.nunique())})

            analysis["columns"][col] = col_analysis

        return analysis
|
|
|