File size: 1,278 Bytes
e5bc014
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
#!/usr/bin/env python3
"""Statistical Analysis Plugin"""
import pandas as pd
from typing import Dict, Any

class StatisticalAnalyzer:
    """Perform statistical analysis on data.

    The class holds no state of its own; ``analyze`` works only from the
    frame passed to it.
    """

    @staticmethod
    def _as_float(value: Any) -> float:
        """Convert a scalar reduction result to ``float``, mapping missing to NaN."""
        # Reductions on nullable extension dtypes (e.g. "Int64") return
        # ``pd.NA`` when there is no valid result, and ``float(pd.NA)`` raises
        # TypeError. Normalise every missing scalar to NaN instead.
        if pd.isna(value):
            return float("nan")
        return float(value)

    def analyze(self, df: pd.DataFrame) -> Dict[str, Any]:
        """Generate comprehensive statistical summary.

        Args:
            df: The frame to summarise.

        Returns:
            A dictionary with two keys:

            * ``"shape"``: ``{"rows": <int>, "columns": <int>}``.
            * ``"columns"``: maps each column label to a dict holding
              ``"name"``, ``"dtype"`` (as a string) and ``"missing_percent"``,
              plus, depending on the column dtype:

              - numeric: ``"mean"``, ``"std"``, ``"min"``, ``"max"`` and
                ``"median"`` as floats (NaN when the statistic is undefined,
                e.g. for an all-missing column);
              - datetime: ``"min_date"`` and ``"max_date"`` as strings;
              - anything else: ``"unique_values"``, the count of distinct
                values as reported by ``Series.nunique()``.
        """
        to_float = self._as_float
        analysis: Dict[str, Any] = {
            "shape": {"rows": len(df), "columns": len(df.columns)},
            "columns": {},
        }
        for col in df.columns:
            # Look the column up once instead of on every statistic.
            series = df[col]
            col_analysis: Dict[str, Any] = {
                "name": col,
                "dtype": str(series.dtype),
                "missing_percent": to_float(series.isna().mean() * 100),
            }
            if pd.api.types.is_numeric_dtype(series):
                col_analysis.update({
                    "mean": to_float(series.mean()),
                    "std": to_float(series.std()),
                    "min": to_float(series.min()),
                    "max": to_float(series.max()),
                    "median": to_float(series.median()),
                })
            elif pd.api.types.is_datetime64_any_dtype(series):
                col_analysis.update({
                    "min_date": str(series.min()),
                    "max_date": str(series.max()),
                })
            else:
                col_analysis.update({"unique_values": int(series.nunique())})
            analysis["columns"][col] = col_analysis
        return analysis