File size: 1,383 Bytes
568dff8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import pandas as pd
import numpy as np
from scipy import stats

class DataAnalyzer:
    """Compute a dictionary of descriptive insights for a pandas DataFrame."""

    # normaltest builds on scipy's skewtest, which is not defined for fewer
    # than 8 observations (older SciPy raises ValueError, newer returns NaN).
    _MIN_NORMALTEST_SAMPLES = 8
    # Significance level above which a column is labelled as normal.
    _NORMALITY_ALPHA = 0.05

    def analyze(self, data: pd.DataFrame) -> dict:
        """Summarize ``data`` and return the findings as a plain dictionary.

        Args:
            data: The DataFrame to analyze.

        Returns:
            A dictionary with these entries:

            * ``'basic_stats'``: ``data.describe()`` as a dict (empty when
              ``data`` has no columns).
            * ``'correlations'``: correlation matrix of the numeric columns;
              only present when there is more than one numeric column.
            * ``'distribution'``: ``'skewness'`` and ``'kurtosis'`` per
              numeric column.
            * ``'<column>_distribution'``: value counts for every
              object-dtype column.
            * ``'normality_tests'``: per numeric column, a dict holding
              ``'is_normal'`` (bool) and ``'p_value'`` (float).
        """
        insights = {}

        # describe() raises ValueError on a frame without columns, so an
        # empty frame is reported as having no statistics instead.
        if len(data.columns) > 0:
            insights['basic_stats'] = data.describe().to_dict()
        else:
            insights['basic_stats'] = {}

        numeric_columns = data.select_dtypes(include=[np.number]).columns
        numeric_data = data[numeric_columns]

        # A correlation matrix is only meaningful with at least two columns.
        if len(numeric_columns) > 1:
            insights['correlations'] = numeric_data.corr().to_dict()

        insights['distribution'] = {
            'skewness': numeric_data.skew().to_dict(),
            'kurtosis': numeric_data.kurtosis().to_dict(),
        }

        categorical_columns = data.select_dtypes(include=['object']).columns
        for column in categorical_columns:
            insights[f'{column}_distribution'] = (
                data[column].value_counts().to_dict()
            )

        insights['normality_tests'] = {
            column: self._normality_test(data[column])
            for column in numeric_columns
        }

        return insights

    def _normality_test(self, series):
        """Run ``scipy.stats.normaltest`` on the non-null values of ``series``.

        Columns with too few observations for the test are reported with a
        NaN p-value and ``is_normal`` set to ``False`` rather than aborting
        the whole analysis.
        """
        values = series.dropna()
        if len(values) < self._MIN_NORMALTEST_SAMPLES:
            p_value = float('nan')
        else:
            _, p_value = stats.normaltest(values)
            # Convert the numpy scalar so the result holds built-in types.
            p_value = float(p_value)
        return {
            # Any comparison with NaN is False, so an untestable column is
            # never labelled as normal.
            'is_normal': bool(p_value > self._NORMALITY_ALPHA),
            'p_value': p_value,
        }