File size: 1,383 Bytes
568dff8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 |
import pandas as pd
import numpy as np
from scipy import stats
class DataAnalyzer:
    """Collect descriptive insights about a pandas DataFrame."""

    # scipy.stats.normaltest builds on skewtest, which is not defined for
    # fewer than 8 observations; depending on the SciPy version it either
    # raises ValueError or yields NaN there, so short columns are skipped.
    _MIN_NORMALTEST_SAMPLES = 8

    def analyze(self, data, *, alpha=0.05):
        """Return a dictionary of summary insights for *data*.

        Args:
            data: The pandas DataFrame to analyze.
            alpha: Significance level for the normality test; a column is
                reported as normal when its p-value is greater than this.

        Returns:
            A dict with these keys:

            * ``'basic_stats'``: ``data.describe()`` as a dict (empty when
              *data* has no columns).
            * ``'correlations'``: pairwise correlation of the numeric
              columns; only present when there is more than one.
            * ``'distribution'``: ``'skewness'`` and ``'kurtosis'`` per
              numeric column.
            * ``'<column>_distribution'``: value counts for each
              object-dtype column.
            * ``'normality_tests'``: per numeric column, a dict with
              ``'is_normal'`` (bool) and ``'p_value'`` (float). Columns
              with too few non-null values get ``False`` and NaN.
        """
        insights = {}

        # describe() raises ValueError on a frame without columns.
        has_columns = len(data.columns) > 0
        insights['basic_stats'] = data.describe().to_dict() if has_columns else {}

        # Select the numeric sub-frame once and reuse it below.
        numeric = data.select_dtypes(include=[np.number])
        numeric_columns = numeric.columns

        # A correlation matrix needs at least two numeric columns to be useful.
        if len(numeric_columns) > 1:
            insights['correlations'] = numeric.corr().to_dict()

        if len(numeric_columns) > 0:
            skewness = numeric.skew().to_dict()
            kurtosis = numeric.kurtosis().to_dict()
        else:
            skewness, kurtosis = {}, {}
        insights['distribution'] = {
            'skewness': skewness,
            'kurtosis': kurtosis,
        }

        # Frequency table for every object-dtype column.
        categorical_columns = data.select_dtypes(include=['object']).columns
        for column in categorical_columns:
            insights[f'{column}_distribution'] = data[column].value_counts().to_dict()

        insights['normality_tests'] = {
            column: self._normality(numeric[column].dropna(), alpha)
            for column in numeric_columns
        }

        return insights

    def _normality(self, values, alpha):
        """Run the normality test on *values* and return its result dict."""
        if len(values) < self._MIN_NORMALTEST_SAMPLES:
            # Not enough data to run the test: report "not shown to be
            # normal" with a NaN p-value so the result shape stays stable.
            return {'is_normal': False, 'p_value': float('nan')}

        _, p_value = stats.normaltest(values)
        # Convert NumPy scalars to built-ins so the result can be serialized.
        p_value = float(p_value)
        return {
            'is_normal': p_value > alpha,
            'p_value': p_value,
        }