jzou19950715 committed on
Commit
4a3a3c3
·
verified ·
1 Parent(s): 369e574

Create components/statistical.py

Browse files
Files changed (1) hide show
  1. components/statistical.py +92 -0
components/statistical.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # components/statistical.py
2
+
3
+ import numpy as np
4
+ from scipy import stats
5
+ from typing import Dict, List, Optional, Union
6
+ import pandas as pd
7
+
8
class StatisticalAnalyzer:
    """Stateless collection of descriptive-statistics helpers.

    Every method is a ``staticmethod`` that takes raw data and returns a
    plain ``dict`` describing it.
    """

    # scipy.stats.normaltest combines skewtest and kurtosistest, and skewtest
    # is documented as requiring at least eight observations.
    _MIN_NORMALTEST_SAMPLES = 8

    @staticmethod
    def analyze_distribution(data: Union[List[float], np.ndarray]) -> Dict:
        """Summarise a one-dimensional sample and test it for normality.

        Args:
            data: Sample values.

        Returns:
            A dict with the keys ``n_samples``, ``mean``, ``std`` (NumPy
            default, i.e. ``ddof=0``), ``median``, ``skewness``, ``kurtosis``
            (SciPy default, i.e. excess kurtosis) and ``normality_test``.
            ``normality_test`` is a nested dict holding the ``statistic`` and
            ``p_value`` of ``scipy.stats.normaltest`` plus ``is_normal``,
            which is ``True`` when ``p_value > 0.05``.

            When the sample has fewer than 8 observations the normality test
            is skipped: ``statistic`` and ``p_value`` are NaN and
            ``is_normal`` is ``False`` (normality could not be established).
        """
        n_samples = len(data)
        result = {
            "n_samples": n_samples,
            "mean": float(np.mean(data)),
            "std": float(np.std(data)),
            "median": float(np.median(data)),
            "skewness": float(stats.skew(data)),
            "kurtosis": float(stats.kurtosis(data)),
        }

        # Running normaltest on a too-small sample either raises or yields
        # NaN depending on the SciPy version; skip it so the summary above
        # is always returned.
        if n_samples >= StatisticalAnalyzer._MIN_NORMALTEST_SAMPLES:
            raw_statistic, raw_p_value = stats.normaltest(data)
            statistic = float(raw_statistic)
            p_value = float(raw_p_value)
        else:
            statistic = p_value = float("nan")

        result["normality_test"] = {
            "statistic": statistic,
            "p_value": p_value,
            # bool() turns numpy.bool_ into a built-in bool, which (unlike
            # numpy.bool_) is JSON-serialisable. NaN compares False.
            "is_normal": bool(p_value > 0.05),
        }
        return result

    @staticmethod
    def calculate_confidence_interval(
        data: Union[List[float], np.ndarray],
        confidence: float = 0.95
    ) -> Dict:
        """Compute a Student-t confidence interval for the sample mean.

        Args:
            data: Sample values.
            confidence: Confidence level of the interval, e.g. ``0.95``.

        Returns:
            A dict with ``mean``, ``ci_lower``, ``ci_upper`` and the
            ``confidence`` level that was requested. The bounds are NaN when
            the standard error is undefined (fewer than two observations).
        """
        sample_mean = np.mean(data)
        # stats.sem uses ddof=1, i.e. the sample standard deviation.
        std_err = stats.sem(data)
        lower, upper = stats.t.interval(
            confidence, len(data) - 1, loc=sample_mean, scale=std_err
        )
        return {
            "mean": float(sample_mean),
            "ci_lower": float(lower),
            "ci_upper": float(upper),
            "confidence": confidence,
        }

    @staticmethod
    def forecast_probability_cone(
        data: Union[List[float], np.ndarray],
        steps: int = 10,
        confidence: float = 0.95
    ) -> Dict:
        """Build a flat forecast with a widening uncertainty band.

        The centre line is the sample mean at every step. The half-width at
        step ``i`` is ``t * sem * sqrt(1 + i / n)``, where ``t`` is the
        two-sided Student-t quantile for ``confidence`` with ``n - 1``
        degrees of freedom and ``n`` is ``len(data)``.

        Args:
            data: Historical sample values.
            steps: Number of forecast points to generate.
            confidence: Confidence level used for the band.

        Returns:
            A dict of equal-length lists: ``time`` (``0 .. steps - 1``),
            ``mean``, ``lower`` and ``upper``. All numeric entries are
            built-in Python numbers.
        """
        n_obs = len(data)
        centre = float(np.mean(data))
        std_err = stats.sem(data)
        t_value = stats.t.ppf((1 + confidence) / 2, n_obs - 1)
        # Loop-invariant part of the half-width, computed once.
        base_error = t_value * std_err

        time_points = list(range(steps))
        half_widths = [
            float(base_error * np.sqrt(1 + step / n_obs))
            for step in time_points
        ]

        return {
            "time": time_points,
            "mean": [centre] * steps,
            "lower": [centre - width for width in half_widths],
            "upper": [centre + width for width in half_widths],
        }

    @staticmethod
    def analyze_correlations(df: pd.DataFrame, threshold: float = 0.5) -> Dict:
        """Compute pairwise correlations and list the strong ones.

        Only numeric and boolean columns take part; other columns (strings,
        objects, ...) are ignored instead of making ``DataFrame.corr`` fail.

        Args:
            df: Input table.
            threshold: A pair is reported when the absolute value of its
                correlation is strictly greater than this value. Note that
                this is a magnitude cut-off, not a statistical significance
                test.

        Returns:
            A dict with ``correlation_matrix`` (the correlation matrix as a
            nested dict) and ``significant_correlations``, a list of
            ``{"var1", "var2", "correlation"}`` dicts, one per unordered
            column pair above the threshold, in column order.
        """
        numeric_df = df.select_dtypes(include=["number", "bool"])
        corr_matrix = numeric_df.corr()

        columns = list(corr_matrix.columns)
        coefficients = corr_matrix.to_numpy()

        # Walk the strict upper triangle so each pair is reported once and
        # the diagonal (self-correlation) is skipped. NaN coefficients never
        # pass the comparison.
        significant = [
            {
                "var1": columns[row],
                "var2": columns[col],
                "correlation": float(coefficients[row, col]),
            }
            for row in range(len(columns))
            for col in range(row + 1, len(columns))
            if abs(coefficients[row, col]) > threshold
        ]

        return {
            "correlation_matrix": corr_matrix.to_dict(),
            "significant_correlations": significant,
        }