File size: 4,538 Bytes
6de2f28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
"""
SmartCertify ML — Math Utilities
Linear algebra, statistics, and probability utilities.
"""

import numpy as np
from scipy import stats
from typing import List, Tuple, Optional


# ─── Linear Algebra Utilities ─────────────────────────────────

def cosine_similarity_vectors(a: np.ndarray, b: np.ndarray) -> float:
    """Return the cosine of the angle between vectors ``a`` and ``b``.

    A vector with zero norm has no direction, so the similarity is
    reported as 0.0 whenever either input has zero norm.
    """
    length_a, length_b = np.linalg.norm(a), np.linalg.norm(b)
    if length_a != 0 and length_b != 0:
        cosine = np.dot(a, b) / (length_a * length_b)
        return float(cosine)
    return 0.0


def euclidean_distance(a: np.ndarray, b: np.ndarray) -> float:
    """Return the straight-line (L2) distance separating ``a`` and ``b``."""
    difference = a - b
    distance = np.linalg.norm(difference)
    return float(distance)


def matrix_rank(matrix: np.ndarray) -> int:
    """Return the rank of ``matrix`` as a plain Python ``int``.

    The computation is delegated to ``np.linalg.matrix_rank``.
    """
    rank = np.linalg.matrix_rank(matrix)
    return int(rank)


def compute_svd(matrix: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Factor ``matrix`` into its reduced singular value decomposition.

    Returns the triple ``(U, S, Vt)`` produced by ``np.linalg.svd`` with
    ``full_matrices=False``; ``S`` holds the singular values as a 1-D array.
    """
    left_vectors, singular_values, right_vectors_t = np.linalg.svd(
        matrix, full_matrices=False
    )
    return left_vectors, singular_values, right_vectors_t


def normalize_vector(v: np.ndarray) -> np.ndarray:
    """Scale ``v`` to unit L2 length.

    A vector whose norm is zero cannot be scaled, so it is handed back
    unchanged (the very same object, not a copy).
    """
    length = np.linalg.norm(v)
    return v / length if length != 0 else v


# ─── Statistics Utilities ─────────────────────────────────────

def compute_confidence_interval(
    data: np.ndarray, confidence: float = 0.95
) -> Tuple[float, float]:
    """Return a two-sided Student-t confidence interval for the mean.

    The interval is ``mean ± t * sem``, where ``sem`` is the standard
    error from ``scipy.stats.sem`` and ``t`` is the critical value at
    ``(1 + confidence) / 2`` with ``len(data) - 1`` degrees of freedom.

    Returns:
        ``(lower, upper)`` bounds as Python floats.
    """
    sample_size = len(data)
    center = np.mean(data)
    std_error = stats.sem(data)
    # Upper-tail quantile that leaves (1 - confidence) / 2 in each tail.
    critical_value = stats.t.ppf((1 + confidence) / 2, sample_size - 1)
    margin = std_error * critical_value
    lower, upper = center - margin, center + margin
    return (float(lower), float(upper))


def compute_z_score(value: float, mean: float, std: float) -> float:
    """Return how many standard deviations ``value`` lies from ``mean``.

    When ``std`` is zero the score is undefined; 0.0 is returned instead
    of dividing by zero.
    """
    if std != 0:
        return (value - mean) / std
    return 0.0


def compute_p_value(z_score: float, two_tailed: bool = True) -> float:
    """Convert a z-score into a p-value under the standard normal.

    Args:
        z_score: Test statistic expressed in standard deviations.
        two_tailed: If True (default), return the probability of a
            deviation at least as large as ``|z_score|`` in either
            direction. If False, return the upper-tail probability
            ``P(Z >= z_score)``.

    Returns:
        The p-value as a Python float.
    """
    # The survival function evaluates the upper tail directly. Forming
    # 1 - cdf(z) instead rounds to exactly 0.0 once cdf(z) is within
    # float precision of 1, which hides very small p-values.
    if two_tailed:
        return float(2 * stats.norm.sf(abs(z_score)))
    return float(stats.norm.sf(z_score))


def ks_test(data: np.ndarray, distribution: str = "norm") -> Tuple[float, float]:
    """Run a one-sample Kolmogorov-Smirnov goodness-of-fit test.

    ``data`` is compared against ``distribution`` via ``scipy.stats.kstest``.
    No distribution parameters are forwarded, so the reference is the
    distribution with SciPy's default parameters (for ``"norm"`` that is
    the standard normal) rather than one fitted to ``data``.

    Returns:
        ``(statistic, p_value)`` as Python floats.
    """
    outcome = stats.kstest(data, distribution)
    return float(outcome.statistic), float(outcome.pvalue)


def compute_entropy(probabilities: np.ndarray) -> float:
    """Compute the Shannon entropy, in bits, of a probability distribution.

    Args:
        probabilities: Array-like of probabilities. Entries that are not
            strictly positive are skipped, following the convention
            ``0 * log2(0) = 0``. The values are not renormalised.

    Returns:
        ``-sum(p * log2(p))`` over the positive entries, as a Python float.
    """
    # Coerce first so plain lists/tuples support the boolean-mask indexing.
    probs = np.asarray(probabilities, dtype=np.float64)
    positive = probs[probs > 0]
    entropy = float(-np.sum(positive * np.log2(positive)))
    # Negating an exact zero sum yields -0.0; adding +0.0 normalises the sign
    # without affecting any other value.
    return entropy + 0.0


def compute_kl_divergence(p: np.ndarray, q: np.ndarray) -> float:
    """Compute the KL divergence D(P || Q) in nats.

    Args:
        p: Array-like of probabilities for the reference distribution P.
        q: Array-like of probabilities for the approximating distribution Q,
            with the same shape as ``p``.

    Returns:
        ``sum(p * log(p / q))`` over outcomes where both ``p`` and ``q``
        are positive, as a Python float. Outcomes with ``p == 0``
        contribute nothing. If any outcome has ``p > 0`` while ``q == 0``
        the divergence is unbounded and ``inf`` is returned.
    """
    p = np.asarray(p, dtype=np.float64)
    q = np.asarray(q, dtype=np.float64)
    # P puts mass where Q puts none: the divergence is infinite. Dropping
    # those terms would under-report the largest possible mismatch.
    if np.any((p > 0) & (q == 0)):
        return float("inf")
    # Restrict to strictly positive pairs so the log and division are defined.
    mask = (p > 0) & (q > 0)
    return float(np.sum(p[mask] * np.log(p[mask] / q[mask])))


# ─── Probability Utilities ────────────────────────────────────

def gaussian_probability(x: float, mean: float, std: float) -> float:
    """Evaluate the normal density with centre ``mean`` and scale ``std`` at ``x``.

    The result is a probability *density* (the value of the PDF), not a
    probability mass.
    """
    density = stats.norm.pdf(x, mean, std)
    return float(density)


def bayesian_update(
    prior: float, likelihood: float, evidence: float
) -> float:
    """Return the posterior P(H|E) = P(E|H) * P(H) / P(E).

    ``prior`` is P(H), ``likelihood`` is P(E|H) and ``evidence`` is P(E).
    A zero ``evidence`` would require dividing by zero, so 0.0 is
    returned in that case.
    """
    posterior = 0.0 if evidence == 0 else (likelihood * prior) / evidence
    return posterior


def softmax(x: np.ndarray) -> np.ndarray:
    """Map scores ``x`` to probabilities via the softmax function.

    The maximum of ``x`` is subtracted before exponentiating so that large
    scores do not overflow; the shift cancels out in the ratio. Both the
    maximum and the normalising sum are taken over the whole array.
    """
    shifted = x - np.max(x)
    exponentials = np.exp(shifted)
    return exponentials / np.sum(exponentials)


# ─── Feature Analysis ─────────────────────────────────────────

def compute_correlation_matrix(data: np.ndarray) -> np.ndarray:
    """Return the Pearson correlation coefficients between columns of ``data``.

    ``rowvar=False`` tells ``np.corrcoef`` to treat each column as a
    variable and each row as one observation.
    """
    correlations = np.corrcoef(data, rowvar=False)
    return correlations


def compute_mutual_information(x: np.ndarray, y: np.ndarray, bins: int = 20) -> float:
    """Estimate the mutual information, in bits, between two variables.

    The joint distribution is estimated from a 2-D histogram of the paired
    samples, and the marginals are obtained by summing it along each axis.

    Args:
        x: Samples of the first variable.
        y: Samples of the second variable, paired element-wise with ``x``.
        bins: Bin specification forwarded to ``np.histogram2d``.

    Returns:
        ``sum(pxy * log2(pxy / (px * py)))`` over the non-empty histogram
        cells, as a Python float. Returns 0.0 when the histogram contains
        no samples.
    """
    hist_2d, _, _ = np.histogram2d(x, y, bins=bins)
    total = hist_2d.sum()
    if total == 0:
        # No samples landed in any cell; there is no distribution to measure.
        return 0.0

    pxy = hist_2d / total
    # keepdims lets the marginals broadcast into a grid of px[i] * py[j].
    px = pxy.sum(axis=1, keepdims=True)
    py = pxy.sum(axis=0, keepdims=True)

    # A cell with positive joint mass necessarily has positive marginals,
    # so masking on pxy alone keeps every log argument finite and positive.
    occupied = pxy > 0
    joint = pxy[occupied]
    independent = (px * py)[occupied]
    return float(np.sum(joint * np.log2(joint / independent)))