"""
SmartCertify ML — Math Utilities
Linear algebra, statistics, and probability utilities.
"""
import numpy as np
from scipy import stats
from typing import List, Tuple, Optional
# ─── Linear Algebra Utilities ─────────────────────────────────
def cosine_similarity_vectors(a: np.ndarray, b: np.ndarray) -> float:
    """Return the cosine similarity of vectors ``a`` and ``b``.

    Args:
        a: First vector.
        b: Second vector.

    Returns:
        ``dot(a, b) / (|a| * |b|)`` as a builtin float, or 0.0 when either
        vector has zero norm (the ratio would otherwise be undefined).
    """
    magnitudes = (np.linalg.norm(a), np.linalg.norm(b))
    # A zero-norm vector has no direction; report "no similarity" instead
    # of dividing by zero.
    if any(magnitude == 0 for magnitude in magnitudes):
        return 0.0
    inner = np.dot(a, b)
    return float(inner / (magnitudes[0] * magnitudes[1]))
def euclidean_distance(a: np.ndarray, b: np.ndarray) -> float:
    """Return the Euclidean (L2) distance between ``a`` and ``b``.

    Args:
        a: First vector.
        b: Second vector; must support ``a - b``.

    Returns:
        The norm of the difference ``a - b`` as a builtin float.
    """
    offset = a - b
    length = np.linalg.norm(offset)
    return float(length)
def matrix_rank(matrix: np.ndarray) -> int:
    """Return the rank of ``matrix`` as a builtin int.

    Delegates to ``np.linalg.matrix_rank`` with its default tolerance.
    """
    rank = np.linalg.matrix_rank(matrix)
    return int(rank)
def compute_svd(matrix: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Return the reduced singular value decomposition of ``matrix``.

    Args:
        matrix: The matrix to decompose.

    Returns:
        A plain tuple ``(U, S, Vt)`` from ``np.linalg.svd`` called with
        ``full_matrices=False``; ``S`` holds the singular values.
    """
    # Convert to a plain tuple so callers get the same type regardless of
    # what container np.linalg.svd returns.
    return tuple(np.linalg.svd(matrix, full_matrices=False))
def normalize_vector(v: np.ndarray) -> np.ndarray:
    """Scale ``v`` to unit L2 norm.

    Args:
        v: The vector to normalize.

    Returns:
        ``v / |v|``, or ``v`` itself (the same object, not a copy) when its
        norm is zero.
    """
    length = np.linalg.norm(v)
    return v if length == 0 else v / length
# ─── Statistics Utilities ─────────────────────────────────────
def compute_confidence_interval(
    data: np.ndarray, confidence: float = 0.95
) -> Tuple[float, float]:
    """Return a two-sided Student-t confidence interval for the mean.

    Args:
        data: Sample observations.
        confidence: Coverage level of the interval (default 0.95).

    Returns:
        ``(lower, upper)`` bounds, symmetric about the sample mean. The
        half-width is the standard error of the mean times the t critical
        value with ``len(data) - 1`` degrees of freedom.
    """
    sample_size = len(data)
    center = np.mean(data)
    standard_error = stats.sem(data)
    # Two-sided interval: put (1 - confidence) / 2 in each tail.
    upper_quantile = (1 + confidence) / 2
    critical_value = stats.t.ppf(upper_quantile, sample_size - 1)
    margin = standard_error * critical_value
    lower, upper = center - margin, center + margin
    return float(lower), float(upper)
def compute_z_score(value: float, mean: float, std: float) -> float:
    """Return how many standard deviations ``value`` lies from ``mean``.

    Args:
        value: The observation.
        mean: Mean of the reference distribution.
        std: Standard deviation of the reference distribution.

    Returns:
        ``(value - mean) / std``, or 0.0 when ``std`` is zero.
    """
    if std != 0:
        return (value - mean) / std
    # Degenerate distribution: no spread to measure against.
    return 0.0
def compute_p_value(z_score: float, two_tailed: bool = True) -> float:
    """Return the p-value of ``z_score`` under the standard normal.

    Args:
        z_score: The test statistic.
        two_tailed: When True (default) return ``P(|Z| >= |z_score|)``;
            otherwise return the upper-tail probability ``P(Z >= z_score)``.

    Returns:
        The p-value as a builtin float.
    """
    # Use the survival function rather than ``1 - cdf``: for large |z| the
    # cdf rounds to exactly 1.0 and the subtraction collapses to 0.0, while
    # ``sf`` keeps full precision in the tail.
    if two_tailed:
        return float(2 * stats.norm.sf(abs(z_score)))
    return float(stats.norm.sf(z_score))
def ks_test(data: np.ndarray, distribution: str = "norm") -> Tuple[float, float]:
    """Run a one-sample Kolmogorov-Smirnov test of ``data``.

    Args:
        data: Sample observations.
        distribution: Name of the reference distribution passed to
            ``scipy.stats.kstest`` (default ``"norm"``).

    Returns:
        ``(statistic, p_value)`` as builtin floats.
    """
    outcome = stats.kstest(data, distribution)
    return float(outcome[0]), float(outcome[1])
def compute_entropy(probabilities: np.ndarray) -> float:
    """Return the Shannon entropy, in bits, of a probability distribution.

    Args:
        probabilities: Array-like of probabilities. Lists and tuples are
            accepted as well as arrays.

    Returns:
        ``-sum(p * log2(p))`` over the strictly positive entries, as a
        builtin float. Non-positive entries are skipped.
    """
    # Coerce first so boolean-mask indexing below also works for plain
    # Python sequences.
    probs = np.asarray(probabilities, dtype=np.float64)
    # Drop zeros: p * log2(p) -> 0 as p -> 0, and log2(0) would yield -inf.
    positive = probs[probs > 0]
    return float(-np.sum(positive * np.log2(positive)))
def compute_kl_divergence(p: np.ndarray, q: np.ndarray) -> float:
    """Return the KL divergence D(P || Q) in nats.

    Args:
        p: Array-like probabilities of the distribution P.
        q: Array-like probabilities of the distribution Q.

    Returns:
        ``sum(p * ln(p / q))`` as a builtin float, taken only over entries
        where both ``p`` and ``q`` are strictly positive. Entries with
        ``q == 0`` are skipped rather than producing an infinite result.
    """
    p_arr = np.asarray(p, dtype=np.float64)
    q_arr = np.asarray(q, dtype=np.float64)
    # Keep only the entries where the log-ratio is finite.
    support = (p_arr > 0) & (q_arr > 0)
    p_kept, q_kept = p_arr[support], q_arr[support]
    terms = p_kept * np.log(p_kept / q_kept)
    return float(np.sum(terms))
# ─── Probability Utilities ────────────────────────────────────
def gaussian_probability(x: float, mean: float, std: float) -> float:
    """Return the Gaussian probability density at ``x``.

    Args:
        x: Point at which to evaluate the density.
        mean: Mean (location) of the distribution.
        std: Standard deviation (scale) of the distribution.

    Returns:
        The value of the normal pdf at ``x`` as a builtin float. This is a
        density, not a probability, so it may exceed 1 for small ``std``.
    """
    density = stats.norm.pdf(x, loc=mean, scale=std)
    return float(density)
def bayesian_update(
    prior: float, likelihood: float, evidence: float
) -> float:
    """Return the posterior from Bayes' theorem: P(H|E) = P(E|H) * P(H) / P(E).

    Args:
        prior: P(H), the probability of the hypothesis before the evidence.
        likelihood: P(E|H), the probability of the evidence given H.
        evidence: P(E), the marginal probability of the evidence.

    Returns:
        ``likelihood * prior / evidence``, or 0.0 when ``evidence`` is zero.
    """
    if evidence != 0:
        return (likelihood * prior) / evidence
    # The posterior is undefined for impossible evidence; fall back to 0.0.
    return 0.0
def softmax(x: np.ndarray, axis: Optional[int] = None) -> np.ndarray:
    """Compute softmax probabilities.

    Args:
        x: Array-like of scores (logits).
        axis: Axis along which to normalize. ``None`` (default) normalizes
            over all elements. Pass e.g. ``axis=-1`` to normalize each row
            of a batch independently.

    Returns:
        An array of the same shape as ``x`` whose entries are non-negative
        and sum to 1 along ``axis`` (or over the whole array for ``None``).
    """
    scores = np.asarray(x)
    # Subtract the maximum before exponentiating so large scores cannot
    # overflow; softmax is invariant to this shift.
    shifted = scores - np.max(scores, axis=axis, keepdims=True)
    e_x = np.exp(shifted)
    # keepdims keeps the normalizer broadcastable against e_x for any axis.
    return e_x / e_x.sum(axis=axis, keepdims=True)
# ─── Feature Analysis ─────────────────────────────────────────
def compute_correlation_matrix(data: np.ndarray) -> np.ndarray:
    """Return the Pearson correlation matrix of the columns of ``data``.

    Args:
        data: Observations passed to ``np.corrcoef`` with ``rowvar=False``,
            i.e. each column is treated as a variable and each row as an
            observation.

    Returns:
        The matrix of pairwise correlation coefficients.
    """
    correlations = np.corrcoef(data, rowvar=False)
    return correlations
def compute_mutual_information(x: np.ndarray, y: np.ndarray, bins: int = 20) -> float:
    """Estimate the mutual information, in bits, between two variables.

    The joint distribution is estimated from a 2-D histogram of the paired
    samples, and the marginals are taken from its row and column sums.

    Args:
        x: Samples of the first variable.
        y: Samples of the second variable, paired with ``x``.
        bins: Bin specification forwarded to ``np.histogram2d``
            (default 20 bins per dimension).

    Returns:
        ``sum(pxy * log2(pxy / (px * py)))`` over the occupied histogram
        cells, as a builtin float. Returns 0.0 when the histogram holds no
        samples.
    """
    joint_counts, _, _ = np.histogram2d(x, y, bins=bins)
    total = joint_counts.sum()
    if total == 0:
        # No samples landed in the histogram; there is nothing to measure.
        return 0.0

    pxy = joint_counts / total
    # keepdims makes px a column and py a row, so px * py broadcasts to the
    # product of marginals with the same shape as pxy.
    px = pxy.sum(axis=1, keepdims=True)
    py = pxy.sum(axis=0, keepdims=True)
    independent = px * py

    # Empty cells contribute nothing; an occupied cell always has positive
    # marginals, so the ratio below is well defined.
    occupied = pxy > 0
    contributions = pxy[occupied] * np.log2(pxy[occupied] / independent[occupied])
    return float(contributions.sum())
|