simplexuq-code / src /metrics /setsize.py
anonymous0523ly's picture
Initial anonymous code release
fc329a3 verified
raw
history blame
3.16 kB
"""Set size metrics."""
import numpy as np
def mean_radius(radius: np.ndarray) -> float:
    """Return the mean of all entries of ``radius`` as a built-in float.

    Args:
        radius: Array (or array-like, e.g. a list) of radii. The mean is
            taken over every entry, whatever the shape.

    Returns:
        The arithmetic mean, converted to a plain Python ``float``.
    """
    # Coerce first so lists/tuples work, as they do in the volume-ratio
    # functions of this module, which also call ``np.asarray``.
    return float(np.asarray(radius).mean())
def radius_by_strata(
    radius: np.ndarray,
    strata: np.ndarray,
) -> dict[int, float]:
    """Return the mean radius within each stratum.

    Args:
        radius: Radii, one per point (array or array-like).
        strata: Stratum label of each point, aligned with ``radius``
            (array or array-like).

    Returns:
        Mapping from ``int(label)`` to the mean of the radii carrying that
        label, as built-in floats. A label that matches no entry is omitted.
    """
    # Coerce so that plain lists work; without this ``strata == s`` on a
    # list collapses to a single bool instead of an element-wise mask.
    radius = np.asarray(radius)
    strata = np.asarray(strata)

    result: dict[int, float] = {}
    for s in np.unique(strata):
        mask = strata == s
        # ``np.unique`` can return NaN, which compares unequal to every
        # entry (itself included); skip it rather than average nothing.
        if not mask.any():
            continue
        result[int(s)] = float(radius[mask].mean())
    return result
def _distance_to_center(
    samples: np.ndarray,
    center: np.ndarray,
    score: str,
) -> np.ndarray:
    """Return the distance from every row of ``samples`` to ``center``.

    Supported values of ``score``:

    * ``"aitchison"`` -- delegates to ``aitchison_dist`` from
      ``src.utils.simplex``, passing ``center`` repeated once per sample.
    * ``"tv"``, ``"total_variation"``, ``"l1"`` -- half the row-wise sum of
      absolute differences between the sample and ``center``.

    Raises:
        ValueError: If ``score`` is none of the names above.
    """
    if score == "aitchison":
        # Imported only on this branch, so the other scores do not need
        # ``src.utils.simplex`` to be importable.
        from src.utils.simplex import aitchison_dist

        n_samples = len(samples)
        center_rows = np.repeat(center[None, :], n_samples, axis=0)
        return aitchison_dist(samples, center_rows)

    if score in {"tv", "total_variation", "l1"}:
        abs_diff = np.abs(samples - center[None, :])
        row_totals = np.sum(abs_diff, axis=1)
        return 0.5 * row_totals

    raise ValueError(f"Unsupported score for volume ratio: {score}")
def volume_ratio(
    center: np.ndarray,
    radius: float,
    *,
    score: str = "aitchison",
    n_mc: int = 20000,
    rng: np.random.Generator | None = None,
) -> float:
    """Estimate by Monte Carlo the share of the simplex inside a score ball.

    ``n_mc`` points are drawn uniformly from the simplex and the returned
    value is the fraction of them lying within ``radius`` of ``center``
    under the distance selected by ``score``.

    Args:
        center: Centre of the ball; its last axis length sets the number of
            simplex components.
        radius: Ball radius; points at exactly this distance count as inside.
        score: Distance name forwarded to ``_distance_to_center``.
        n_mc: Number of Monte Carlo draws.
        rng: Random generator to draw from. When omitted, a fresh generator
            seeded with 0 is used.

    Returns:
        The covered fraction as a built-in float.
    """
    center_vec = np.asarray(center, dtype=float)
    generator = np.random.default_rng(0) if rng is None else rng

    # A Dirichlet with every concentration equal to one is the uniform
    # distribution on the simplex.
    flat_alpha = np.ones(center_vec.shape[-1])
    draws = generator.dirichlet(flat_alpha, size=n_mc)

    inside = _distance_to_center(draws, center_vec, score) <= radius
    return float(np.mean(inside))
def mean_volume_ratio(
centers: np.ndarray,
radius: np.ndarray,
*,
score: str = "aitchison",
n_mc: int = 20000,
max_points: int | None = None,
rng: np.random.Generator | None = None,
) -> float:
"""Average Monte Carlo simplex-volume ratio across test points."""
centers = np.asarray(centers, dtype=float)
radius = np.asarray(radius, dtype=float)
if rng is None:
rng = np.random.default_rng(0)
idx = np.arange(len(centers))
if max_points is not None and len(idx) > max_points:
idx = rng.choice(idx, size=max_points, replace=False)
vals = [
volume_ratio(centers[i], radius[i], score=score, n_mc=n_mc, rng=rng)
for i in idx
]
return float(np.mean(vals))
def volume_ratio_by_strata(
centers: np.ndarray,
radius: np.ndarray,
strata: np.ndarray,
*,
score: str = "aitchison",
n_mc: int = 20000,
max_points: int | None = None,
rng: np.random.Generator | None = None,
) -> dict[int, float]:
"""Average simplex-volume ratio per prediction-space stratum."""
centers = np.asarray(centers, dtype=float)
radius = np.asarray(radius, dtype=float)
strata = np.asarray(strata)
if rng is None:
rng = np.random.default_rng(0)
result = {}
for s in np.unique(strata):
mask = strata == s
centers_s = centers[mask]
radius_s = radius[mask]
result[int(s)] = mean_volume_ratio(
centers_s,
radius_s,
score=score,
n_mc=n_mc,
max_points=max_points,
rng=rng,
)
return result