| """Set size metrics.""" |
| import numpy as np |
|
|
|
|
def mean_radius(radius: np.ndarray) -> float:
    """Return the arithmetic mean of the radii.

    Parameters
    ----------
    radius:
        Array-like of radii. It is converted with ``np.asarray`` so plain
        sequences are accepted as well as arrays.

    Returns
    -------
    float
        Mean over all elements, as a built-in ``float`` (matching the
        annotation and the other metrics in this module). An empty input
        yields ``nan``.
    """
    return float(np.asarray(radius, dtype=float).mean())
|
|
|
|
def radius_by_strata(
    radius: np.ndarray,
    strata: np.ndarray,
) -> dict[int, float]:
    """Return the mean radius within each stratum.

    Parameters
    ----------
    radius:
        Array-like of radii, one per point.
    strata:
        Array-like of stratum labels aligned with ``radius``. Each distinct
        label is converted with ``int`` to form the result key.

    Returns
    -------
    dict[int, float]
        Mapping from stratum label to the mean radius of the points carrying
        that label, with built-in ``int`` keys and ``float`` values.
    """
    radius = np.asarray(radius, dtype=float)
    strata = np.asarray(strata)

    result: dict[int, float] = {}
    for s in np.unique(strata):
        mask = strata == s
        # A NaN label never compares equal to itself, so its mask is empty;
        # skip it rather than average nothing and fail on ``int(nan)``.
        if not mask.any():
            continue
        result[int(s)] = float(radius[mask].mean())
    return result
|
|
|
|
def _distance_to_center(
    samples: np.ndarray,
    center: np.ndarray,
    score: str,
) -> np.ndarray:
    """Compute the distance from every row of ``samples`` to ``center``.

    Supported values of ``score``:

    * ``"tv"``, ``"total_variation"``, ``"l1"`` -- half the sum of absolute
      coordinate differences along axis 1.
    * ``"aitchison"`` -- delegated to ``aitchison_dist`` from
      ``src.utils.simplex``, called with ``samples`` and a copy of ``center``
      repeated once per sample.

    Raises
    ------
    ValueError
        If ``score`` is none of the names above.
    """
    if score in {"tv", "total_variation", "l1"}:
        abs_diff = np.abs(samples - center[None, :])
        return 0.5 * np.sum(abs_diff, axis=1)

    if score != "aitchison":
        raise ValueError(f"Unsupported score for volume ratio: {score}")

    # Imported lazily so the helper is only required when this score is used.
    from src.utils.simplex import aitchison_dist

    n_samples = len(samples)
    center_per_sample = np.repeat(center[None, :], n_samples, axis=0)
    return aitchison_dist(samples, center_per_sample)
|
|
|
|
def volume_ratio(
    center: np.ndarray,
    radius: float,
    *,
    score: str = "aitchison",
    n_mc: int = 20000,
    rng: np.random.Generator | None = None,
) -> float:
    """Estimate by Monte Carlo the share of the simplex inside a score ball.

    ``n_mc`` points are drawn from a Dirichlet distribution with all
    concentration parameters equal to one (i.e. uniformly on the simplex whose
    dimension is taken from the last axis of ``center``). The returned value
    is the fraction of those points whose ``score`` distance to ``center`` is
    at most ``radius``.

    When ``rng`` is ``None`` a generator seeded with 0 is created, so repeated
    calls without an explicit generator give the same estimate.
    """
    center_arr = np.asarray(center, dtype=float)
    generator = np.random.default_rng(0) if rng is None else rng

    n_components = center_arr.shape[-1]
    uniform_points = generator.dirichlet(np.ones(n_components), size=n_mc)

    distances = _distance_to_center(uniform_points, center_arr, score)
    inside_ball = distances <= radius
    return float(np.mean(inside_ball))
|
|
|
|
def mean_volume_ratio(
    centers: np.ndarray,
    radius: np.ndarray,
    *,
    score: str = "aitchison",
    n_mc: int = 20000,
    max_points: int | None = None,
    rng: np.random.Generator | None = None,
) -> float:
    """Average the Monte Carlo simplex-volume ratio over a set of points.

    ``centers[i]`` and ``radius[i]`` describe the i-th point. If
    ``max_points`` is given and there are more points than that, a random
    subset of ``max_points`` indices is drawn without replacement and only
    those points are evaluated.

    A single generator (seeded with 0 when ``rng`` is ``None``) is used both
    for the subsampling and for every per-point ``volume_ratio`` call.
    """
    centers_arr = np.asarray(centers, dtype=float)
    radius_arr = np.asarray(radius, dtype=float)
    generator = rng if rng is not None else np.random.default_rng(0)

    chosen = np.arange(len(centers_arr))
    too_many = max_points is not None and len(chosen) > max_points
    if too_many:
        chosen = generator.choice(chosen, size=max_points, replace=False)

    ratios = []
    for point in chosen:
        ratio = volume_ratio(
            centers_arr[point],
            radius_arr[point],
            score=score,
            n_mc=n_mc,
            rng=generator,
        )
        ratios.append(ratio)
    return float(np.mean(ratios))
|
|
|
|
def volume_ratio_by_strata(
    centers: np.ndarray,
    radius: np.ndarray,
    strata: np.ndarray,
    *,
    score: str = "aitchison",
    n_mc: int = 20000,
    max_points: int | None = None,
    rng: np.random.Generator | None = None,
) -> dict[int, float]:
    """Average simplex-volume ratio per prediction-space stratum.

    Parameters
    ----------
    centers, radius:
        Per-point ball centers and radii, aligned with ``strata``.
    strata:
        Stratum label of each point. Each distinct label is converted with
        ``int`` to form the result key.
    score, n_mc:
        Forwarded unchanged to ``mean_volume_ratio``.
    max_points:
        Forwarded to ``mean_volume_ratio`` for every stratum, so the cap
        applies per stratum rather than to the total.
    rng:
        Generator shared by all strata; one seeded with 0 is created when
        ``None``.

    Returns
    -------
    dict[int, float]
        Mapping from stratum label to the value ``mean_volume_ratio`` returns
        for the points in that stratum.
    """
    centers = np.asarray(centers, dtype=float)
    radius = np.asarray(radius, dtype=float)
    strata = np.asarray(strata)
    if rng is None:
        rng = np.random.default_rng(0)

    result: dict[int, float] = {}
    for s in np.unique(strata):
        mask = strata == s
        # A NaN label never compares equal to itself, so its mask is empty.
        # Skip it, as ``radius_by_strata`` does, instead of averaging nothing
        # and then failing on ``int(nan)``.
        if not mask.any():
            continue
        result[int(s)] = mean_volume_ratio(
            centers[mask],
            radius[mask],
            score=score,
            n_mc=n_mc,
            max_points=max_points,
            rng=rng,
        )
    return result
|
|