""" ProbVLM-Style Probabilistic Adapter for Uncertainty Estimation. Converts point embeddings into distributions (Generalized Gaussian) following the BayesCap approach from ProbVLM. Each adapter takes a frozen embedding and predicts: mu: Shift from the input embedding (residual) alpha: Scale parameter (controls spread) beta: Shape parameter (controls tail behavior) These define a Generalized Gaussian distribution: p(x) ∝ exp(-(|x - mu| / alpha)^beta) MC sampling from this distribution produces N embedding samples, which propagate uncertainty through the Gramian volume computation. Architecture: BayesCap_MLP input → Linear(d, hidden) → ReLU → Dropout → Linear(hidden, hidden) → ReLU → Dropout → Three heads: mu_head, alpha_head, beta_head """ from __future__ import annotations import logging from pathlib import Path from typing import Dict, Optional, Tuple import numpy as np logger = logging.getLogger(__name__) try: import torch import torch.nn as nn import torch.nn.functional as F TORCH_AVAILABLE = True except ImportError: TORCH_AVAILABLE = False def _check_torch(): if not TORCH_AVAILABLE: raise ImportError("PyTorch required for ProbabilisticAdapter") class ProbabilisticAdapter(nn.Module): """ BayesCap-style adapter that maps point embeddings to distributions. Takes a frozen embedding (from CLIP or CLAP) and predicts Generalized Gaussian parameters: (mu, alpha, beta). The adapter is lightweight (~0.5M params) and trains in minutes on small datasets. """ def __init__( self, input_dim: int = 512, hidden_dim: int = 256, num_layers: int = 3, dropout: float = 0.1, ): _check_torch() super().__init__() self.input_dim = input_dim # Shared backbone layers = [] in_d = input_dim for _ in range(num_layers - 1): layers.extend([ nn.Linear(in_d, hidden_dim), nn.ReLU(), nn.Dropout(dropout), ]) in_d = hidden_dim self.backbone = nn.Sequential(*layers) # Three output heads self.mu_head = nn.Linear(hidden_dim, input_dim) self.alpha_head = nn.Linear(hidden_dim, input_dim) self.beta_head = nn.Linear(hidden_dim, input_dim) self.config = { "input_dim": input_dim, "hidden_dim": hidden_dim, "num_layers": num_layers, "dropout": dropout, } def forward( self, embedding: torch.Tensor, ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: """ Predict distribution parameters from a point embedding. Args: embedding: Input embedding [batch, input_dim]. Returns: mu: Location parameter [batch, input_dim] (embedding + residual) alpha: Scale parameter [batch, input_dim] (> 0, via softplus) beta: Shape parameter [batch, input_dim] (> 0, via softplus) """ h = self.backbone(embedding) # mu: residual + input (anchored to original embedding) mu = embedding + self.mu_head(h) # alpha, beta: positive via softplus alpha = F.softplus(self.alpha_head(h)) + 1e-6 beta = F.softplus(self.beta_head(h)) + 1e-6 return mu, alpha, beta def sample( self, embedding: np.ndarray, n_samples: int = 100, ) -> np.ndarray: """ Draw Monte Carlo samples from the predicted distribution. Uses the reparameterization trick for Generalized Gaussian: x = mu + alpha * sign(u) * |u|^(1/beta) where u ~ Uniform(-1, 1) Args: embedding: Input embedding, shape (dim,) or (1, dim). n_samples: Number of MC samples. Returns: Samples array, shape (n_samples, dim). """ _check_torch() self.eval() emb = embedding.squeeze() if emb.ndim == 1: emb = emb[np.newaxis, :] with torch.no_grad(): x = torch.tensor(emb, dtype=torch.float32) mu, alpha, beta = self.forward(x) # Expand for sampling: [1, dim] -> [n_samples, dim] mu = mu.expand(n_samples, -1) alpha = alpha.expand(n_samples, -1) beta = beta.expand(n_samples, -1) # Reparameterized sampling from Generalized Gaussian u = torch.rand_like(mu) * 2 - 1 # Uniform(-1, 1) sign = torch.sign(u) samples = mu + alpha * sign * (torch.abs(u) + 1e-8).pow(1.0 / beta) # L2 normalize samples (stay on unit sphere) samples = F.normalize(samples, p=2, dim=-1) return samples.cpu().numpy() def uncertainty(self, embedding: np.ndarray) -> float: """ Compute scalar aleatoric uncertainty for an embedding. Returns the mean predicted alpha (scale parameter) across dimensions. High alpha → high uncertainty → wide distribution. Args: embedding: Input embedding, shape (dim,) or (1, dim). Returns: Scalar uncertainty value (mean alpha). """ _check_torch() self.eval() emb = embedding.squeeze() if emb.ndim == 1: emb = emb[np.newaxis, :] with torch.no_grad(): x = torch.tensor(emb, dtype=torch.float32) _, alpha, _ = self.forward(x) return float(alpha.mean().item()) def save(self, path: str) -> None: """Save adapter weights + config.""" _check_torch() import json p = Path(path) p.parent.mkdir(parents=True, exist_ok=True) torch.save(self.state_dict(), p) config_path = p.with_suffix(".json") with config_path.open("w") as f: json.dump(self.config, f, indent=2) logger.info("Saved ProbabilisticAdapter to %s", path) @classmethod def load(cls, path: str) -> "ProbabilisticAdapter": """Load adapter from saved weights.""" _check_torch() import json p = Path(path) config_path = p.with_suffix(".json") with config_path.open("r") as f: config = json.load(f) model = cls(**config) state_dict = torch.load(p, map_location="cpu", weights_only=True) model.load_state_dict(state_dict) model.eval() logger.info("Loaded ProbabilisticAdapter from %s", path) return model