""" ============================================================================= DCDE: Depth-Conditioned Dynamic Ensemble with Evidential Uncertainty for Femtosecond Laser Internal Hydrogel Etching Prediction A novel hybrid architecture combining: 1. FiLM-conditioned Neural Network (depth-adaptive feature modulation) 2. XGBoost gradient-boosted trees (capturing tabular feature interactions) 3. Learned dynamic gating network (input-conditioned fusion) 4. Evidential Deep Learning (Normal-Inverse-Gamma uncertainty) 5. Physics-informed regularization (monotonicity + energy constraints) References: - FiLM: Perez et al., AAAI 2018 (arxiv:1709.07871) - Deep Evidential Regression: Amini et al., NeurIPS 2020 (arxiv:1910.02600) - DELE gating: AAAI 2023 (arxiv:2302.00932) - Physics-informed ML: Zhang et al. 2022 (arxiv:2211.08064) ============================================================================= """ from __future__ import annotations import math from typing import Dict, List, Optional, Tuple import numpy as np import torch import torch.nn as nn import torch.nn.functional as F # ============================================================================= # 1. PHYSICS-INFORMED FEATURE ENGINEERING (Depth-Dependent) # ============================================================================= class DepthPhysicsFeatures: """ Compute analytically-derived physics features that encode how femtosecond laser behavior changes with focusing depth in hydrogels. These features capture three primary depth-dependent effects: 1. Spherical aberration (Strehl ratio degradation) 2. Group velocity dispersion (pulse temporal broadening) 3. Self-focusing proximity (Kerr nonlinearity regime) Scientific basis: - Vogel et al., Applied Physics B (2005) - fs-laser tissue interaction - Schaffer et al., Optics Letters (2001) - bulk modification thresholds - Boyd, Nonlinear Optics (2020) - self-focusing, GVD theory """ def __init__( self, n_medium: float = 1.34, # Refractive index of hydrogel beta2_fs2_mm: float = 55.0, # GVD parameter (fs²/mm) for water-like medium n2_m2_W: float = 2.0e-20, # Nonlinear refractive index (m²/W) ): self.n_medium = n_medium self.beta2 = beta2_fs2_mm * 1e-30 / 1e-3 # Convert to s²/m self.n2 = n2_m2_W def compute( self, focusing_depth_um: np.ndarray, pulse_duration_fs: np.ndarray, wavelength_nm: np.ndarray, NA: np.ndarray, power_mW: np.ndarray, rep_rate_kHz: np.ndarray, ) -> np.ndarray: """ Compute physics features from raw parameters. Returns array of shape (N, 5) with columns: [strehl_ratio, intensity_factor, z_normalized, self_focus_ratio, depth_aberration] """ z = np.asarray(focusing_depth_um) * 1e-6 # µm → m tau0 = np.asarray(pulse_duration_fs) * 1e-15 # fs → s lam = np.asarray(wavelength_nm) * 1e-9 # nm → m na = np.asarray(NA) P_avg = np.asarray(power_mW) * 1e-3 # mW → W f_rep = np.asarray(rep_rate_kHz) * 1e3 # kHz → Hz # 1. Strehl ratio: S(z) = exp(-(2π·Δn·z·NA²/λ)²) # Quantifies how much aberration degrades the focal spot delta_n = self.n_medium - 1.0 # Air-hydrogel RI mismatch strehl = np.exp(-((2 * np.pi * delta_n * z * na**2) / lam)**2) strehl = np.clip(strehl, 1e-6, 1.0) # 2. GVD pulse broadening: τ(z) = τ₀·√(1 + (z/L_D)²) # Reduced peak intensity at depth L_D = tau0**2 / np.abs(self.beta2) # Dispersion length tau_z = tau0 * np.sqrt(1 + (z / np.maximum(L_D, 1e-10))**2) intensity_factor = tau0 / np.maximum(tau_z, tau0) # ∈ (0, 1] # 3. Normalized depth (relative to Rayleigh range) # Indicates when geometric vs. wave-optical effects dominate w0 = lam / (np.pi * np.maximum(na, 0.01)) # Beam waist z_rayleigh = np.pi * w0**2 / lam z_normalized = z / np.maximum(z_rayleigh, 1e-10) # 4. Self-focusing proximity: P_peak / P_critical # When > 1: catastrophic self-focusing regime P_peak = P_avg / (f_rep * tau0) # Peak power per pulse P_cr = 3.77 * lam**2 / (8 * np.pi * self.n_medium * self.n2) sf_ratio = P_peak / np.maximum(P_cr, 1e-10) sf_ratio = np.clip(sf_ratio, 0, 50) # Cap at 50× critical # 5. Depth-dependent aberration parameter # Combined effect: how much the focal volume degrades with depth depth_aberration = delta_n * z * na**2 / lam return np.column_stack([ strehl, intensity_factor, z_normalized, sf_ratio, depth_aberration, ]).astype(np.float32) @property def feature_names(self) -> List[str]: return [ "strehl_ratio", "intensity_factor_gvd", "z_normalized_rayleigh", "self_focusing_ratio", "depth_aberration_param", ] # ============================================================================= # 2. FiLM-CONDITIONED NEURAL NETWORK (Depth-Adaptive) # ============================================================================= class FiLMGenerator(nn.Module): """ Feature-wise Linear Modulation (FiLM) generator. Maps conditioning input (depth features) to per-layer (γ, β) pairs that modulate hidden representations: h' = γ ⊙ h + β Uses the Δγ initialization trick: γ = 1 + Δγ for stable training (identity modulation at initialization). Reference: Perez et al., "FiLM: Visual Reasoning with a General Conditioning Layer", AAAI 2018. """ def __init__(self, conditioning_dim: int, hidden_dims: List[int]): super().__init__() self.generators = nn.ModuleList() for h_dim in hidden_dims: self.generators.append( nn.Sequential( nn.Linear(conditioning_dim, 64), nn.SiLU(), nn.Linear(64, h_dim * 2), # γ and β ) ) # Initialize near identity (Δγ ≈ 0, β ≈ 0) for gen in self.generators: nn.init.zeros_(gen[-1].weight) nn.init.zeros_(gen[-1].bias) def forward(self, conditioning: torch.Tensor) -> List[Tuple[torch.Tensor, torch.Tensor]]: """ Parameters ---------- conditioning : Tensor, shape (B, conditioning_dim) Depth-related features for conditioning Returns ------- list of (gamma, beta) tuples for each layer """ film_params = [] for gen in self.generators: params = gen(conditioning) h_dim = params.shape[-1] // 2 delta_gamma = params[:, :h_dim] beta = params[:, h_dim:] gamma = 1.0 + delta_gamma # Δγ trick film_params.append((gamma, beta)) return film_params class FiLMConditionedMLP(nn.Module): """ Multi-layer perceptron with FiLM conditioning at each hidden layer. Architecture: Input → [Linear → BatchNorm → FiLM(γ,β) → SiLU → Dropout] × L → Output The FiLM conditioning allows depth information to modulate the network's intermediate representations multiplicatively, enabling fundamentally different processing depending on focusing depth — not just adding depth as another input feature. """ def __init__( self, input_dim: int, hidden_dims: List[int], output_dim: int, conditioning_dim: int, dropout: float = 0.15, ): super().__init__() self.hidden_dims = hidden_dims # Build layers dims = [input_dim] + hidden_dims self.layers = nn.ModuleList([ nn.Linear(d_in, d_out) for d_in, d_out in zip(dims[:-1], dims[1:]) ]) self.batch_norms = nn.ModuleList([ nn.BatchNorm1d(d) for d in hidden_dims ]) self.dropouts = nn.ModuleList([ nn.Dropout(dropout * (1 - i / len(hidden_dims))) for i in range(len(hidden_dims)) ]) # FiLM generator (depth → modulation parameters) self.film_generator = FiLMGenerator(conditioning_dim, hidden_dims) # Output projection self.output_layer = nn.Linear(hidden_dims[-1], output_dim) def forward( self, x: torch.Tensor, conditioning: torch.Tensor, ) -> torch.Tensor: """ Parameters ---------- x : Tensor (B, input_dim) - laser + material features conditioning : Tensor (B, conditioning_dim) - depth physics features Returns ------- Tensor (B, output_dim) - latent representation """ # Get FiLM parameters for all layers film_params = self.film_generator(conditioning) h = x for i, (layer, bn, dropout) in enumerate( zip(self.layers, self.batch_norms, self.dropouts) ): h = layer(h) h = bn(h) # Apply FiLM modulation gamma, beta = film_params[i] h = gamma * h + beta h = F.silu(h) h = dropout(h) return self.output_layer(h) # ============================================================================= # 3. EVIDENTIAL REGRESSION HEAD (Normal-Inverse-Gamma) # ============================================================================= class EvidentialHead(nn.Module): """ Normal-Inverse-Gamma (NIG) evidential regression head. Outputs four parameters per target that parameterize a NIG distribution, providing both aleatoric and epistemic uncertainty estimates in a single forward pass (no ensemble or MC dropout required). For each output dimension: μ ~ N(γ, σ²/ν) [predictive mean with epistemic noise] σ² ~ InvGamma(α, β) [aleatoric variance] Uncertainty decomposition: Aleatoric: E[σ²] = β / (α - 1) Epistemic: Var[μ] = β / (ν(α - 1)) Reference: Amini et al., "Deep Evidential Regression", NeurIPS 2020. """ def __init__(self, input_dim: int, n_outputs: int): super().__init__() self.n_outputs = n_outputs # Output: 4 parameters per target (γ, ν, α, β) self.fc = nn.Linear(input_dim, n_outputs * 4) # Initialize carefully for stable NIG parameters nn.init.xavier_normal_(self.fc.weight, gain=0.1) nn.init.zeros_(self.fc.bias) def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, ...]: """ Returns ------- gamma : Tensor (B, n_outputs) - predictive mean nu : Tensor (B, n_outputs) - evidence for mean (>0) alpha : Tensor (B, n_outputs) - evidence for variance (>1) beta : Tensor (B, n_outputs) - scale for variance (>0) """ out = self.fc(x).reshape(-1, self.n_outputs, 4) gamma = out[..., 0] nu = F.softplus(out[..., 1]) + 1e-6 # ν > 0 alpha = F.softplus(out[..., 2]) + 1.0 + 1e-6 # α > 1 beta = F.softplus(out[..., 3]) + 1e-6 # β > 0 return gamma, nu, alpha, beta @staticmethod def aleatoric_uncertainty(alpha: torch.Tensor, beta: torch.Tensor) -> torch.Tensor: """E[σ²] = β / (α - 1)""" return beta / (alpha - 1.0).clamp(min=1e-6) @staticmethod def epistemic_uncertainty(nu: torch.Tensor, alpha: torch.Tensor, beta: torch.Tensor) -> torch.Tensor: """Var[μ] = β / (ν(α - 1))""" return beta / (nu * (alpha - 1.0).clamp(min=1e-6)) # ============================================================================= # 4. DEPTH-CONDITIONED GATING NETWORK (Learned Dynamic Fusion) # ============================================================================= class DepthConditionedGatingNetwork(nn.Module): """ Input-conditioned gating network that dynamically determines how to fuse XGBoost and Neural Network predictions. Unlike a fixed 60/40 weighting, this network learns WHEN each expert is more reliable — conditioned on both input features and focusing depth. Key insight from DELE (arxiv:2302.00932): the gating network benefits from seeing the same features as the experts, plus the experts' own predictions as additional input. Architecture: [input_features ⊕ depth_physics ⊕ expert_predictions] → MLP → softmax(2) """ def __init__( self, input_dim: int, depth_dim: int, n_expert_outputs: int, n_experts: int = 2, hidden_dim: int = 64, ): super().__init__() total_input = input_dim + depth_dim + n_expert_outputs * n_experts self.gate = nn.Sequential( nn.Linear(total_input, hidden_dim), nn.SiLU(), nn.Dropout(0.1), nn.Linear(hidden_dim, hidden_dim // 2), nn.SiLU(), nn.Linear(hidden_dim // 2, n_experts), ) # Temperature parameter (learnable) for softmax sharpness self.temperature = nn.Parameter(torch.ones(1)) def forward( self, features: torch.Tensor, depth_physics: torch.Tensor, expert_preds: List[torch.Tensor], ) -> torch.Tensor: """ Parameters ---------- features : Tensor (B, input_dim) depth_physics : Tensor (B, depth_dim) expert_preds : list of Tensor (B, n_outputs) per expert Returns ------- weights : Tensor (B, n_experts) - softmax weights summing to 1 """ gate_input = torch.cat( [features, depth_physics] + expert_preds, dim=-1 ) logits = self.gate(gate_input) / self.temperature.clamp(min=0.1) return F.softmax(logits, dim=-1) # ============================================================================= # 5. COMPLETE DCDE MODEL # ============================================================================= class DCDE(nn.Module): """ Depth-Conditioned Dynamic Ensemble (DCDE) A hybrid architecture for predicting femtosecond laser internal etching geometry in hydrogels. Combines: 1. XGBoost branch: Pre-trained gradient-boosted trees capturing complex tabular feature interactions (frozen during DCDE training) 2. FiLM-NN branch: Depth-conditioned neural network where focusing depth modulates intermediate representations via FiLM layers 3. Dynamic gating: Input-conditioned fusion network that learns optimal weighting between branches depending on input regime 4. Evidential head: NIG distribution output providing calibrated aleatoric + epistemic uncertainty 5. Physics-informed loss: Soft monotonicity constraints and energy conservation regularization Training protocol (3-phase, following DELE): Phase 1: Train XGBoost independently on tabular features Phase 2: Train FiLM-NN with evidential head (XGBoost frozen) Phase 3: Train gating network jointly (optionally fine-tune FiLM-NN) """ def __init__( self, input_dim: int, depth_physics_dim: int = 5, hidden_dims: List[int] = [128, 96, 64], n_outputs: int = 5, n_experts: int = 2, gating_hidden: int = 64, ): super().__init__() self.input_dim = input_dim self.n_outputs = n_outputs # FiLM-conditioned NN branch self.film_nn = FiLMConditionedMLP( input_dim=input_dim, hidden_dims=hidden_dims, output_dim=hidden_dims[-1], conditioning_dim=depth_physics_dim, ) # XGBoost prediction embedding (projects XGB outputs to latent space) self.xgb_embed = nn.Sequential( nn.Linear(n_outputs, hidden_dims[-1]), nn.SiLU(), nn.Linear(hidden_dims[-1], hidden_dims[-1]), ) # Gating network self.gating = DepthConditionedGatingNetwork( input_dim=input_dim, depth_dim=depth_physics_dim, n_expert_outputs=n_outputs, n_experts=n_experts, hidden_dim=gating_hidden, ) # Evidential head (NIG parameters) self.evidential_head = EvidentialHead(hidden_dims[-1], n_outputs) # Direct output head for XGBoost branch (for gating comparison) self.xgb_output = nn.Linear(hidden_dims[-1], n_outputs) def forward( self, features: torch.Tensor, depth_physics: torch.Tensor, xgb_predictions: torch.Tensor, ) -> Dict[str, torch.Tensor]: """ Parameters ---------- features : Tensor (B, input_dim) - all input features depth_physics : Tensor (B, depth_physics_dim) - computed physics features xgb_predictions : Tensor (B, n_outputs) - pre-computed XGBoost predictions Returns ------- dict with keys: 'gamma' : predictive mean (B, n_outputs) 'nu', 'alpha', 'beta' : NIG parameters 'aleatoric_unc' : aleatoric uncertainty 'epistemic_unc' : epistemic uncertainty 'gate_weights' : expert weights (B, 2) 'nn_pred' : raw NN branch prediction 'xgb_pred' : embedded XGBoost prediction """ # NN branch: depth-conditioned via FiLM nn_latent = self.film_nn(features, depth_physics) # XGBoost branch: embed predictions into latent space xgb_latent = self.xgb_embed(xgb_predictions) # Compute intermediate predictions for gating input nn_pred_raw = self.evidential_head(nn_latent)[0] # Just gamma # Dynamic gating: determine expert weights gate_weights = self.gating( features, depth_physics, [xgb_predictions, nn_pred_raw.detach()] # Detach to avoid circular gradients ) # Fused latent representation w_xgb = gate_weights[:, 0:1] # (B, 1) w_nn = gate_weights[:, 1:2] # (B, 1) fused_latent = w_xgb * xgb_latent + w_nn * nn_latent # Evidential output gamma, nu, alpha, beta = self.evidential_head(fused_latent) # Uncertainty decomposition aleatoric = EvidentialHead.aleatoric_uncertainty(alpha, beta) epistemic = EvidentialHead.epistemic_uncertainty(nu, alpha, beta) return { "gamma": gamma, "nu": nu, "alpha": alpha, "beta": beta, "aleatoric_unc": aleatoric, "epistemic_unc": epistemic, "gate_weights": gate_weights, "nn_pred": nn_pred_raw, "xgb_pred": xgb_predictions, } # ============================================================================= # 6. LOSS FUNCTIONS (NIG + Physics-Informed) # ============================================================================= class DCDELoss(nn.Module): """ Composite loss for DCDE training: L_total = L_NIG + λ_mono·L_monotonicity + λ_energy·L_energy + λ_gate·L_gate_entropy Components: 1. NIG Loss (evidential regression) - primary data fitting 2. Monotonicity loss - enforces physical depth-etch relationships 3. Energy conservation - volume scales with deposited energy 4. Gate entropy regularization - prevents degenerate gating """ def __init__( self, lambda_nig_reg: float = 0.01, lambda_mono: float = 0.05, lambda_energy: float = 0.02, lambda_gate: float = 0.01, depth_feature_idx: int = -1, power_feature_idx: int = 0, ): super().__init__() self.lambda_nig_reg = lambda_nig_reg self.lambda_mono = lambda_mono self.lambda_energy = lambda_energy self.lambda_gate = lambda_gate self.depth_idx = depth_feature_idx self.power_idx = power_feature_idx def nig_loss( self, y: torch.Tensor, gamma: torch.Tensor, nu: torch.Tensor, alpha: torch.Tensor, beta: torch.Tensor, ) -> torch.Tensor: """ Normal-Inverse-Gamma negative log-likelihood with evidence regularization. L = L_NLL + λ·L_evidence_regularization The regularization penalizes high evidence (ν, α) when the prediction is wrong, encouraging the model to be uncertain when inaccurate. """ # NLL term omega = 2 * beta * (1 + nu) nll = ( 0.5 * torch.log(torch.pi / nu.clamp(min=1e-6)) - alpha * torch.log(omega.clamp(min=1e-10)) + (alpha + 0.5) * torch.log( ((y - gamma) ** 2 * nu + omega).clamp(min=1e-10) ) + torch.lgamma(alpha) - torch.lgamma(alpha + 0.5) ) # Evidence regularization (penalize evidence when wrong) error = torch.abs(y - gamma) evidence = 2 * nu + alpha reg = error * evidence return (nll + self.lambda_nig_reg * reg).mean() def monotonicity_loss( self, features: torch.Tensor, gamma: torch.Tensor, model: nn.Module, depth_physics: torch.Tensor, xgb_pred: torch.Tensor, ) -> torch.Tensor: """ Soft monotonicity constraint: for most targets, increasing laser parameters (power, passes) at fixed depth should not decrease output. Specifically for depth etching: - More passes → deeper etch (target 0: etch_depth) - Higher fluence → wider etch (target 1: etch_width) Implemented as finite-difference gradient penalty. """ # Perturb power upward by small amount features_perturbed = features.clone() features_perturbed[:, self.power_idx] = features[:, self.power_idx] * 1.05 # Get predictions for perturbed input with torch.no_grad(): output_perturbed = model(features_perturbed, depth_physics, xgb_pred) # Depth and width should increase with power (soft constraint) # Only penalize violations (relu of negative gradient) violation_depth = F.relu(gamma[:, 0] - output_perturbed["gamma"][:, 0]) violation_width = F.relu(gamma[:, 1] - output_perturbed["gamma"][:, 1]) return (violation_depth.mean() + violation_width.mean()) / 2 def energy_conservation_loss( self, features: torch.Tensor, gamma: torch.Tensor, ) -> torch.Tensor: """ Soft energy constraint: predicted ablated volume should correlate positively with deposited energy. Volume proxy ∝ depth × width² Energy proxy ∝ power × (num_passes / scan_speed) We penalize anti-correlation (negative cosine similarity). """ # Volume proxy from predictions depth_pred = gamma[:, 0].clamp(min=0) width_pred = gamma[:, 1].clamp(min=0) volume_proxy = depth_pred * width_pred ** 2 # Energy proxy from inputs power = features[:, self.power_idx].clamp(min=1e-6) energy_proxy = power # Simplified; could include scan speed, passes # Penalize negative correlation # Cosine similarity should be positive cos_sim = F.cosine_similarity( volume_proxy.unsqueeze(-1), energy_proxy.unsqueeze(-1), dim=0, ) return F.relu(-cos_sim).mean() def gate_entropy_loss(self, gate_weights: torch.Tensor) -> torch.Tensor: """ Encourage non-degenerate gating (not always choosing one expert). Maximize entropy of gate weights (encourage exploration). Penalize when one weight is always 0 or 1. """ # Per-sample entropy entropy = -(gate_weights * torch.log(gate_weights + 1e-8)).sum(dim=-1) # Maximize entropy → minimize negative entropy max_entropy = math.log(gate_weights.shape[-1]) return (max_entropy - entropy.mean()) def forward( self, y: torch.Tensor, model_output: Dict[str, torch.Tensor], features: torch.Tensor, depth_physics: torch.Tensor, model: Optional[nn.Module] = None, ) -> Dict[str, torch.Tensor]: """ Compute total loss with all components. Returns dict with individual loss components for logging. """ gamma = model_output["gamma"] nu = model_output["nu"] alpha = model_output["alpha"] beta = model_output["beta"] gate_weights = model_output["gate_weights"] xgb_pred = model_output["xgb_pred"] # Primary loss: NIG l_nig = self.nig_loss(y, gamma, nu, alpha, beta) # Physics losses l_mono = torch.tensor(0.0, device=y.device) if model is not None and self.lambda_mono > 0: l_mono = self.monotonicity_loss(features, gamma, model, depth_physics, xgb_pred) l_energy = torch.tensor(0.0, device=y.device) if self.lambda_energy > 0: l_energy = self.energy_conservation_loss(features, gamma) # Gating regularization l_gate = self.gate_entropy_loss(gate_weights) # Total total = ( l_nig + self.lambda_mono * l_mono + self.lambda_energy * l_energy + self.lambda_gate * l_gate ) return { "total": total, "nig": l_nig, "monotonicity": l_mono, "energy": l_energy, "gate_entropy": l_gate, } # ============================================================================= # 7. TRAINING UTILITIES # ============================================================================= class DCDETrainer: """ Three-phase training protocol for DCDE. Phase 1: Train XGBoost on tabular features (external, uses sklearn/xgboost) Phase 2: Train FiLM-NN with evidential head (XGBoost predictions as input) Phase 3: Train gating network + fine-tune FiLM-NN end-to-end """ def __init__( self, model: DCDE, loss_fn: DCDELoss, lr_phase2: float = 1e-3, lr_phase3: float = 3e-4, weight_decay: float = 1e-4, device: str = "cpu", ): self.model = model.to(device) self.loss_fn = loss_fn self.lr_phase2 = lr_phase2 self.lr_phase3 = lr_phase3 self.weight_decay = weight_decay self.device = device def phase2_train_step( self, features: torch.Tensor, depth_physics: torch.Tensor, xgb_predictions: torch.Tensor, targets: torch.Tensor, optimizer: torch.optim.Optimizer, ) -> Dict[str, float]: """Single training step for Phase 2 (FiLM-NN + evidential head).""" self.model.train() optimizer.zero_grad() output = self.model(features, depth_physics, xgb_predictions) losses = self.loss_fn(targets, output, features, depth_physics, self.model) losses["total"].backward() torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=1.0) optimizer.step() return {k: v.item() for k, v in losses.items()} def phase3_train_step( self, features: torch.Tensor, depth_physics: torch.Tensor, xgb_predictions: torch.Tensor, targets: torch.Tensor, optimizer: torch.optim.Optimizer, ) -> Dict[str, float]: """Single training step for Phase 3 (end-to-end with gating).""" # Same as phase 2 but with different learning rate and all params unfrozen return self.phase2_train_step(features, depth_physics, xgb_predictions, targets, optimizer) @torch.no_grad() def predict( self, features: torch.Tensor, depth_physics: torch.Tensor, xgb_predictions: torch.Tensor, ) -> Dict[str, np.ndarray]: """ Inference with uncertainty quantification. Returns ------- dict with: 'mean': predicted values (B, n_outputs) 'aleatoric_unc': aleatoric uncertainty per target 'epistemic_unc': epistemic uncertainty per target 'total_unc': total predictive uncertainty 'gate_weights': expert weights showing XGB vs NN dominance """ self.model.eval() output = self.model(features, depth_physics, xgb_predictions) return { "mean": output["gamma"].cpu().numpy(), "aleatoric_unc": output["aleatoric_unc"].cpu().numpy(), "epistemic_unc": output["epistemic_unc"].cpu().numpy(), "total_unc": (output["aleatoric_unc"] + output["epistemic_unc"]).cpu().numpy(), "gate_weights": output["gate_weights"].cpu().numpy(), }