"""
GLADIUS — Gaussian Head Configuration

All hyperparameters for the Gaussian specialist in one place.
"""

from dataclasses import dataclass


@dataclass
class GaussianConfig:
    """Configuration for the Gaussian specialist head.

    All fields are plain stored values with defaults. Fields whose comment
    shows a formula (``max_gaussians``, ``full_dim``) hold the value of that
    formula for the *default* inputs only; they are not recomputed when the
    inputs are overridden. Use :attr:`total_gaussians` for a count that
    always follows ``num_anchors`` and ``details_per_anchor``.
    """

    # ── Scene Generation ──
    num_anchors: int = 64            # Coarse anchor Gaussians per scene
    details_per_anchor: int = 32     # Fine detail Gaussians per anchor
    max_gaussians: int = 2048        # num_anchors * details_per_anchor (for the defaults)

    # ── Gaussian Parameters ──
    # Each Gaussian (excluding position): scale(3) + rot(4) + opacity(1) + sh_dc(3) = 11
    # Position is predicted separately (continuous, 3 floats)
    param_dim: int = 11              # Non-position parameters per Gaussian
    pos_dim: int = 3                 # Position dimensions
    full_dim: int = 14               # param_dim + pos_dim (for the defaults)

    # ── VQ-VAE Codebook ──
    codebook_size: int = 4096        # Number of codebook entries
    codebook_dim: int = 64           # Embedding dimension per entry
    commitment_weight: float = 0.25  # β for commitment loss
    ema_decay: float = 0.99          # EMA decay for codebook updates
    codebook_reset_threshold: int = 2  # Reset entries used fewer times than this

    # ── VQ-VAE Architecture ──
    vqvae_hidden: int = 256          # Hidden dim in encoder/decoder
    vqvae_layers: int = 3            # Depth of encoder/decoder

    # ── Specialist Head ──
    cross_attn_heads: int = 8        # Cross-attention heads in detail generator
    anchor_hidden: int = 256         # Hidden dim for anchor MLP
    detail_hidden: int = 256         # Hidden dim for detail generation

    # ── Rendering (differentiable, for training loss) ──
    render_size: int = 64            # Rendered image size (H=W) for training
    render_views: int = 4            # Number of views to render per scene for loss
    ssim_weight: float = 0.2         # Weight of SSIM in rendering loss
    l1_weight: float = 0.8           # Weight of L1 in rendering loss

    # ── Training ──
    vqvae_lr: float = 3e-4           # Learning rate for VQ-VAE pre-training
    head_lr: float = 1e-4            # Learning rate for specialist head
    backbone_lr: float = 1e-5        # Learning rate for backbone fine-tuning (Phase 3)
    vqvae_steps: int = 50_000        # VQ-VAE pre-training steps
    head_steps: int = 100_000        # Head training steps (frozen backbone)
    joint_steps: int = 50_000        # Joint fine-tuning steps

    # ── Scene Bounds ──
    scene_scale: float = 2.0         # Scene fits in [-scale, +scale]^3
    min_gaussian_scale: float = -6.0  # Log-scale minimum (exp(-6) ≈ 0.0025)
    max_gaussian_scale: float = 0.0   # Log-scale maximum (exp(0) = 1.0)

    @property
    def total_gaussians(self) -> int:
        """Return ``num_anchors * details_per_anchor`` from the current field values."""
        return self.num_anchors * self.details_per_anchor

    def estimate_new_params(self, backbone_dim: int, *, num_layers: int = 24) -> dict:
        """Estimate parameter count of the Gaussian specialist.

        Every term is a product of layer dimensions; no bias terms are added,
        and ``vqvae_layers`` is not used (the VQ-VAE encoder and decoder are
        each counted as two matrices: input -> hidden -> output).

        Args:
            backbone_dim: Feature width of the backbone feeding the head.
            num_layers: Number of backbone layers carrying gates; two gate
                parameters are counted per layer. The default of 24 gives
                the previously hard-coded value of 48.

        Returns:
            A dict of integer counts with the keys ``'anchor_mlp'``,
            ``'cross_attention'``, ``'vq_logits'``, ``'pos_offset'``,
            ``'layer_gates'``, ``'total_trainable'``, ``'vqvae_total'`` and
            ``'total_all'``.
        """
        # Anchor head: backbone -> hidden -> (position + params) for every anchor
        anchor_mlp = (
            backbone_dim * self.anchor_hidden
            + self.anchor_hidden * self.num_anchors * (self.pos_dim + self.param_dim)
        )

        # Detail cross-attention: Q, K, V, O projections
        cross_attn = 4 * backbone_dim * backbone_dim

        # Detail VQ logits: one logit per codebook entry
        vq_logits = backbone_dim * self.codebook_size

        # Detail position offset
        pos_offset = backbone_dim * self.pos_dim

        # Layer gates: negligible, 2 per layer
        layer_gates = 2 * num_layers

        # VQ-VAE (frozen, but count for reference)
        vqvae_encoder = (
            self.param_dim * self.vqvae_hidden
            + self.vqvae_hidden * self.codebook_dim
        )
        vqvae_decoder = (
            self.codebook_dim * self.vqvae_hidden
            + self.vqvae_hidden * self.param_dim
        )
        vqvae_codebook = self.codebook_size * self.codebook_dim

        total_trainable = anchor_mlp + cross_attn + vq_logits + pos_offset + layer_gates
        total_vqvae = vqvae_encoder + vqvae_decoder + vqvae_codebook

        return {
            'anchor_mlp': anchor_mlp,
            'cross_attention': cross_attn,
            'vq_logits': vq_logits,
            'pos_offset': pos_offset,
            'layer_gates': layer_gates,
            'total_trainable': total_trainable,
            'vqvae_total': total_vqvae,
            'total_all': total_trainable + total_vqvae,
        }