amuzetnoM's picture
WYRM kernel source (v27 FINAL)
9463e5c verified
"""
GLADIUS — Gaussian Head Configuration
All hyperparameters for the Gaussian specialist in one place.
"""
from dataclasses import dataclass
@dataclass
class GaussianConfig:
    """Configuration for the Gaussian specialist head.

    Groups every hyperparameter of the head (scene layout, per-Gaussian
    parameterization, VQ-VAE codebook, rendering loss, training schedule and
    scene bounds) as plain dataclass fields with defaults.

    Note:
        ``max_gaussians`` and ``full_dim`` are stored as literal defaults.
        They are NOT recomputed when ``num_anchors``, ``details_per_anchor``,
        ``param_dim`` or ``pos_dim`` are overridden; use ``total_gaussians``
        for a value derived from the current fields.
    """

    # ── Scene Generation ──
    num_anchors: int = 64              # Coarse anchor Gaussians per scene
    details_per_anchor: int = 32       # Fine detail Gaussians per anchor
    max_gaussians: int = 2048          # num_anchors * details_per_anchor (for the defaults)

    # ── Gaussian Parameters ──
    # Each Gaussian (excluding position): scale(3) + rot(4) + opacity(1) + sh_dc(3) = 11
    # Position is predicted separately (continuous, 3 floats)
    param_dim: int = 11                # Non-position parameters per Gaussian
    pos_dim: int = 3                   # Position dimensions
    full_dim: int = 14                 # param_dim + pos_dim (for the defaults)

    # ── VQ-VAE Codebook ──
    codebook_size: int = 4096          # Number of codebook entries
    codebook_dim: int = 64             # Embedding dimension per entry
    commitment_weight: float = 0.25    # β for commitment loss
    ema_decay: float = 0.99            # EMA decay for codebook updates
    codebook_reset_threshold: int = 2  # Reset entries used fewer times than this

    # ── VQ-VAE Architecture ──
    vqvae_hidden: int = 256            # Hidden dim in encoder/decoder
    vqvae_layers: int = 3              # Depth of encoder/decoder

    # ── Specialist Head ──
    cross_attn_heads: int = 8          # Cross-attention heads in detail generator
    anchor_hidden: int = 256           # Hidden dim for anchor MLP
    detail_hidden: int = 256           # Hidden dim for detail generation

    # ── Rendering (differentiable, for training loss) ──
    render_size: int = 64              # Rendered image size (H=W) for training
    render_views: int = 4              # Number of views to render per scene for loss
    ssim_weight: float = 0.2           # Weight of SSIM in rendering loss
    l1_weight: float = 0.8             # Weight of L1 in rendering loss

    # ── Training ──
    vqvae_lr: float = 3e-4             # Learning rate for VQ-VAE pre-training
    head_lr: float = 1e-4              # Learning rate for specialist head
    backbone_lr: float = 1e-5          # Learning rate for backbone fine-tuning (Phase 3)
    vqvae_steps: int = 50_000          # VQ-VAE pre-training steps
    head_steps: int = 100_000          # Head training steps (frozen backbone)
    joint_steps: int = 50_000          # Joint fine-tuning steps

    # ── Scene Bounds ──
    scene_scale: float = 2.0           # Scene fits in [-scale, +scale]^3
    min_gaussian_scale: float = -6.0   # Log-scale minimum (exp(-6) ≈ 0.0025)
    max_gaussian_scale: float = 0.0    # Log-scale maximum (exp(0) = 1.0)

    @property
    def total_gaussians(self) -> int:
        """Return ``num_anchors * details_per_anchor`` from the current field values."""
        return self.num_anchors * self.details_per_anchor

    def estimate_new_params(self, backbone_dim: int, *, num_layers: int = 24) -> dict:
        """Estimate the parameter count of the Gaussian specialist.

        The estimate counts weight matrices only (products of layer
        dimensions); bias terms are not included. The VQ-VAE figures count a
        single input and a single output projection per encoder/decoder and
        do not depend on ``vqvae_layers``.

        Args:
            backbone_dim: Hidden width of the backbone the head attaches to.
            num_layers: Number of layers that carry gates; the gate count is
                ``2 * num_layers``. The default of 24 reproduces the
                previously hard-coded value of 48.

        Returns:
            A dict of integer counts with the keys ``anchor_mlp``,
            ``cross_attention``, ``vq_logits``, ``pos_offset``,
            ``layer_gates``, ``total_trainable``, ``vqvae_total`` and
            ``total_all`` (``total_trainable + vqvae_total``).
        """
        # Anchor head: backbone -> hidden, then hidden -> all anchors' (position + params).
        per_anchor_dim = self.pos_dim + self.param_dim
        anchor_mlp = (
            backbone_dim * self.anchor_hidden
            + self.anchor_hidden * self.num_anchors * per_anchor_dim
        )

        # Detail cross-attention: Q, K, V, O projections, each backbone_dim x backbone_dim.
        cross_attn = 4 * backbone_dim * backbone_dim

        # Detail VQ logits: one logit per codebook entry.
        vq_logits = backbone_dim * self.codebook_size

        # Detail position offset.
        pos_offset = backbone_dim * self.pos_dim

        # Layer gates: negligible, 2 * num_layers.
        layer_gates = 2 * num_layers

        # VQ-VAE (frozen, but counted for reference).
        vqvae_encoder = (
            self.param_dim * self.vqvae_hidden
            + self.vqvae_hidden * self.codebook_dim
        )
        vqvae_decoder = (
            self.codebook_dim * self.vqvae_hidden
            + self.vqvae_hidden * self.param_dim
        )
        vqvae_codebook = self.codebook_size * self.codebook_dim

        total_trainable = anchor_mlp + cross_attn + vq_logits + pos_offset + layer_gates
        total_vqvae = vqvae_encoder + vqvae_decoder + vqvae_codebook

        return {
            'anchor_mlp': anchor_mlp,
            'cross_attention': cross_attn,
            'vq_logits': vq_logits,
            'pos_offset': pos_offset,
            'layer_gates': layer_gates,
            'total_trainable': total_trainable,
            'vqvae_total': total_vqvae,
            'total_all': total_trainable + total_vqvae,
        }