WYRM kernel source (v27 FINAL)

9463e5c verified about 2 months ago

4.54 kB

	"""
	GLADIUS — Gaussian Head Configuration

	All hyperparameters for the Gaussian specialist in one place.
	"""

	from dataclasses import dataclass


	@dataclass
	class GaussianConfig:
	    """Configuration for the Gaussian specialist head.

	    Every hyperparameter is a plain dataclass field with a default, so a
	    bare ``GaussianConfig()`` is a complete configuration.

	    Note:
	        ``max_gaussians`` and ``full_dim`` are ordinary fields whose defaults
	        equal ``num_anchors * details_per_anchor`` and ``param_dim + pos_dim``
	        for the default values. They are NOT recomputed when those inputs are
	        overridden; use ``total_gaussians`` for a count that always follows
	        the current ``num_anchors`` / ``details_per_anchor``.
	    """

	    # ── Scene Generation ──
	    num_anchors: int = 64  # Coarse anchor Gaussians per scene
	    details_per_anchor: int = 32  # Fine detail Gaussians per anchor
	    # Default equals num_anchors * details_per_anchor (64 * 32); not kept in
	    # sync automatically if either factor is overridden.
	    max_gaussians: int = 2048

	    # ── Gaussian Parameters ──
	    # Each Gaussian (excluding position): scale(3) + rot(4) + opacity(1) + sh_dc(3) = 11
	    # Position is predicted separately (continuous, 3 floats)
	    param_dim: int = 11  # Non-position parameters per Gaussian
	    pos_dim: int = 3  # Position dimensions
	    # Default equals param_dim + pos_dim (11 + 3); not kept in sync
	    # automatically if either term is overridden.
	    full_dim: int = 14

	    # ── VQ-VAE Codebook ──
	    codebook_size: int = 4096  # Number of codebook entries
	    codebook_dim: int = 64  # Embedding dimension per entry
	    commitment_weight: float = 0.25  # β for commitment loss
	    ema_decay: float = 0.99  # EMA decay for codebook updates
	    codebook_reset_threshold: int = 2  # Reset entries used fewer times than this

	    # ── VQ-VAE Architecture ──
	    vqvae_hidden: int = 256  # Hidden dim in encoder/decoder
	    vqvae_layers: int = 3  # Depth of encoder/decoder

	    # ── Specialist Head ──
	    cross_attn_heads: int = 8  # Cross-attention heads in detail generator
	    anchor_hidden: int = 256  # Hidden dim for anchor MLP
	    detail_hidden: int = 256  # Hidden dim for detail generation

	    # ── Rendering (differentiable, for training loss) ──
	    render_size: int = 64  # Rendered image size (H=W) for training
	    render_views: int = 4  # Number of views to render per scene for loss
	    ssim_weight: float = 0.2  # Weight of SSIM in rendering loss
	    l1_weight: float = 0.8  # Weight of L1 in rendering loss

	    # ── Training ──
	    vqvae_lr: float = 3e-4  # Learning rate for VQ-VAE pre-training
	    head_lr: float = 1e-4  # Learning rate for specialist head
	    backbone_lr: float = 1e-5  # Learning rate for backbone fine-tuning (Phase 3)
	    vqvae_steps: int = 50_000  # VQ-VAE pre-training steps
	    head_steps: int = 100_000  # Head training steps (frozen backbone)
	    joint_steps: int = 50_000  # Joint fine-tuning steps

	    # ── Scene Bounds ──
	    scene_scale: float = 2.0  # Scene fits in [-scale, +scale]^3
	    min_gaussian_scale: float = -6.0  # Log-scale minimum (exp(-6) ≈ 0.0025)
	    max_gaussian_scale: float = 0.0  # Log-scale maximum (exp(0) = 1.0)

	    @property
	    def total_gaussians(self) -> int:
	        """Return ``num_anchors * details_per_anchor`` from the current fields."""
	        return self.num_anchors * self.details_per_anchor

	    def estimate_new_params(self, backbone_dim: int, *, num_layers: int = 24) -> dict:
	        """Estimate parameter count of the Gaussian specialist.

	        The estimate is a sum of weight-matrix sizes (products of layer
	        widths); bias terms are not counted. ``vqvae_layers``,
	        ``detail_hidden`` and ``cross_attn_heads`` do not enter the formula.

	        Args:
	            backbone_dim: Width of the backbone features the head consumes.
	            num_layers: Number of backbone layers; the layer-gate count is
	                ``2 * num_layers``. The default of 24 reproduces the
	                previously hard-coded value of 48.

	        Returns:
	            A dict of integer counts with keys ``'anchor_mlp'``,
	            ``'cross_attention'``, ``'vq_logits'``, ``'pos_offset'``,
	            ``'layer_gates'``, ``'total_trainable'`` (sum of the previous
	            five), ``'vqvae_total'`` (encoder + decoder + codebook) and
	            ``'total_all'`` (trainable + VQ-VAE).
	        """
	        # Anchor head: backbone -> hidden, then hidden -> one
	        # (position + parameters) vector for every anchor.
	        per_anchor_out = self.pos_dim + self.param_dim
	        anchor_mlp = (
	            backbone_dim * self.anchor_hidden
	            + self.anchor_hidden * self.num_anchors * per_anchor_out
	        )

	        # Detail cross-attention: Q, K, V, O projections, each square.
	        cross_attn = 4 * backbone_dim * backbone_dim

	        # Detail VQ logits: one logit per codebook entry.
	        vq_logits = backbone_dim * self.codebook_size

	        # Detail position offset.
	        pos_offset = backbone_dim * self.pos_dim

	        # Layer gates: negligible, two per backbone layer.
	        layer_gates = 2 * num_layers

	        # VQ-VAE (frozen, but counted for reference). Only the input->hidden
	        # and hidden->latent products are counted on each side.
	        vqvae_encoder = (
	            self.param_dim * self.vqvae_hidden
	            + self.vqvae_hidden * self.codebook_dim
	        )
	        vqvae_decoder = (
	            self.codebook_dim * self.vqvae_hidden
	            + self.vqvae_hidden * self.param_dim
	        )
	        vqvae_codebook = self.codebook_size * self.codebook_dim

	        total_trainable = anchor_mlp + cross_attn + vq_logits + pos_offset + layer_gates
	        total_vqvae = vqvae_encoder + vqvae_decoder + vqvae_codebook

	        return {
	            'anchor_mlp': anchor_mlp,
	            'cross_attention': cross_attn,
	            'vq_logits': vq_logits,
	            'pos_offset': pos_offset,
	            'layer_gates': layer_gates,
	            'total_trainable': total_trainable,
	            'vqvae_total': total_vqvae,
	            'total_all': total_trainable + total_vqvae,
	        }