# Source: Hugging Face Space "Efradeca/vae-fdm", file src/neural_fdm/variational.py (revision fc7d689, 18.8 kB).

	"""Variational Autoencoder for diverse structural form-finding.

	Implementation of a VAE coupled with a differentiable Force Density
	Method (FDM) decoder for generating diverse equilibrium solutions.

	The key insight: the FDM decoder is differentiable (via JAX implicit
	differentiation), so the reparameterization trick (Kingma & Welling, 2014)
	enables end-to-end training. The decoder is NOT modified -- it remains
	the exact physics solver, guaranteeing equilibrium for every sample.

	Mathematical formulation:

	Encoder: mu, log_sigma = E_phi(X_hat)
	Sampling: z = mu + exp(log_sigma) * epsilon, epsilon ~ N(0, I)
	Mapping: q = (softplus(z) + tau) * s
	Decoder: X(q) = K(q)^{-1} P (FDM equilibrium, unchanged)
	Loss: L = L_shape(X, X_hat) + beta * KL(q(z|X_hat) || p(z))

	Where:
	- KL divergence: Eq. 7 of Kingma & Welling (2014), arXiv:1312.6114
	- Beta annealing: Cyclical schedule per Fu et al. (2019), NAACL
	- Physics guarantee: FDM enforces R(X;q) = 0 by construction

	Motivation (from Pastrana et al., ICLR 2025, Section 6.1):
	"the choice of bar stiffnesses for a given structure is not unique
	and it is potentially appealing to present to the designer a diversity
	of possible solutions by reformulating our model in a variational
	setting (Kingma and Welling, 2014)"

	References
	----------
	[1] Kingma, D.P. & Welling, M. (2014). Auto-Encoding Variational Bayes.
	ICLR 2014. arXiv:1312.6114
	[2] Fu, H. et al. (2019). Cyclical Annealing Schedule: A Simple Approach
	to Mitigating KL Vanishing. NAACL 2019.
	[3] Higgins, I. et al. (2017). beta-VAE: Learning Basic Visual Concepts
	with a Constrained Variational Framework. ICLR 2017.
	[4] Pastrana, R. et al. (2025). Real-Time Design of Architectural Structures
	with Differentiable Mechanics and Neural Networks. ICLR 2025.
	"""

	from __future__ import annotations

	import equinox as eqx
	import jax
	import jax.numpy as jnp
	import jax.random as jrn
	from jaxtyping import Array, Float, PRNGKeyArray

	# =============================================================================
	# Variational Encoder
	# =============================================================================


	class VariationalMLPEncoder(eqx.Module):
	    """Variational MLP encoder for structural form-finding.

	    Maps a flat target shape to the parameters of a diagonal Gaussian in
	    latent space, optionally draws a sample with the reparameterization
	    trick, and converts the latent vector into signed force densities.

	    Architecture::

	        x -> [backbone MLP] -> h -> [mu_head]        -> mu
	                                 -> [log_sigma_head] -> log_sigma
	        z = mu + exp(log_sigma) * epsilon   (reparameterization trick [1])
	        q = (softplus(z) + q_shift) * edges_signs

	    Both heads read the same backbone features.

	    Parameters
	    ----------
	    edges_signs : Array
	        Per-edge sign multiplied onto the positive force density
	        magnitude (+1 for tension, -1 for compression).
	    q_shift : float
	        Constant added to ``softplus(z)``, i.e. the minimum force density
	        magnitude (tau in [4]).
	    slice_out : bool
	        If truthy, only the rows ``slice_indices`` of the ``(-1, 3)``
	        reshaped input are fed to the backbone.
	    slice_indices : Array or None
	        Row indices used when ``slice_out`` is set.
	    in_size : int
	        Input size of the backbone MLP.
	    out_size : int
	        Size of ``mu``, ``log_sigma`` and ``q``.
	    width_size : int
	        Hidden width of the backbone and size of the shared features.
	    depth : int
	        The backbone is built with ``max(depth - 1, 1)`` hidden layers.
	    activation : callable
	        Hidden activation of the backbone.
	    key : PRNGKey
	        Random key, split three ways to initialize the backbone and heads.
	    """

	    backbone: eqx.nn.MLP
	    mu_head: eqx.nn.Linear
	    log_sigma_head: eqx.nn.Linear
	    # NOTE(review): stored as an array field of the module -- confirm the
	    # training loop does not treat it as a trainable parameter.
	    edges_signs: Array
	    q_shift: float
	    slice_out: bool
	    slice_indices: Array | None

	    def __init__(
	        self,
	        edges_signs,
	        q_shift=0.0,
	        slice_out=False,
	        slice_indices=None,
	        in_size=300,
	        out_size=180,
	        width_size=256,
	        depth=3,
	        activation=jax.nn.elu,
	        *,
	        key,
	    ):
	        backbone_key, mu_key, log_sigma_key = jrn.split(key, 3)

	        # Shared feature extractor; its width_size outputs feed both heads.
	        self.backbone = eqx.nn.MLP(
	            in_size=in_size,
	            out_size=width_size,
	            width_size=width_size,
	            depth=max(depth - 1, 1),
	            activation=activation,
	            key=backbone_key,
	        )

	        # Mean head: plain linear map, output is unconstrained.
	        self.mu_head = eqx.nn.Linear(width_size, out_size, key=mu_key)

	        # Log-sigma head: the bias is overwritten with -2.0 so that the
	        # initial sigma is small (exp(-2) ~ 0.135) and the FDM decoder does
	        # not receive highly noisy force densities at the start of training.
	        log_sigma_head = eqx.nn.Linear(
	            width_size, out_size, key=log_sigma_key
	        )
	        self.log_sigma_head = eqx.tree_at(
	            lambda layer: layer.bias,
	            log_sigma_head,
	            jnp.full((out_size,), -2.0),
	        )

	        self.edges_signs = edges_signs
	        self.q_shift = q_shift
	        self.slice_out = bool(slice_out)
	        self.slice_indices = slice_indices

	    def __call__(
	        self,
	        x: Float[Array, "N3"],
	        *,
	        key: PRNGKeyArray | None = None,
	    ) -> tuple[Float[Array, "E"], Float[Array, "E"], Float[Array, "E"]]:
	        """Encode a target shape and return force densities.

	        Parameters
	        ----------
	        x : Array
	            Flat target shape (N*3,).
	        key : PRNGKey or None
	            Random key for sampling. If None, no noise is drawn and the
	            latent vector is the posterior mean (``z = mu``).

	        Returns
	        -------
	        q : Array (E,)
	            Force densities ``(softplus(z) + q_shift) * edges_signs``.
	        mu : Array (E,)
	            Mean of the approximate posterior q(z|x).
	        log_sigma : Array (E,)
	            Log standard deviation of the approximate posterior, clipped
	            to [-10, 2].
	        """
	        # Optional input slicing: keep only the selected rows of the
	        # (-1, 3) view, then flatten again.
	        if self.slice_out:
	            x = jnp.reshape(x, (-1, 3))
	            x = x[self.slice_indices, :]
	            x = jnp.ravel(x)

	        features = self.backbone(x)

	        mu = self.mu_head(features)
	        # Clamp for numerical stability: the upper bound caps sigma at
	        # exp(2) ~ 7.4, the lower bound keeps sigma away from zero.
	        log_sigma = jnp.clip(self.log_sigma_head(features), -10.0, 2.0)

	        if key is None:
	            # Deterministic mode: use the posterior mean, no sampling.
	            z = mu
	        else:
	            # Reparameterization trick: z = mu + sigma * epsilon with
	            # epsilon ~ N(0, I), so z stays differentiable w.r.t. mu and
	            # log_sigma.
	            epsilon = jrn.normal(key, shape=mu.shape)
	            z = mu + jnp.exp(log_sigma) * epsilon

	        # softplus gives a positive magnitude; the shift and the per-edge
	        # sign are applied afterwards.
	        q = (jax.nn.softplus(z) + self.q_shift) * self.edges_signs

	        return q, mu, log_sigma


	# =============================================================================
	# Variational Autoencoder
	# =============================================================================


	class VariationalAutoEncoder(eqx.Module):
	    """Variational autoencoder with a differentiable FDM decoder.

	    Couples a variational encoder, which produces ``(q, mu, log_sigma)``,
	    with a decoder that maps force densities ``q`` to a shape. The decoder
	    is called as-is; this class only wires encoder output into it.

	    Parameters
	    ----------
	    encoder : VariationalMLPEncoder
	        Variational encoder producing ``(q, mu, log_sigma)``. Encoders of
	        type ``GNNEncoder`` / ``VariationalGNNEncoder`` are also accepted
	        and additionally receive the ``structure`` argument.
	    decoder : eqx.Module
	        Decoder called as ``decoder(q, x, structure, aux_data)``.
	    """

	    encoder: VariationalMLPEncoder
	    decoder: eqx.Module

	    def __init__(self, encoder, decoder):
	        self.encoder = encoder
	        self.decoder = decoder

	    def __call__(
	        self,
	        x: Float[Array, "N3"],
	        structure,
	        aux_data: bool = False,
	        *args,
	        key: PRNGKeyArray | None = None,
	        **kwargs,
	    ):
	        """Forward pass: encode, sample, decode.

	        Parameters
	        ----------
	        x : Array
	            Flat target shape.
	        structure : EquilibriumStructure
	            Mesh structure, forwarded to the decoder (and to graph
	            encoders).
	        aux_data : bool
	            If True, also return auxiliary data for loss computation.
	        *args, **kwargs
	            Accepted for call-signature compatibility and ignored.
	        key : PRNGKey or None
	            Random key for reparameterization sampling; None disables
	            sampling in the encoder.

	        Returns
	        -------
	        x_hat : Array
	            Decoder output (predicted shape).
	        vae_data : tuple (only when aux_data is truthy)
	            ``(params, mu, log_sigma)`` where ``params`` is the auxiliary
	            value returned by the decoder, documented upstream as
	            ``(q, xyz_fixed, loads)``.
	        """
	        q, mu, log_sigma = self.encode(x, key=key, structure=structure)
	        decoded = self.decoder(q, x, structure, aux_data)

	        if not aux_data:
	            return decoded

	        # With aux_data the decoder returns a (shape, params) pair.
	        x_hat, params = decoded
	        return x_hat, (params, mu, log_sigma)

	    def encode(self, x, *, key=None, structure=None):
	        """Run the encoder on ``x`` and return its output unchanged.

	        Graph encoders are called with ``structure``; every other encoder
	        is called with ``x`` and ``key`` only.
	        """
	        # Imported lazily, as in the original code (presumably to avoid a
	        # circular import with neural_fdm.gnn -- TODO confirm).
	        from neural_fdm.gnn import GNNEncoder, VariationalGNNEncoder

	        if isinstance(self.encoder, (GNNEncoder, VariationalGNNEncoder)):
	            return self.encoder(x, structure=structure, key=key)
	        return self.encoder(x, key=key)

	    def decode(self, q, *args, **kwargs):
	        """Decode force densities by forwarding all arguments to the decoder."""
	        return self.decoder(q, *args, **kwargs)

	    def sample(
	        self,
	        x: Float[Array, "N3"],
	        structure,
	        key: PRNGKeyArray,
	        num_samples: int = 10,
	    ) -> tuple[Float[Array, "S N3"], Float[Array, "S E"]]:
	        """Generate several decoded shapes from a single target.

	        Splits ``key`` into ``num_samples`` keys, encodes ``x`` once per
	        key (one latent sample each) and decodes every sampled ``q``.

	        Parameters
	        ----------
	        x : Array
	            Single target shape (flat).
	        structure : EquilibriumStructure
	            Mesh structure.
	        key : PRNGKey
	            Random key.
	        num_samples : int
	            Number of samples to generate.

	        Returns
	        -------
	        x_hats : Array (num_samples, N*3)
	            Decoded shapes, stacked along the first axis.
	        qs : Array (num_samples, E)
	            Corresponding force densities.
	        """
	        sample_keys = jrn.split(key, num_samples)

	        def _sample_one(sample_key):
	            q, _, _ = self.encode(x, key=sample_key, structure=structure)
	            x_hat = self.decoder(q, x, structure, False)
	            return x_hat, q

	        # vmap over the key axis only; x and structure are closed over.
	        return jax.vmap(_sample_one)(sample_keys)

	    def predict_states(self, x, structure):
	        """Deterministic prediction for visualization.

	        Runs the forward pass with ``key=None`` (no sampling) and passes
	        the decoded shape and decoder parameters to ``build_states``.
	        """
	        from neural_fdm.models import build_states

	        x_hat, (params, _mu, _log_sigma) = self(x, structure, True, key=None)
	        return build_states(x_hat, params, structure)


	# =============================================================================
	# KL Divergence
	# =============================================================================


	def compute_kl_divergence(
	    mu: Float[Array, "... D"],
	    log_sigma: Float[Array, "... D"],
	) -> Float[Array, ""]:
	    """KL divergence between a diagonal Gaussian and the standard normal.

	    Computes KL(q(z|x) || p(z)) with

	        q(z|x) = N(mu, diag(sigma^2))
	        p(z)   = N(0, I)

	    using the closed form (Kingma & Welling 2014, Appendix B):

	        KL = -0.5 * sum_j (1 + log(sigma_j^2) - mu_j^2 - sigma_j^2)
	           = -0.5 * sum_j (1 + 2*log_sigma_j - mu_j^2 - exp(2*log_sigma_j))

	    Parameters
	    ----------
	    mu : Array (..., D)
	        Mean of the approximate posterior.
	    log_sigma : Array (..., D)
	        Log standard deviation of the approximate posterior.

	    Returns
	    -------
	    kl : scalar
	        KL summed over the last axis, then averaged over all remaining
	        (batch) axes. For 1D inputs this is simply the per-sample sum.
	    """
	    log_variance = 2.0 * log_sigma
	    elbo_term = 1.0 + log_variance - jnp.square(mu) - jnp.exp(log_variance)

	    # Sum over latent dimensions first (per-sample KL); the mean then
	    # reduces whatever leading axes remain (a no-op for a single sample).
	    per_sample_kl = jnp.sum(-0.5 * elbo_term, axis=-1)
	    return jnp.mean(per_sample_kl)


	# =============================================================================
	# Beta Annealing Schedule
	# =============================================================================


	def compute_beta_schedule(
	    step: int,
	    beta_max: float = 1.0,
	    cycle_length: int = 5000,
	    warmup_ratio: float = 0.5,
	) -> float:
	    """Cyclical beta annealing schedule.

	    Within each cycle, beta rises linearly from 0 to ``beta_max`` over the
	    warmup portion and then stays at ``beta_max`` until the cycle ends:

	        beta(t) = beta_max * min(1, (t mod T) / (T * r))

	    where T = ``cycle_length`` and r = ``warmup_ratio``
	    (cyclical schedule per Fu et al., 2019).

	    Parameters
	    ----------
	    step : int
	        Current training step.
	    beta_max : float
	        Maximum beta value reached after warmup.
	    cycle_length : int
	        Number of steps per annealing cycle. Must be positive.
	    warmup_ratio : float
	        Fraction of the cycle used for the linear warmup. A value <= 0
	        disables warmup, so beta is ``beta_max`` at every step.

	    Returns
	    -------
	    beta : float
	        Current beta value in [0, beta_max].

	    Raises
	    ------
	    ValueError
	        If ``cycle_length`` is not positive.
	    """
	    if cycle_length <= 0:
	        raise ValueError(
	            f"cycle_length must be positive, got {cycle_length}"
	        )

	    # Kept as a float on purpose: no rounding of the warmup boundary.
	    warmup_length = cycle_length * warmup_ratio
	    if warmup_length <= 0:
	        # No warmup phase: without this guard the first step of every
	        # cycle (position 0) would drop beta to 0.
	        return beta_max * 1.0

	    position = step % cycle_length
	    return beta_max * min(1.0, position / warmup_length)


	# =============================================================================
	# Solution Multiplicity Metrics
	# =============================================================================


	def compute_diversity_metrics(
	    x_hats: Float[Array, "S N3"],
	    qs: Float[Array, "S E"],
	) -> dict:
	    """Quantify solution multiplicity from VAE samples.

	    Given S sampled shapes and force density vectors for the same target,
	    computes summary statistics of how much the samples differ.

	    Parameters
	    ----------
	    x_hats : Array (S, N*3)
	        S sampled shapes from the same target, flattened per sample.
	    qs : Array (S, E)
	        Corresponding force density vectors.

	    Returns
	    -------
	    metrics : dict
	        - "n_samples": number of samples S
	        - "shape_pairwise_L1_mean": mean L1 distance over all unordered
	          sample pairs of shapes (0.0 when there is no pair)
	        - "shape_pairwise_L1_std": std of those distances (0.0 when there
	          is no pair)
	        - "q_pairwise_L1_mean": mean pairwise L1 distance between q
	          vectors (0.0 when there is no pair)
	        - "q_std_per_edge": Array (E,), std of q across samples per edge
	        - "q_std_mean": mean of the per-edge std (scalar summary)
	        - "shape_std_per_node": Array (N,), std across samples of each
	          node's Euclidean norm (distance from the origin)
	    """
	    import numpy as np

	    # Move everything to host memory once instead of converting one
	    # scalar per pair.
	    shapes = np.asarray(x_hats)
	    densities = np.asarray(qs)
	    num_samples = shapes.shape[0]

	    # Index pairs (i, j) with i < j, in the same row-major order as a
	    # nested loop over i and j > i.
	    first, second = np.triu_indices(num_samples, k=1)
	    shape_dists = np.abs(shapes[first] - shapes[second]).sum(
	        axis=-1, dtype=np.float64
	    )
	    q_dists = np.abs(densities[first] - densities[second]).sum(
	        axis=-1, dtype=np.float64
	    )

	    # Per-edge spread of the force densities across samples.
	    q_std = np.std(densities, axis=0)

	    # NOTE(review): this is the std of each node's distance from the
	    # origin, not of its position; it depends on where the structure sits
	    # relative to the origin -- confirm this is the intended metric.
	    node_norms = np.linalg.norm(shapes.reshape(num_samples, -1, 3), axis=-1)
	    shape_std = np.std(node_norms, axis=0)

	    has_shape_pairs = shape_dists.size > 0
	    has_q_pairs = q_dists.size > 0

	    return {
	        "n_samples": num_samples,
	        "shape_pairwise_L1_mean": float(np.mean(shape_dists)) if has_shape_pairs else 0.0,
	        "shape_pairwise_L1_std": float(np.std(shape_dists)) if has_shape_pairs else 0.0,
	        "q_pairwise_L1_mean": float(np.mean(q_dists)) if has_q_pairs else 0.0,
	        "q_std_per_edge": q_std,
	        "q_std_mean": float(np.mean(q_std)),
	        "shape_std_per_node": shape_std,
	    }


	def compute_variance_per_edge(
	    model,
	    x_target: Float[Array, "N3"],
	    structure,
	    key,
	    n_samples: int = 50,
	) -> Float[Array, "E"]:
	    """Per-edge variance of sampled force densities.

	    Calls ``model.sample`` to draw ``n_samples`` force density vectors for
	    ``x_target`` and returns their variance along the sample axis. A high
	    value means the sampled force density of that edge varies a lot; a
	    low value means the samples agree on it.

	    Parameters
	    ----------
	    model : VariationalAutoEncoder
	        Model exposing ``sample(x, structure, key, num_samples=...)``
	        which returns a ``(shapes, force_densities)`` pair.
	    x_target : Array (N*3,)
	        Target shape.
	    structure : EquilibriumStructure
	        Mesh structure, forwarded to ``model.sample``.
	    key : PRNGKey
	        Random key for sampling.
	    n_samples : int
	        Number of samples to draw.

	    Returns
	    -------
	    q_variance : Array (E,)
	        Variance of the force density per edge across samples.
	    """
	    # Only the force densities are needed; the decoded shapes are dropped.
	    _shapes, force_densities = model.sample(
	        x_target,
	        structure,
	        key,
	        num_samples=n_samples,
	    )
	    q_variance = jnp.var(force_densities, axis=0)
	    return q_variance