# Hyperparameters for the `vae` component.
#
# NOTE(review): this file had been collapsed onto a single line, which is not
# parseable YAML. The block structure below is reconstructed; every key is
# assumed to be a direct child of `vae` — confirm against the code that loads
# this config (in particular `token_scales` / `token_probability`).
vae:
  use_downsample: true
  num_latents: 256
  point_feats: 3
  out_dim: 1
  embed_dim: 64

  # Transformer sizing (presumably shared by encoder and decoder — TODO confirm).
  width: 768
  heads: 12
  num_encoder_layers: 8
  num_decoder_layers: 16
  init_scale: 0.25
  qkv_bias: false
  use_ln_post: true

  use_udf_extraction: true

  # Seven candidate scales. Kept as floats exactly as originally written.
  token_scales:
    - 128.0
    - 256.0
    - 384.0
    - 512.0
    - 640.0
    - 1024.0
    - 2048.0

  # Seven weights that sum to 1.0. Presumably entry i is the probability of
  # selecting token_scales[i] — verify against the consumer. Keep both lists
  # the same length when editing.
  token_probability:
    - 0.025
    - 0.025
    - 0.025
    - 0.025
    - 0.05
    - 0.2
    - 0.65