{ "models": { "property_encoder": { "name": "ElasticPropertyEncoder", "args": { "resolution": 64, "in_channels": 3072, "in_channels_phy": 14, "model_channels": 768, "latent_channels": 8, "num_blocks": 4, "num_heads": 12, "mlp_ratio": 4, "attn_mode": "swin", "window_size": 8, "use_fp16": true } }, "property_decoder": { "name": "ElasticPropertyDecoder", "args": { "resolution": 64, "model_channels": 2048, "latent_channels": 8, "num_blocks": 4, "num_heads": 16, "mlp_ratio": 4, "attn_mode": "swin", "window_size": 8, "use_fp16": true, "representation_config": { "use_color": true } } }, "property_output": { "name": "PropertyOutput", "args": { "model_channels": 32, "output_channels_lang": 3072, "output_channels_phy": 14, "use_fp16": true } }, "decoder": { "name": "ElasticSLatMeshDecodernew", "args": { "resolution": 64, "model_channels": 768, "phy_channels": 2048, "latent_channels": 8, "num_blocks": 12, "num_heads": 12, "mlp_ratio": 4, "attn_mode": "swin", "window_size": 8, "use_fp16": true, "representation_config": { "use_color": true } } } }, "dataset": { "name": "Slat2RenderGeomesh", "args": { "image_size": 384, "latent_model": "dinov2_vitl14_reg_slat_enc_swin8_B_64l8_fp16", "min_aesthetic_score": 4.5, "max_num_voxels": 28000 } }, "trainer": { "name": "SLatVaeMeshTrainer", "args": { "onlyphy_property": true, "max_steps": 1000000, "batch_size_per_gpu": 4, "batch_split": 4, "optimizer": { "name": "AdamW", "args": { "lr": 0.0001, "weight_decay": 0.0 } }, "ema_rate": [ 0.9999 ], "fp16_mode": "inflat_all", "fp16_scale_growth": 0.001, "elastic": { "name": "LinearMemoryController", "args": { "target_ratio": 0.6, "max_mem_ratio_start": 0.5 } }, "grad_clip": { "name": "AdaptiveGradClipper", "args": { "max_norm": 1.0, "clip_percentile": 95 } }, "i_log": 10, "i_sample": 5000, "i_save": 10000, "lambda_ssim": 0.2, "lambda_lpips": 0.2, "lambda_tsdf": 0.01, "lambda_depth": 10.0, "lambda_color": 0.1, "lambda_kl": 1e-06, "depth_loss_type": "smooth_l1" } } }