{
  "models": {
    "encoder": {
      "name": "SparseStructureEncoder",
      "args": {
        "in_channels": 1,
        "latent_channels": 8,
        "num_res_blocks": 2,
        "num_res_blocks_middle": 2,
        "channels": [
          32,
          128,
          512
        ],
        "use_fp16": true
      }
    },
    "decoder": {
      "name": "SparseStructureDecoder",
      "args": {
        "out_channels": 1,
        "latent_channels": 8,
        "num_res_blocks": 2,
        "num_res_blocks_middle": 2,
        "channels": [
          512,
          128,
          32
        ],
        "use_fp16": true
      }
    }
  },
  "dataset": {
    "name": "SparseStructure",
    "args": {
      "resolution": 64,
      "min_aesthetic_score": 4.5,
      "min_num_voxels": 1024
    }
  },
  "trainer": {
    "name": "SparseStructureVaeTrainer",
    "args": {
      "max_steps": 1000000,
      "batch_size_per_gpu": 16,
      "batch_split": 1,
      "num_workers": 4,
      "prefetch_factor": 2,
      "optimizer": {
        "name": "AdamW",
        "args": {
          "lr": 0.0001,
          "weight_decay": 0.0
        }
      },
      "ema_rate": [
        0.9999
      ],
      "fp16_mode": "inflat_all",
      "fp16_scale_growth": 0.001,
      "grad_clip": {
        "name": "AdaptiveGradClipper",
        "args": {
          "max_norm": 1.0,
          "clip_percentile": 95
        }
      },
      "i_log": 1,
      "i_sample": 1000,
      "i_save": 2000,
      "loss_type": "dice",
      "lambda_kl": 0.001,
      "finetune_ckpt": {
        "encoder": "/data-nas/models/TRELLIS/TRELLIS-image-large/ckpts/ss_enc_conv3d_16l8_fp16.ckpt",
        "decoder": "/data-nas/models/TRELLIS/TRELLIS-image-large/ckpts/ss_dec_conv3d_16l8_fp16.ckpt"
      }
    }
  }
}