| { |
| "models": { |
| "denoiser": { |
| "name": "ElasticSLatCondFlowModel", |
| "args": { |
| "resolution": 64, |
| "in_channels": 8, |
| "out_channels": 8, |
| "model_channels": 1024, |
| "cond_channels": 1024, |
| "num_blocks": 24, |
| "num_heads": 16, |
| "mlp_ratio": 4, |
| "patch_size": 2, |
| "num_io_res_blocks": 2, |
| "io_block_channels": [ |
| 128 |
| ], |
| "pe_mode": "ape", |
| "qk_rms_norm": true, |
| "use_fp16": true, |
| "use_point_embedder": true, |
| "point_embedder_out_channels": 1024, |
| "use_mask_embedder": true, |
| "mask_embedder_out_channels": 1024, |
| "use_pose_embedder": true, |
| "pose_embedder_out_channels": 1024, |
| "pose_representation": "9d_translation_scale" |
| } |
| } |
| }, |
| "dataset": { |
| "name": "WebDatasetMultiViewStructuredLatent", |
| "s3_data_root": "s3://data_root", |
| "args": { |
| "shards": "{ABO_wds/shard-{000000..000398},HSSD_wds/shard-{000000..000559},Objaverse_recgen_new_wds/shard-{000000..008374},Objaverse_recgen_new_wds/shard-{010000..018208},PartNeXt_wds_full/shard-{000000..001981}}.tar", |
| "num_views": 2, |
| "image_size": 518, |
| "min_aesthetic_score": 4.5, |
| "shuffle_buffer": 200, |
| "length": 189119, |
| "require_pose_data": true, |
| "require_pointmap": true, |
| "use_pose_normalization": true, |
| "pose_variant": "median_quantile_5per", |
| "mix_stereo_depth": true, |
| "normalization": { |
| "mean": [ |
| -2.1687545776367188, |
| -0.004347046371549368, |
| -0.13352349400520325, |
| -0.08418072760105133, |
| -0.5271206498146057, |
| 0.7238689064979553, |
| -1.1414450407028198, |
| 1.2039363384246826 |
| ], |
| "std": [ |
| 2.377650737762451, |
| 2.386378288269043, |
| 2.124418020248413, |
| 2.1748552322387695, |
| 2.663944721221924, |
| 2.371192216873169, |
| 2.6217446327209473, |
| 2.684523105621338 |
| ] |
| }, |
| "metadata_root": "/tmp/wds_metadata" |
| } |
| }, |
| "trainer": { |
| "name": "MultiImageConditionedSparseFlowMatchingCFGTrainer", |
| "args": { |
| "max_steps": 100000, |
| "batch_size_per_gpu": 8, |
| "batch_split": 2, |
| "finetune_ckpt": { |
| "denoiser": "checkpoints/TRELLIS-image-large/ckpts/slat_flow_img_dit_L_64l8p2_fp16.pt" |
| }, |
| "optimizer": { |
| "name": "AdamW", |
| "args": { |
| "lr": 0.0001, |
| "weight_decay": 0.0 |
| } |
| }, |
| "ema_rate": [ |
| 0.9999 |
| ], |
| "fp16_mode": "inflat_all", |
| "fp16_scale_growth": 0.001, |
| "elastic": { |
| "name": "LinearMemoryController", |
| "args": { |
| "target_ratio": 0.85, |
| "max_mem_ratio_start": 0.6 |
| } |
| }, |
| "grad_clip": { |
| "name": "AdaptiveGradClipper", |
| "args": { |
| "max_norm": 1.0, |
| "clip_percentile": 95 |
| } |
| }, |
| "i_print": 200, |
| "i_log": 200, |
| "i_sample": 1000, |
| "i_save": 5000, |
| "i_ddpcheck": 5000, |
| "p_uncond": 0.1, |
| "p_single_view": 0.33, |
| "single_view_drop_mode": "last", |
| "t_schedule": { |
| "name": "logitNormal", |
| "args": { |
| "mean": 1.0, |
| "std": 1.0 |
| } |
| }, |
| "sigma_min": 1e-05, |
| "image_cond_model": "dinov2_vitl14_reg", |
| "use_pose_conditioning": true, |
| "pose_representation": "9d_translation_scale", |
| "enable_snapshot": true |
| } |
| }, |
| "use_wandb": true, |
| "wandb_name": "slat-multiview-2pose-s3", |
| "wandb_config": { |
| "experiment_type": "slat_multiview_2pose", |
| "datasets": [ |
| "ABO_wds", |
| "HSSD_wds", |
| "Objaverse_recgen_new_wds", |
| "PartNeXt_wds_full" |
| ], |
| "num_views": 2, |
| "notes": "SLat multi-view training with per-view additive pose conditioning (9D rotation). Stereo depth mixing, flexible view dropout (33% single), shared CFG dropout (10% uncond). Robust pose normalization (median_quantile_5per). S3 streaming." |
| } |
| } |