{ "cfg": { "decoder": { "d_model": 768, "freeze_text_encoder": true, "input_feat_dim": 256, "num_decoder_layers": 8, "num_queries": 256, "text_encoder": "clip", "transformer_dim_feedforward": 3072, "transformer_dropout": 0.1, "transformer_max_drop_path": 0.0, "transformer_n_heads": 12, "transformer_use_checkpointing": true }, "encoder": { "embed_dim": 256, "input_feat_dim": 1536, "num_rgb_harmonic_functions": 16, "ptv3_args": { "dec_channels": [ 256, 256, 512, 512 ], "enc_channels": [ 256, 256, 512, 512, 1024 ], "in_channels": 256 }, "rgb_proj_dim": 256 } } }