{ "backbone": { "embed_dim": 384, "embed_layer": "ConvPyramidPatchEmbed", "ffn_layer": "mlp", "ffn_ratio": 4, "interaction_indexes": [ 10, 11 ], "name": "ecpose_vits", "num_heads": 6, "num_levels": 3, "patch_size": 16, "proj_dim": 256, "skip_load_backbone": false }, "decoder": { "activation": "relu", "aux_loss": true, "cls_no_bias": false, "dec_n_points": 4, "dec_pred_class_embed_share": false, "dec_pred_pose_embed_share": false, "dim_feedforward": 1024, "dropout": 0.0, "eval_spatial_size": [ 640, 640 ], "feat_strides": [ 8, 16, 32 ], "hidden_dim": 256, "learnable_tgt_init": true, "nhead": 8, "normalize_before": false, "num_body_points": 17, "num_decoder_layers": 4, "num_feature_levels": 3, "num_queries": 60, "reg_max": 32, "reg_scale": 4, "return_intermediate_dec": true, "two_stage_bbox_embed_share": false, "two_stage_class_embed_share": false, "two_stage_type": "standard" }, "encoder": { "act": "silu", "csp_type": "csp2", "depth_mult": 1, "dim_feedforward": 1024, "dropout": 0.0, "expansion": 0.75, "feat_strides": [ 8, 16, 32 ], "fuse_op": "sum", "hidden_dim": 256, "in_channels": [ 256, 256, 256 ], "nhead": 8, "num_encoder_layers": 1, "use_encoder_idx": [ 2 ] }, "postprocessor": { "num_body_points": 17, "num_select": 60 } }