{
  "backbone": {
    "embed_dim": 384,
    "embed_layer": "ConvPyramidPatchEmbed",
    "ffn_layer": "mlp",
    "ffn_ratio": 4,
    "interaction_indexes": [
      10,
      11
    ],
    "name": "ecpose_vits",
    "num_heads": 6,
    "num_levels": 3,
    "patch_size": 16,
    "proj_dim": 256,
    "skip_load_backbone": false
  },
  "decoder": {
    "activation": "relu",
    "aux_loss": true,
    "cls_no_bias": false,
    "dec_n_points": 4,
    "dec_pred_class_embed_share": false,
    "dec_pred_pose_embed_share": false,
    "dim_feedforward": 1024,
    "dropout": 0.0,
    "eval_spatial_size": [
      640,
      640
    ],
    "feat_strides": [
      8,
      16,
      32
    ],
    "hidden_dim": 256,
    "learnable_tgt_init": true,
    "nhead": 8,
    "normalize_before": false,
    "num_body_points": 17,
    "num_decoder_layers": 4,
    "num_feature_levels": 3,
    "num_queries": 60,
    "reg_max": 32,
    "reg_scale": 4,
    "return_intermediate_dec": true,
    "two_stage_bbox_embed_share": false,
    "two_stage_class_embed_share": false,
    "two_stage_type": "standard"
  },
  "encoder": {
    "act": "silu",
    "csp_type": "csp2",
    "depth_mult": 1,
    "dim_feedforward": 1024,
    "dropout": 0.0,
    "expansion": 0.75,
    "feat_strides": [
      8,
      16,
      32
    ],
    "fuse_op": "sum",
    "hidden_dim": 256,
    "in_channels": [
      256,
      256,
      256
    ],
    "nhead": 8,
    "num_encoder_layers": 1,
    "use_encoder_idx": [
      2
    ]
  },
  "postprocessor": {
    "num_body_points": 17,
    "num_select": 60
  }
}