| { | |
| "backbone": { | |
| "embed_dim": 256, | |
| "embed_layer": "ConvPyramidPatchEmbed", | |
| "ffn_layer": "mlp", | |
| "ffn_ratio": 4, | |
| "interaction_indexes": [ | |
| 10, | |
| 11 | |
| ], | |
| "name": "ecvittplus", | |
| "num_heads": 4, | |
| "num_levels": 3, | |
| "patch_size": 16, | |
| "proj_dim": null, | |
| "skip_load_backbone": false | |
| }, | |
| "decoder": { | |
| "activation": "silu", | |
| "aux_loss": true, | |
| "box_noise_scale": 1.0, | |
| "cross_attn_method": "default", | |
| "dim_feedforward": 1024, | |
| "dropout": 0.0, | |
| "eps": 0.01, | |
| "eval_idx": -1, | |
| "eval_spatial_size": [ | |
| 640, | |
| 640 | |
| ], | |
| "feat_channels": [ | |
| 256, | |
| 256, | |
| 256 | |
| ], | |
| "feat_strides": [ | |
| 8, | |
| 16, | |
| 32 | |
| ], | |
| "hidden_dim": 256, | |
| "label_noise_ratio": 0.5, | |
| "layer_scale": 1, | |
| "learn_query_content": false, | |
| "mask_downsample_ratio": null, | |
| "nhead": 8, | |
| "num_classes": 80, | |
| "num_denoising": 100, | |
| "num_layers": 4, | |
| "num_levels": 3, | |
| "num_points": [ | |
| 3, | |
| 6, | |
| 3 | |
| ], | |
| "num_queries": 300, | |
| "query_select_method": "default", | |
| "reg_max": 32, | |
| "reg_scale": 4, | |
| "share_bbox_head": false, | |
| "share_score_head": false | |
| }, | |
| "encoder": { | |
| "act": "silu", | |
| "csp_type": "csp2", | |
| "depth_mult": 0.67, | |
| "dim_feedforward": 512, | |
| "dropout": 0.0, | |
| "eval_spatial_size": null, | |
| "expansion": 0.75, | |
| "feat_strides": [ | |
| 8, | |
| 16, | |
| 32 | |
| ], | |
| "fuse_op": "sum", | |
| "hidden_dim": 256, | |
| "in_channels": [ | |
| 256, | |
| 256, | |
| 256 | |
| ], | |
| "nhead": 8, | |
| "num_encoder_layers": 1, | |
| "pe_temperature": 10000, | |
| "use_encoder_idx": [ | |
| 2 | |
| ] | |
| }, | |
| "postprocessor": { | |
| "num_classes": 80, | |
| "num_top_queries": 300, | |
| "remap_mscoco_category": false, | |
| "use_focal_loss": true | |
| } | |
| } |