{
"model": {
"type": "detector-v4",
"labels": [
"interaction"
],
"num_queries": 12,
"num_classes": 2,
"use_stuff_head": true,
"stuff_classes": [
0
],
"mlp_point_proj": false,
"full_in_channels": 1232,
"hidden_channels": 256,
"num_heads": 16,
"depth": 3,
"mlp_ratio": 4.0,
"qkv_bias": true,
"qk_scale": null,
"layer_scale": null,
"pre_norm": true,
"query_type": "learned",
"pos_emb": true,
"supervise_attn_mask": true,
"enable_flash": false,
"upcast_attention": false,
"upcast_softmax": false,
"backbone": {
"type": "PT-v3m2",
"in_channels": 4,
"order": [
"hilbert",
"hilbert-trans",
"z",
"z-trans"
],
"stride": [
2,
2,
2,
2
],
"enc_depths": [
3,
3,
3,
9,
3
],
"enc_channels": [
48,
96,
192,
384,
512
],
"enc_num_head": [
3,
6,
12,
24,
32
],
"enc_patch_size": [
256,
256,
256,
256,
256
],
"mlp_ratio": 4,
"qkv_bias": true,
"qk_scale": null,
"layer_scale": 1e-05,
"attn_drop": 0.0,
"proj_drop": 0.0,
"drop_path": 0.3,
"shuffle_orders": true,
"pre_norm": true,
"enable_rpe": false,
"enable_flash": true,
"upcast_attention": false,
"upcast_softmax": false,
"traceable": false,
"mask_token": false,
"enc_mode": true,
"freeze_encoder": false
},
"criteria": [
{
"type": "FastUnifiedInstanceLoss",
"cost_mask": 1.0,
"cost_dice": 1.0,
"cost_class": 1.0,
"loss_weight_focal": 2.0,
"loss_weight_dice": 5.0,
"cls_weight_matched": 2.0,
"cls_weight_noobj": 0.5,
"focal_alpha": 0.25,
"focal_gamma": 2.0,
"aux_loss_weight": 1.0,
"num_points": 100000,
"truth_label": "instance",
"noobj_mask_loss_weight": 0.0
}
]
}
}