ustc-community
/

dfine-small-coco

@@ -1,10 +1,8 @@
 {
   "activation_dropout": 0.0,
   "activation_function": "silu",
   "anchor_image_size": null,
-  "architectures": [
-    "DFineForObjectDetection"
-  ],
   "attention_dropout": 0.0,
   "auxiliary_loss": true,
   "backbone": null,
@@ -30,51 +28,52 @@
     ],
     "stage_config": [
       [
         64,
-        64,
-        128,
         1,
         false,
         false,
         3,
-        6
       ],
       [
-        128,
-        128,
-        512,
-        2,
         true,
         false,
         3,
-        6
       ],
       [
-        512,
         256,
-        1024,
-        5,
         true,
         true,
         5,
-        6
       ],
       [
-        1024,
         512,
-        2048,
-        2,
         true,
         true,
         5,
-        6
       ]
     ],
     "stem_channels": [
       3,
-      32,
-      64
-    ]
   },
   "backbone_kwargs": null,
   "batch_norm_eps": 1e-05,
@@ -84,19 +83,15 @@
   "decoder_attention_heads": 8,
   "decoder_ffn_dim": 1024,
   "decoder_in_channels": [
-    384,
-    384,
-    384
   ],
-  "decoder_layers": 6,
   "decoder_method": "default",
-  "decoder_n_points": [
-    3,
-    6,
-    3
-  ],
   "decoder_offset_scale": 0.5,
-  "depth_mult": 1.0,
   "disable_custom_kernels": true,
   "dropout": 0.0,
   "encode_proj_layers": [
@@ -104,12 +99,12 @@
   ],
   "encoder_activation_function": "gelu",
   "encoder_attention_heads": 8,
-  "encoder_ffn_dim": 2048,
-  "encoder_hidden_dim": 384,
   "encoder_in_channels": [
     512,
-    1024,
-    2048
   ],
   "encoder_layers": 1,
   "eos_coefficient": 0.0001,
@@ -123,7 +118,7 @@
   "focal_loss_alpha": 0.75,
   "focal_loss_gamma": 2.0,
   "freeze_backbone_batch_norms": true,
-  "hidden_expansion": 1.0,
   "id2label": {
     "0": "person",
     "1": "bicycle",
@@ -311,7 +306,6 @@
   "reg_max": 32,
   "reg_scale": 4.0,
   "top_prob_values": 4,
-  "torch_dtype": "float32",
   "transformers_version": "4.49.0.dev0",
   "use_focal_loss": true,
   "use_pretrained_backbone": false,

 {
+  "_attn_implementation_autoset": true,
   "activation_dropout": 0.0,
   "activation_function": "silu",
   "anchor_image_size": null,
   "attention_dropout": 0.0,
   "auxiliary_loss": true,
   "backbone": null,
     ],
     "stage_config": [
       [
+        16,
+        16,
         64,
         1,
         false,
         false,
         3,
+        3
       ],
       [
+        64,
+        32,
+        256,
+        1,
         true,
         false,
         3,
+        3
       ],
       [
         256,
+        64,
+        512,
+        2,
         true,
         true,
         5,
+        3
       ],
       [
         512,
+        128,
+        1024,
+        1,
         true,
         true,
         5,
+        3
       ]
     ],
     "stem_channels": [
       3,
+      16,
+      16
+    ],
+    "use_lab": true
   },
   "backbone_kwargs": null,
   "batch_norm_eps": 1e-05,
   "decoder_attention_heads": 8,
   "decoder_ffn_dim": 1024,
   "decoder_in_channels": [
+    256,
+    256,
+    256
   ],
+  "decoder_layers": 3,
   "decoder_method": "default",
+  "decoder_n_points": 4,
   "decoder_offset_scale": 0.5,
+  "depth_mult": 0.34,
   "disable_custom_kernels": true,
   "dropout": 0.0,
   "encode_proj_layers": [
   ],
   "encoder_activation_function": "gelu",
   "encoder_attention_heads": 8,
+  "encoder_ffn_dim": 1024,
+  "encoder_hidden_dim": 256,
   "encoder_in_channels": [
+    256,
     512,
+    1024
   ],
   "encoder_layers": 1,
   "eos_coefficient": 0.0001,
   "focal_loss_alpha": 0.75,
   "focal_loss_gamma": 2.0,
   "freeze_backbone_batch_norms": true,
+  "hidden_expansion": 0.5,
   "id2label": {
     "0": "person",
     "1": "bicycle",
   "reg_max": 32,
   "reg_scale": 4.0,
   "top_prob_values": 4,
   "transformers_version": "4.49.0.dev0",
   "use_focal_loss": true,
   "use_pretrained_backbone": false,