stevenbucaille committed · Commit 66a3535 · verified · 1 Parent(s): f9950b4

Add rf-detr-large model

Files changed (2):
  1. config.json +28 -29
  2. model.safetensors +2 -2
config.json CHANGED
@@ -13,30 +13,30 @@
     "drop_path_rate": 0.0,
     "hidden_act": "gelu",
     "hidden_dropout_prob": 0.0,
-    "hidden_size": 768,
-    "image_size": 518,
+    "hidden_size": 384,
+    "image_size": 704,
     "initializer_range": 0.02,
     "layer_norm_eps": 1e-06,
     "layerscale_value": 1.0,
     "mlp_ratio": 4,
     "model_type": "rf_detr_dinov2",
-    "num_attention_heads": 12,
+    "num_attention_heads": 6,
     "num_channels": 3,
     "num_hidden_layers": 12,
-    "num_windows": 4,
+    "num_windows": 2,
     "out_features": [
-      "stage2",
-      "stage5",
-      "stage8",
-      "stage11"
+      "stage3",
+      "stage6",
+      "stage9",
+      "stage12"
     ],
     "out_indices": [
-      2,
-      5,
-      8,
-      11
+      3,
+      6,
+      9,
+      12
     ],
-    "patch_size": 14,
+    "patch_size": 16,
     "qkv_bias": true,
     "reshape_hidden_states": true,
     "stage_names": [
@@ -59,12 +59,13 @@
     "window_block_indexes": [
       0,
       1,
-      3,
+      2,
       4,
-      6,
+      5,
       7,
-      9,
-      10
+      8,
+      10,
+      11
     ]
   },
   "bbox_cost": 5,
@@ -72,13 +73,13 @@
   "c2f_num_blocks": 3,
   "class_cost": 2,
   "class_loss_coefficient": 1,
-  "d_model": 384,
+  "d_model": 256,
   "decoder_activation_function": "relu",
-  "decoder_cross_attention_heads": 24,
+  "decoder_cross_attention_heads": 16,
   "decoder_ffn_dim": 2048,
-  "decoder_layers": 3,
-  "decoder_n_points": 4,
-  "decoder_self_attention_heads": 12,
+  "decoder_layers": 4,
+  "decoder_n_points": 2,
+  "decoder_self_attention_heads": 8,
   "dice_loss_coefficient": 1,
   "disable_custom_kernels": true,
   "dropout": 0.1,
@@ -283,17 +284,15 @@
   "mask_loss_coefficient": 1,
   "mask_point_sample_ratio": 16,
   "model_type": "rf_detr",
-  "num_feature_levels": 2,
+  "num_feature_levels": 1,
   "num_queries": 300,
   "projector_in_channels": [
-    384,
-    384
+    256
   ],
-  "projector_out_channels": 384,
+  "projector_out_channels": 256,
   "projector_scale_factors": [
-    2.0,
-    0.5
+    1.0
   ],
   "segmentation_head_activation_function": "gelu",
-  "transformers_version": "5.0.0.dev0"
+  "transformers_version": "5.0.1.dev0"
 }
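
The new values can be checked from the hub directly. Below is a minimal sketch of inspecting the updated configuration with transformers' AutoConfig; the repo id is a placeholder (this commit view does not show the repository name), it assumes a transformers build that already registers the "rf_detr" model type (the config pins "5.0.1.dev0"), and the backbone_config attribute name is assumed from the usual DETR-family convention.

from transformers import AutoConfig

# Hypothetical repo id: substitute the repository this commit belongs to.
config = AutoConfig.from_pretrained("<namespace>/rf-detr-large")

# Top-level decoder values changed by this commit.
print(config.d_model)             # 256 (was 384)
print(config.decoder_layers)      # 4 (was 3)
print(config.num_feature_levels)  # 1 (was 2)

# Nested DINOv2 backbone values (attribute name assumed).
print(config.backbone_config.hidden_size)  # 384 (was 768)
print(config.backbone_config.patch_size)   # 16 (was 14)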
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0b7ad3cc3c67e41679d0630f2c98095551d4a287ffe796c135c21dbea6ba84a2
-size 542617560
+oid sha256:366c6e28361817aad7fc4da97db2ce400db177bea7a2f5eb9f7d324e5a07d46f
+size 135796200
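
The model.safetensors entry is a git-lfs pointer: the oid is the SHA-256 digest of the actual file contents and size is its length in bytes, so a download can be verified locally. A small sketch, assuming the file has already been fetched to the working directory:

import hashlib
from pathlib import Path

path = Path("model.safetensors")  # assumed local download path

# Hash the file in 1 MiB chunks to avoid holding ~135 MB in memory at once.
digest = hashlib.sha256()
with path.open("rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        digest.update(chunk)

assert digest.hexdigest() == "366c6e28361817aad7fc4da97db2ce400db177bea7a2f5eb9f7d324e5a07d46f"
assert path.stat().st_size == 135796200  # size from the new LFS pointer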