Thastp committed on
Commit
eb3b2c9
·
verified ·
1 Parent(s): 6ae798d

Upload model

Browse files
Files changed (3) hide show
  1. config.json +1 -0
  2. configuration_rf_detr.py +3 -3
  3. modeling_rf_detr.py +6 -12
config.json CHANGED
@@ -11,6 +11,7 @@
11
  "ca_nheads": 16,
12
  "dec_layers": 3,
13
  "dec_n_points": 2,
 
14
  "encoder": "dinov2_windowed_small",
15
  "gradient_checkpointing": false,
16
  "group_detr": 13,
 
11
  "ca_nheads": 16,
12
  "dec_layers": 3,
13
  "dec_n_points": 2,
14
+ "device": "cpu",
15
  "encoder": "dinov2_windowed_small",
16
  "gradient_checkpointing": false,
17
  "group_detr": 13,
configuration_rf_detr.py CHANGED
@@ -6,7 +6,7 @@ from optimum.exporters.onnx.model_configs import ViTOnnxConfig
6
 
7
  ### modified from https://github.com/roboflow/rf-detr/blob/main/rfdetr/config.py
8
 
9
- #DEVICE = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
10
 
11
  class RFDetrConfig(PretrainedConfig):
12
  model_type = 'rf-detr'
@@ -24,7 +24,7 @@ class RFDetrConfig(PretrainedConfig):
24
  amp: bool = True,
25
  num_classes: int = 90,
26
  num_queries: int = 300,
27
- # device: Literal["cpu", "cuda", "mps"] = DEVICE,
28
  resolution: int = 560,
29
  group_detr: int = 13,
30
  gradient_checkpointing: bool = False,
@@ -40,7 +40,7 @@ class RFDetrConfig(PretrainedConfig):
40
  self.layer_norm = layer_norm
41
  self.amp = amp
42
  self.num_classes = num_classes
43
- # self.device = device
44
  self.resolution = resolution
45
  self.group_detr = group_detr
46
  self.gradient_checkpointing = gradient_checkpointing
 
6
 
7
  ### modified from https://github.com/roboflow/rf-detr/blob/main/rfdetr/config.py
8
 
9
+ DEVICE = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
10
 
11
  class RFDetrConfig(PretrainedConfig):
12
  model_type = 'rf-detr'
 
24
  amp: bool = True,
25
  num_classes: int = 90,
26
  num_queries: int = 300,
27
+ device: Literal["cpu", "cuda", "mps"] = DEVICE,
28
  resolution: int = 560,
29
  group_detr: int = 13,
30
  gradient_checkpointing: bool = False,
 
40
  self.layer_norm = layer_norm
41
  self.amp = amp
42
  self.num_classes = num_classes
43
+ self.device = device
44
  self.resolution = resolution
45
  self.group_detr = group_detr
46
  self.gradient_checkpointing = gradient_checkpointing
modeling_rf_detr.py CHANGED
@@ -40,7 +40,7 @@ class RFDetrModelForObjectDetection(PreTrainedModel):
40
  layer_norm = config.layer_norm,
41
  amp = config.amp,
42
  num_classes = config.num_classes,
43
- #device = config.device,
44
  resolution = config.resolution,
45
  group_detr = config.group_detr,
46
  gradient_checkpointing = config.gradient_checkpointing,
@@ -106,19 +106,15 @@ class RFDetrModelForObjectDetection(PreTrainedModel):
106
  wr = self.config.resolution / float(w)
107
 
108
  for label in labels:
109
- boxes = label["boxes"]#.to(device=self.config.device, dtype=torch.float32)
110
  # resize boxes to model's resolution
111
  boxes[:, [0, 2]] *= wr
112
  boxes[:, [1, 3]] *= hr
113
- # boxes[:, 0] *= wr
114
- # boxes[:, 1] *= hr
115
- # boxes[:, 2] *= wr
116
- # boxes[:, 3] *= hr
117
  # normalize to [0, 1] by model's resolution
118
  boxes[:] /= self.config.resolution
119
  label["boxes"] = boxes
120
- # if "labels" in label:
121
- # label["labels"] = label["labels"].to(self.config.device)
122
 
123
  def forward(self, pixel_values: torch.Tensor, pixel_mask: torch.Tensor, labels=None, **kwargs) -> ModelOutput:
124
  resize = Resize((self.config.resolution, self.config.resolution))
@@ -135,16 +131,14 @@ class RFDetrModelForObjectDetection(PreTrainedModel):
135
  self.criterion.training = False
136
 
137
  # resize pixel values and mask to model's resolution
138
- # pixel_values = pixel_values.to(self.config.device)
139
- # pixel_mask = pixel_mask.to(self.config.device)
140
  pixel_values = resize(pixel_values)
141
  pixel_mask = resize(pixel_mask)
142
 
143
  samples = NestedTensor(pixel_values, pixel_mask)
144
  outputs = self.model(samples)
145
 
146
- #TODO: check format of pred_boxes
147
-
148
  # compute loss, return none and empty dict if not training
149
  loss, loss_dict = self.compute_loss(labels, outputs)
150
 
 
40
  layer_norm = config.layer_norm,
41
  amp = config.amp,
42
  num_classes = config.num_classes,
43
+ device = config.device,
44
  resolution = config.resolution,
45
  group_detr = config.group_detr,
46
  gradient_checkpointing = config.gradient_checkpointing,
 
106
  wr = self.config.resolution / float(w)
107
 
108
  for label in labels:
109
+ boxes = label["boxes"].to(device=self.config.device, dtype=torch.float32)
110
  # resize boxes to model's resolution
111
  boxes[:, [0, 2]] *= wr
112
  boxes[:, [1, 3]] *= hr
 
 
 
 
113
  # normalize to [0, 1] by model's resolution
114
  boxes[:] /= self.config.resolution
115
  label["boxes"] = boxes
116
+ if "labels" in label:
117
+ label["labels"] = label["labels"].to(self.config.device)
118
 
119
  def forward(self, pixel_values: torch.Tensor, pixel_mask: torch.Tensor, labels=None, **kwargs) -> ModelOutput:
120
  resize = Resize((self.config.resolution, self.config.resolution))
 
131
  self.criterion.training = False
132
 
133
  # resize pixel values and mask to model's resolution
134
+ pixel_values = pixel_values.to(self.config.device)
135
+ pixel_mask = pixel_mask.to(self.config.device)
136
  pixel_values = resize(pixel_values)
137
  pixel_mask = resize(pixel_mask)
138
 
139
  samples = NestedTensor(pixel_values, pixel_mask)
140
  outputs = self.model(samples)
141
 
 
 
142
  # compute loss, return none and empty dict if not training
143
  loss, loss_dict = self.compute_loss(labels, outputs)
144