Zhen Ye committed
Commit f78d96f · 1 Parent(s): 06e44d3
Fix BGR to RGB conversion for DETR and GroundingDino inference
- models/detectors/detr.py +10 -2
- models/detectors/grounding_dino.py +10 -2
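The core of the change: OpenCV decodes and captures frames in BGR channel order, while the Hugging Face image processors behind both detectors expect RGB, so swapped channels quietly hurt detection quality. A minimal standalone sketch of the conversion itself (the frame below is a synthetic stand-in, not repository code):

import cv2
import numpy as np

# Stand-in for a frame from cv2.imread() / cv2.VideoCapture.read(), which return BGR.
frame_bgr = np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)

# The conversion added in this commit; for 3-channel images it is equivalent to reversing the channel axis.
frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
assert np.array_equal(frame_rgb, frame_bgr[:, :, ::-1])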
models/detectors/detr.py
CHANGED
@@ -44,7 +44,11 @@ class DetrDetector(ObjectDetector):
         )
 
     def predict(self, frame: np.ndarray, queries: Sequence[str]) -> DetectionResult:
-        inputs = self.processor(images=frame, return_tensors="pt")
+        # OpenCV frames are BGR, model expects RGB
+        import cv2
+        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+
+        inputs = self.processor(images=frame_rgb, return_tensors="pt")
         inputs = {key: value.to(self.device) for key, value in inputs.items()}
         with torch.no_grad():
             outputs = self.model(**inputs)
@@ -57,7 +61,11 @@ class DetrDetector(ObjectDetector):
         return self._parse_single_result(processed)
 
     def predict_batch(self, frames: Sequence[np.ndarray], queries: Sequence[str]) -> Sequence[DetectionResult]:
-        inputs = self.processor(images=frames, return_tensors="pt", padding=True)
+        # OpenCV frames are BGR, model expects RGB
+        import cv2
+        frames_rgb = [cv2.cvtColor(f, cv2.COLOR_BGR2RGB) for f in frames]
+
+        inputs = self.processor(images=frames_rgb, return_tensors="pt", padding=True)
         inputs = {key: value.to(self.device) for key, value in inputs.items()}
 
         with torch.no_grad():
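With this fix, callers can hand BGR frames straight from OpenCV to the detector; predict() now converts internally. A usage sketch, assuming a no-argument constructor and an image path that are illustrative rather than taken from this repo:

import cv2
from models.detectors.detr import DetrDetector

detector = DetrDetector()                      # constructor arguments assumed/defaulted for illustration
frame = cv2.imread("sample.jpg")               # BGR, as OpenCV returns it
result = detector.predict(frame, queries=["person", "car"])  # BGR -> RGB now handled inside predict()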
models/detectors/grounding_dino.py
CHANGED
@@ -74,8 +74,12 @@ class GroundingDinoDetector(ObjectDetector):
         )
 
     def predict(self, frame: np.ndarray, queries: Sequence[str]) -> DetectionResult:
+        # OpenCV frames are BGR, model expects RGB
+        import cv2
+        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+
         prompt = self._build_prompt(queries)
-        inputs = self.processor(images=frame, text=prompt, return_tensors="pt")
+        inputs = self.processor(images=frame_rgb, text=prompt, return_tensors="pt")
         inputs = {key: value.to(self.device) for key, value in inputs.items()}
         with torch.no_grad():
             outputs = self.model(**inputs)
@@ -84,9 +88,13 @@ class GroundingDinoDetector(ObjectDetector):
         return self._parse_single_result(processed_list[0])
 
     def predict_batch(self, frames: Sequence[np.ndarray], queries: Sequence[str]) -> Sequence[DetectionResult]:
+        # OpenCV frames are BGR, model expects RGB
+        import cv2
+        frames_rgb = [cv2.cvtColor(f, cv2.COLOR_BGR2RGB) for f in frames]
+
         prompt = self._build_prompt(queries)
         # Same prompt for all frames in batch
-        inputs = self.processor(images=frames, text=[prompt]*len(frames), return_tensors="pt", padding=True)
+        inputs = self.processor(images=frames_rgb, text=[prompt]*len(frames), return_tensors="pt", padding=True)
         inputs = {key: value.to(self.device) for key, value in inputs.items()}
 
         with torch.no_grad():
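For the batched GroundingDino path, the processor receives one copy of the prompt per frame ([prompt]*len(frames)) and padding=True so the ragged text encodings can be stacked into tensors. A standalone sketch of that pattern against the upstream transformers API; the checkpoint name and prompt are assumptions for illustration, not taken from this repo:

import cv2
import numpy as np
import torch
from transformers import AutoProcessor, AutoModelForZeroShotObjectDetection

ckpt = "IDEA-Research/grounding-dino-tiny"     # illustrative checkpoint
processor = AutoProcessor.from_pretrained(ckpt)
model = AutoModelForZeroShotObjectDetection.from_pretrained(ckpt)

frames_bgr = [np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8) for _ in range(2)]
frames_rgb = [cv2.cvtColor(f, cv2.COLOR_BGR2RGB) for f in frames_bgr]   # the commit's fix: BGR -> RGB
prompt = "a person. a car."                    # GroundingDino expects lowercase, period-separated phrases

# One prompt per image, padded so variable-length text encodings batch cleanly.
inputs = processor(images=frames_rgb, text=[prompt] * len(frames_rgb),
                   return_tensors="pt", padding=True)
with torch.no_grad():
    outputs = model(**inputs)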