submission-template-mobilevit

Sleeping

Guill-Bla committed on Jan 30

Commit

f82759b

verified ·

1 Parent(s): cccaa05

Update tasks/image.py

Files changed (1) hide show

tasks/image.py CHANGED Viewed

@@ -35,13 +35,15 @@ model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))
 model.eval()
 def preprocess(image):
-    image = image.resize((512,512))
-    image = np.array(image)[:, :, ::-1]  # Convert RGB to BGR
-    # Normalize pixel values to [0, 1]
     image = np.array(image, dtype=np.float32) / 255.0
     return image
 def get_bounding_boxes_from_mask(mask):
     """Extract bounding boxes from a binary mask."""
     pred_boxes = []
@@ -165,6 +167,7 @@ async def evaluate_image(request: ImageEvaluationRequest):
         # Model Inference
         # image_input = feature_extractor(images=image, return_tensors="pt").pixel_values
         image_input = feature_extractor(images=image, return_tensors="pt", padding=True).pixel_values
         with torch.no_grad():
             outputs = model(pixel_values=image_input)
@@ -173,7 +176,7 @@ async def evaluate_image(request: ImageEvaluationRequest):
         probabilities = torch.sigmoid(logits)
         predicted_mask = (probabilities[0, 1] > 0.30).cpu().numpy().astype(np.uint8)
         # predicted_mask_resized = cv2.resize(predicted_mask, (512, 512), interpolation=cv2.INTER_NEAREST)
-        predicted_mask_resized = cv2.resize(predicted_mask, original_shape, interpolation=cv2.INTER_NEAREST)
         # Extract predicted bounding boxes

 model.eval()
 def preprocess(image):
+    image = image.resize((512, 512))
+    image = np.array(image)[:, :, ::-1]  # RGB to BGR
     image = np.array(image, dtype=np.float32) / 255.0
+    # Convert back to PIL Image to maintain compatibility with feature extractor
+    image = Image.fromarray((image * 255).astype(np.uint8))
     return image
 def get_bounding_boxes_from_mask(mask):
     """Extract bounding boxes from a binary mask."""
     pred_boxes = []
         # Model Inference
         # image_input = feature_extractor(images=image, return_tensors="pt").pixel_values
         image_input = feature_extractor(images=image, return_tensors="pt", padding=True).pixel_values
+        image_input = feature_extractor(images=[image], return_tensors="pt", padding=True).pixel_values
         with torch.no_grad():
             outputs = model(pixel_values=image_input)
         probabilities = torch.sigmoid(logits)
         predicted_mask = (probabilities[0, 1] > 0.30).cpu().numpy().astype(np.uint8)
         # predicted_mask_resized = cv2.resize(predicted_mask, (512, 512), interpolation=cv2.INTER_NEAREST)
+        predicted_mask_resized = cv2.resize(predicted_mask, original_shape[::-1], interpolation=cv2.INTER_NEAREST)
         # Extract predicted bounding boxes