Update tasks/image.py
tasks/image.py  CHANGED  (+22 -4)
@@ -13,7 +13,6 @@ from PIL import Image
 from transformers import MobileViTImageProcessor, MobileViTForSemanticSegmentation
 import cv2
 from tqdm import tqdm
-from dataset import WildfireSmokeDataset
 from torch.utils.data import DataLoader
 
 from dotenv import load_dotenv
@@ -30,6 +29,19 @@ model = MobileViTForSemanticSegmentation.from_pretrained("apple/deeplabv3-mobile
 model.load_state_dict(torch.load(model_path))
 model.eval()
 
+def preprocess(image):
+    image = image.resize((512, 512))
+
+    # Convert to BGR
+    image = np.array(image)[:, :, ::-1]  # Convert RGB to BGR
+    image = Image.fromarray(image)
+    image = image.resize(self.image_size)
+
+    # Normalize pixel values to [0, 1]
+    image = np.array(image, dtype=np.float32) / 255.0
+
+    return image
+
 def get_bounding_boxes_from_mask(mask):
     """Extract bounding boxes from a binary mask."""
     pred_boxes = []
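A note on the new helper: preprocess references self.image_size but is defined as a module-level function, so that second resize would raise a NameError if reached, and it is redundant with the (512, 512) resize on the first line. A minimal standalone sketch of what the helper appears to intend, assuming numpy and PIL are imported as elsewhere in the file (the target_size parameter is an illustration, not part of this commit):

def preprocess(image, target_size=(512, 512)):
    # Resize the PIL image to the model's expected input size
    image = image.resize(target_size)
    # Reorder channels from RGB to BGR
    image = np.array(image)[:, :, ::-1]
    # Normalize pixel values to [0, 1]
    return image.astype(np.float32) / 255.0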
@@ -39,7 +51,7 @@ def get_bounding_boxes_from_mask(mask):
         x, y, w, h = cv2.boundingRect(contour)
         pred_boxes.append((x, y, x + w, y + h))
     return pred_boxes
-
+
 def parse_boxes(annotation_string):
     """Parse multiple boxes from a single annotation string.
     Each box has 5 values: class_id, x_center, y_center, width, height"""
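For context, the contour loop above is the tail of get_bounding_boxes_from_mask; the lines that produce contour are outside this diff, but presumably follow the usual OpenCV pattern, roughly (a sketch, not the committed code):

contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
for contour in contours:
    # Axis-aligned box around each detected smoke blob, stored as (x1, y1, x2, y2)
    x, y, w, h = cv2.boundingRect(contour)
    pred_boxes.append((x, y, x + w, y + h))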
@@ -130,6 +142,10 @@ async def evaluate_image(request: ImageEvaluationRequest):
     for example in test_dataset:
         # Extract image and annotations
         image = example["image"]
+
+        original_shape = (len(image), len(image[0]))
+        image = preprocess(image)
+
         annotation = example.get("annotations", "").strip()
 
 
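One caveat with the new loop code: example["image"] is typically a PIL image in a Hugging Face dataset, and len() on a PIL image raises a TypeError, so original_shape only works if the image is already an array (where it yields (rows, cols)). cv2.resize, by contrast, expects its size argument as (width, height). A hedged sketch of capturing the size in the order cv2.resize expects (original_size is an illustrative name, not from the commit):

if isinstance(image, Image.Image):
    original_size = image.size                        # PIL already reports (width, height)
else:
    original_size = (image.shape[1], image.shape[0])  # numpy is (rows, cols); swap to (width, height)
image = preprocess(image)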
@@ -154,8 +170,10 @@ async def evaluate_image(request: ImageEvaluationRequest):
 
         probabilities = torch.sigmoid(logits)
         predicted_mask = (probabilities[0, 1] > 0.30).cpu().numpy().astype(np.uint8)
-        predicted_mask_resized = cv2.resize(predicted_mask, (512, 512), interpolation=cv2.INTER_NEAREST)
-
+        # predicted_mask_resized = cv2.resize(predicted_mask, (512, 512), interpolation=cv2.INTER_NEAREST)
+        predicted_mask_resized = cv2.resize(predicted_mask, original_shape, interpolation=cv2.INTER_NEAREST)
+
+
         # Extract predicted bounding boxes
         predicted_boxes = get_bounding_boxes_from_mask(predicted_mask_resized)
         pred_boxes.append(predicted_boxes)
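Downstream, the resized mask feeds the box extraction unchanged. For reference, the same step written against the (width, height) tuple from the sketch above (an illustration under those assumptions, not the committed code):

probabilities = torch.sigmoid(logits)
predicted_mask = (probabilities[0, 1] > 0.30).cpu().numpy().astype(np.uint8)
# cv2.resize takes dsize as (width, height); nearest-neighbour keeps the mask binary
predicted_mask_resized = cv2.resize(predicted_mask, original_size, interpolation=cv2.INTER_NEAREST)
predicted_boxes = get_bounding_boxes_from_mask(predicted_mask_resized)
pred_boxes.append(predicted_boxes)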