submission-template-mobilevit

Sleeping

App Files Files Community

Guill-Bla committed on Jan 30

Commit

ca9f528

verified ·

1 Parent(s): 3e4ce08

Update tasks/image.py

Browse files

Files changed (1) hide show

tasks/image.py +98 -44

tasks/image.py CHANGED Viewed

@@ -6,6 +6,8 @@ from sklearn.metrics import accuracy_score
 import random
 import os
 from ultralytics import YOLO
 from .utils.evaluation import ImageEvaluationRequest
 from .utils.emissions import tracker, clean_emissions_data, get_space_info
@@ -45,7 +47,19 @@ def preprocess(image):
     # Return as a PIL Image for feature extractor compatibility
     return Image.fromarray((image * 255).astype(np.uint8))
 def get_bounding_boxes_from_mask(mask):
     """Extract bounding boxes from a binary mask."""
@@ -126,7 +140,7 @@ async def evaluate_image(request: ImageEvaluationRequest):
     # Load and prepare the dataset
     dataset = load_dataset(request.dataset_name, token=os.getenv("HF_TOKEN"))
     # Split dataset
     train_test = dataset["train"].train_test_split(test_size=request.test_size, seed=request.test_seed)
     test_dataset = dataset["val"]#train_test["test"]
@@ -139,67 +153,107 @@ async def evaluate_image(request: ImageEvaluationRequest):
     # YOUR MODEL INFERENCE CODE HERE
     # Update the code below to replace the random baseline with your model inference
     #--------------------------------------------------------------------------------------------
     predictions = []
     true_labels = []
     pred_boxes = []
     true_boxes_list = []
-    for example in test_dataset:
-        # Extract image and annotations
-        image = example["image"]
-        original_shape = image.size
-        annotation = example.get("annotations", "").strip()
-        has_smoke = len(annotation) > 0
-        true_labels.append(1 if has_smoke else 0)
-        if has_smoke:
-            image_true_boxes = parse_boxes(annotation)
-            if image_true_boxes:
-                true_boxes_list.append(image_true_boxes)
             else:
                 true_boxes_list.append([])
-        else:
-            true_boxes_list.append([])
-        # Model Inference
-        # Preprocess image
-        image = preprocess(image)
-        # Ensure correct feature extraction
-        image_input = feature_extractor(images=image, return_tensors="pt").pixel_values
-        # Perform inference
-        with torch.no_grad():
-            outputs = model(pixel_values=image_input)
-            logits = outputs.logits
-        # Threshold and process the segmentation mask
-        probabilities = torch.sigmoid(logits)
-        predicted_mask = (probabilities[0, 1] > 0.30).cpu().numpy().astype(np.uint8)
-        predicted_mask_resized = cv2.resize(predicted_mask, (512,512), interpolation=cv2.INTER_NEAREST)
-        # Extract bounding boxes
-        predicted_boxes = get_bounding_boxes_from_mask(predicted_mask_resized)
-        pred_boxes.append(predicted_boxes)
-        # Smoke prediction based on bounding box presence
-        predictions.append(1 if len(predicted_boxes) > 0 else 0)
-        print(f"Prediction : {1 if len(predicted_boxes) > 0 else 0}")
-    # Filter only valid box pairs
-    filtered_true_boxes_list = []
-    filtered_pred_boxes = []
-    for true_boxes, pred_boxes_entry in zip(true_boxes_list, pred_boxes):
-        if true_boxes and pred_boxes_entry:
-            filtered_true_boxes_list.append(true_boxes)
-            filtered_pred_boxes.append(pred_boxes_entry)
-    true_boxes_list = filtered_true_boxes_list
-    pred_boxes = filtered_pred_boxes
     #--------------------------------------------------------------------------------------------

 import random
 import os
+from torch.utils.data import DataLoader
 from ultralytics import YOLO
 from .utils.evaluation import ImageEvaluationRequest
 from .utils.emissions import tracker, clean_emissions_data, get_space_info
     # Return as a PIL Image for feature extractor compatibility
     return Image.fromarray((image * 255).astype(np.uint8))
+def preprocess_batch(images):
+    """
+    Preprocess a batch of images for MobileViT inference.
+    Resize to a fixed size (512, 512) and return as PIL Images.
+    """
+    preprocessed_images = []
+    for image in images:
+        resized_image = image.resize((512, 512))
+        image_array = np.array(resized_image)[:, :, ::-1]  # Convert RGB to BGR
+        image_float = np.array(image_array, dtype=np.float32) / 255.0
+        processed_image = Image.fromarray((image_float * 255).astype(np.uint8))
+        preprocessed_images.append(processed_image)
+    return preprocessed_images
 def get_bounding_boxes_from_mask(mask):
     """Extract bounding boxes from a binary mask."""
     # Load and prepare the dataset
     dataset = load_dataset(request.dataset_name, token=os.getenv("HF_TOKEN"))
     # Split dataset
     train_test = dataset["train"].train_test_split(test_size=request.test_size, seed=request.test_seed)
     test_dataset = dataset["val"]#train_test["test"]
     # YOUR MODEL INFERENCE CODE HERE
     # Update the code below to replace the random baseline with your model inference
     #--------------------------------------------------------------------------------------------
+    dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
     predictions = []
     true_labels = []
     pred_boxes = []
     true_boxes_list = []
+    for batch_idx, batch_examples in enumerate(dataloader):
+        # Extract images and preprocess
+        images = [example["image"] for example in batch_examples]
+        annotations = [example.get("annotations", "").strip() for example in batch_examples]
+        has_smoke_list = [len(annotation) > 0 for annotation in annotations]
+        true_labels.extend([1 if has_smoke else 0 for has_smoke in has_smoke_list])
+        # Preprocess images and extract features
+        preprocessed_images = preprocess_batch(images)
+        image_inputs = feature_extractor(images=preprocessed_images, return_tensors="pt", padding=True).pixel_values
+        # Perform inference
+        with torch.no_grad():
+            outputs = model(pixel_values=image_inputs)
+            logits = outputs.logits
+        # Threshold and process the segmentation masks
+        probabilities = torch.sigmoid(logits)
+        batch_predicted_masks = (probabilities[:, 1, :, :] > 0.30).cpu().numpy().astype(np.uint8)
+        for mask in batch_predicted_masks:
+            mask_resized = cv2.resize(mask, (512, 512), interpolation=cv2.INTER_NEAREST)
+            predicted_boxes = get_bounding_boxes_from_mask(mask_resized)
+            pred_boxes.append(predicted_boxes)
+            # Append smoke detection based on bounding boxes
+            predictions.append(1 if len(predicted_boxes) > 0 else 0)
+            print(f"Batch {batch_idx + 1}, Image Prediction: {1 if len(predicted_boxes) > 0 else 0}")
+        # Parse true boxes for this batch
+        for annotation in annotations:
+            if len(annotation) > 0:
+                true_boxes_list.append(parse_boxes(annotation))
             else:
                 true_boxes_list.append([])
+    # for example in test_dataset:
+    #     # Extract image and annotations
+    #     image = example["image"]
+    #     original_shape = image.size
+    #     annotation = example.get("annotations", "").strip()
+    #     has_smoke = len(annotation) > 0
+    #     true_labels.append(1 if has_smoke else 0)
+    #     if has_smoke:
+    #         image_true_boxes = parse_boxes(annotation)
+    #         if image_true_boxes:
+    #             true_boxes_list.append(image_true_boxes)
+    #         else:
+    #             true_boxes_list.append([])
+    #     else:
+    #         true_boxes_list.append([])
+    #     # Model Inference
+    #     # Preprocess image
+    #     image = preprocess(image)
+    #     # Ensure correct feature extraction
+    #     image_input = feature_extractor(images=image, return_tensors="pt").pixel_values
+    #     # Perform inference
+    #     with torch.no_grad():
+    #         outputs = model(pixel_values=image_input)
+    #         logits = outputs.logits
+    #     # Threshold and process the segmentation mask
+    #     probabilities = torch.sigmoid(logits)
+    #     predicted_mask = (probabilities[0, 1] > 0.30).cpu().numpy().astype(np.uint8)
+    #     predicted_mask_resized = cv2.resize(predicted_mask, (512,512), interpolation=cv2.INTER_NEAREST)
+    #     # Extract bounding boxes
+    #     predicted_boxes = get_bounding_boxes_from_mask(predicted_mask_resized)
+    #     pred_boxes.append(predicted_boxes)
+    #     # Smoke prediction based on bounding box presence
+    #     predictions.append(1 if len(predicted_boxes) > 0 else 0)
+    #     print(f"Prediction : {1 if len(predicted_boxes) > 0 else 0}")
+    # # Filter only valid box pairs
+    # filtered_true_boxes_list = []
+    # filtered_pred_boxes = []
+    # for true_boxes, pred_boxes_entry in zip(true_boxes_list, pred_boxes):
+    #     if true_boxes and pred_boxes_entry:
+    #         filtered_true_boxes_list.append(true_boxes)
+    #         filtered_pred_boxes.append(pred_boxes_entry)
+    # true_boxes_list = filtered_true_boxes_list
+    # pred_boxes = filtered_pred_boxes
     #--------------------------------------------------------------------------------------------