Spaces:

paddeh
/

is-it-max

Sleeping

App Files Files Community

paddeh committed on Feb 20, 2025

Commit

5106f97

1 Parent(s): fa0ab0e

Import code from training notebook

Browse files

Files changed (4) hide show

app.py +43 -3
classes.json +10 -0
functions.py +119 -0
requirements.txt +1 -0

app.py CHANGED Viewed

@@ -1,13 +1,53 @@
 import gradio as gr
 from transformers import AutoModelForImageClassification, AutoImageProcessor
-from PIL import Image
 import torch
-model_name = "paddeh/is-it-max"
 model = AutoModelForImageClassification.from_pretrained(model_name)
 processor = AutoImageProcessor.from_pretrained(model_name)
 def classify_image(image):
     inputs = processor(images=image, return_tensors="pt")
     with torch.no_grad():
@@ -15,5 +55,5 @@ def classify_image(image):
     predicted_class = logits.argmax(-1).item()
     return f"Predicted class: {predicted_class}"
-iface = gr.Interface(fn=classify_image, inputs="image", outputs="text")
 iface.launch()

 import gradio as gr
 from transformers import AutoModelForImageClassification, AutoImageProcessor
 import torch
+from torchvision import transforms, models
+from torchvision.models.segmentation import deeplabv3_resnet101, DeepLabV3_ResNet101_Weights
# Absolute import: this assumes app.py is launched as a top-level script
# (no parent package), in which case `from .functions import ...` raises
# ImportError.
from functions import import_class_labels, segment_image, crop_dog

# Single place that decides where the models and input tensors live.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load DeepLabV3 model for segmentation.
# eval() matters here: the model is only used for single-image inference,
# and normalisation/dropout layers must not run in training mode.
seg_model = deeplabv3_resnet101(weights=DeepLabV3_ResNet101_Weights.DEFAULT)
seg_model = seg_model.to(device).eval()

# Load trained model and feature extractor
model_name = "paddeh/is-it-max"
model = AutoModelForImageClassification.from_pretrained(model_name)
model = model.to(device).eval()
processor = AutoImageProcessor.from_pretrained(model_name)

class_labels = import_class_labels('./')

# Target size for the classifier input, taken from the processor config
# instead of an undefined `model_img_size` name.
# NOTE(review): assumes `processor.size` is either an int or a dict keyed by
# "height"/"width" or "shortest_edge" -- confirm against the checkpoint.
_processor_size = processor.size
if isinstance(_processor_size, dict):
    if "height" in _processor_size and "width" in _processor_size:
        model_img_size = (_processor_size["height"], _processor_size["width"])
    else:
        model_img_size = _processor_size["shortest_edge"]
else:
    model_img_size = _processor_size

# Define image transformations (normalisation statistics come from the
# loaded `processor`; the original referenced an undefined
# `feature_extractor`).
transform = transforms.Compose([
    transforms.Resize(model_img_size, interpolation=transforms.InterpolationMode.BICUBIC),
    transforms.ToTensor(),
    transforms.Normalize(mean=processor.image_mean, std=processor.image_std),
])
def classify_image_with_cropping(image):
    """Segment the image, crop it to the dog, and classify the crop.

    Args:
        image: Input image; it is passed straight to ``segment_image``.

    Returns:
        A ``(cropped_image, text)`` pair where ``text`` is
        ``"Predicted class: <label>"``. When segmentation yields no mask the
        pair is ``(None, 'unknown')``.
    """
    # 1. Segment the image
    image, mask = segment_image(image, seg_model)
    if mask is None:
        print("Skipping due to failed segmentation.")
        return None, 'unknown'

    # 2. Crop to the dog (if found)
    cropped_image = crop_dog(image, mask)

    # 3. Preprocess and classify the cropped image.
    # The tensor is placed on whatever device the classifier lives on, so the
    # function does not depend on a separately defined `device` global.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")
    input_tensor = transform(cropped_image).unsqueeze(0).to(model_device)
    with torch.no_grad():
        outputs = model(input_tensor)

    predicted_class_idx = outputs.logits.argmax(-1).item()
    predicted_label = class_labels[predicted_class_idx]
    # Report the human-readable label (the original formatted an undefined
    # `predicted_class` name here, which raised NameError).
    return cropped_image, f"Predicted class: {predicted_label}"
 def classify_image(image):
     inputs = processor(images=image, return_tensors="pt")
     with torch.no_grad():
     predicted_class = logits.argmax(-1).item()
     return f"Predicted class: {predicted_class}"
# The handler returns (image, text), so two output components are declared as
# a list. The input is requested as a PIL image because the segmentation step
# calls `.convert("RGB")` on it.
iface = gr.Interface(
    fn=classify_image_with_cropping,
    inputs=gr.Image(type="pil"),
    outputs=["image", "text"],
)
iface.launch()

classes.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+    "class_names": [
+        "max",
+        "not_max"
+    ],
+    "class_to_idx": {
+        "max": 0,
+        "not_max": 1
+    }
+}

functions.py ADDED Viewed

	@@ -0,0 +1,119 @@

+import os
+import json
+import torch
+from torchvision import transforms
+import numpy as np
+import cv2
+import skimage.segmentation as seg
# Label value that marks "dog" pixels in the segmentation mask.
dog_class = 12


def import_class_labels(model_path):
    """Load the class names from ``classes.json``, ordered by class index.

    Args:
        model_path: Directory that contains ``classes.json``.

    Returns:
        A list of class names. When the file has a ``class_to_idx`` mapping
        the names are sorted by their index; otherwise the ``class_names``
        list is returned in file order.

    Raises:
        OSError: If ``classes.json`` cannot be opened.
        KeyError: If the file has neither ``class_to_idx`` nor
            ``class_names``.
    """
    classes_file_path = os.path.join(model_path, "classes.json")
    with open(classes_file_path, "r", encoding="utf-8") as f:
        class_data = json.load(f)

    class_to_idx = class_data.get("class_to_idx")
    if not class_to_idx:
        # No explicit index mapping: trust the order of the plain name list.
        return list(class_data["class_names"])

    # Position i of the result must be the name whose index is i, because
    # callers index this list with the classifier's argmax.
    return sorted(class_to_idx, key=class_to_idx.get)
def refine_dog_mask(mask, image):
    """Grow and consolidate the dog region of a segmentation mask.

    Pixels labelled ``dog_class`` are extracted into a binary mask, gaps are
    closed and the region is dilated with a 15x15 kernel, and every pixel of
    the grown region is then labelled ``dog_class`` in the result.

    Args:
        mask: 2-D array of per-pixel class labels.
        image: Unused; kept so existing callers keep working. The original
            ran SLIC superpixels on it, but the result was overwritten before
            it could affect the output, so that step was dropped.

    Returns:
        A new ``np.uint8`` array; the input ``mask`` is not modified.
    """
    # Binary mask of the dog pixels (uses the shared constant rather than a
    # second hard-coded 12).
    dog_mask = (mask == dog_class).astype(np.uint8)

    # Apply morphological operations to connect fragmented segments
    kernel = np.ones((15, 15), np.uint8)
    dog_mask = cv2.morphologyEx(dog_mask, cv2.MORPH_CLOSE, kernel)  # Close gaps
    dog_mask = cv2.dilate(dog_mask, kernel, iterations=2)  # Expand segmentation

    # astype() copies, so the caller's array stays untouched.
    refined = mask.astype(np.uint8)
    refined[dog_mask == 1] = dog_class
    return refined
def segment_image(image, seg_model):
    """Run the segmentation model on an image and return a refined mask.

    Args:
        image: Image object exposing ``convert("RGB")``.
        seg_model: Segmentation model whose output is a mapping with an
            ``'out'`` entry of per-class scores.
            NOTE(review): presumably expected to be in eval mode already --
            confirm at the call site.

    Returns:
        ``(rgb_image, mask)`` where ``mask`` is the array produced by
        ``refine_dog_mask``. If the model predicts only class 0 (treated as
        background) for every pixel, ``mask`` is ``None``.
    """
    image = image.convert("RGB")

    # Put the input on the same device as the model instead of relying on a
    # `device` global that this module never defines.
    first_param = next(seg_model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    image_tensor = transforms.ToTensor()(image).unsqueeze(0).to(model_device)
    with torch.no_grad():
        output = seg_model(image_tensor)['out'][0]
    mask = output.argmax(0)  # Stays on the model's device for the check below

    # Any non-zero label counts as "something was segmented"; whether a dog
    # was actually found is decided later when cropping.
    if not bool((mask != 0).any()):
        # The original message interpolated an undefined `image_path`.
        print('No segmentation found for the supplied image')
        return image, None  # Skip image if no valid segmentation found

    mask = mask.cpu().numpy()  # Move to CPU only when needed
    mask = refine_dog_mask(mask, image)
    return image, mask
def crop_dog(image, mask, target_aspect=1, padding=20, class_id=None):
    """Crop ``image`` to the bounding box of the dog pixels in ``mask``.

    The box is widened or heightened towards ``target_aspect``
    (width / height), padded, and clamped to the mask bounds, so the final
    crop may miss the target aspect ratio near the borders.

    Args:
        image: Object exposing ``crop((left, upper, right, lower))``.
        mask: 2-D array of per-pixel class labels.
        target_aspect: Desired width / height ratio of the crop.
        padding: Pixels added on every side after the aspect adjustment.
        class_id: Label to crop around; defaults to the module-level
            ``dog_class``.

    Returns:
        The cropped image, or ``image`` unchanged when no pixel carries the
        requested label.
    """
    if class_id is None:
        class_id = dog_class

    # Get bounding box of the dog
    y_indices, x_indices = np.where(mask == class_id)
    if len(y_indices) == 0:
        return image  # No dog detected

    mask_h, mask_w = mask.shape[0], mask.shape[1]

    # Plain Python ints keep the arithmetic below free of NumPy scalar
    # warnings and hand ordinary ints to image.crop().
    x_min, x_max = int(x_indices.min()), int(x_indices.max())
    y_min, y_max = int(y_indices.min()), int(y_indices.max())

    # NOTE(review): these are max - min, i.e. one less than the pixel count,
    # and crop() treats right/lower as exclusive. Kept as in the original.
    width = x_max - x_min
    height = y_max - y_min

    # A box with zero height is treated as "too wide" when it has any width,
    # which is what the original inf/nan comparison resolved to, but without
    # dividing by zero.
    if height > 0:
        too_wide = width / height > target_aspect
    else:
        too_wide = width > 0

    # Adjust bounding box to match target aspect ratio
    if too_wide:
        diff = (width / target_aspect - height) / 2
        y_min = max(0, int(y_min - diff))
        y_max = min(mask_h, int(y_max + diff))
    else:
        diff = (height * target_aspect - width) / 2
        x_min = max(0, int(x_min - diff))
        x_max = min(mask_w, int(x_max + diff))

    # Apply padding
    x_min = max(0, x_min - padding)
    x_max = min(mask_w, x_max + padding)
    y_min = max(0, y_min - padding)
    y_max = min(mask_h, y_max + padding)

    return image.crop((x_min, y_min, x_max, y_max))

requirements.txt CHANGED Viewed

@@ -2,3 +2,4 @@ transformers
 torch
 gradio
 Pillow

 torch
 gradio
 Pillow
+torchvision