Update app.py

app.py CHANGED
@@ -3,53 +3,71 @@ import numpy as np
 import cv2
 from PIL import Image
 import torch
-from transformers import
+from transformers import SamModel, SamProcessor
+import os
 
 # Set up device
 device = "cuda" if torch.cuda.is_available() else "cpu"
 print(f"Using device: {device}")
 
-# Load
-print("Loading
-
-
+# Load SAM model for segmentation
+print("Loading SAM model...")
+sam_model = SamModel.from_pretrained("facebook/sam-vit-base").to(device)
+sam_processor = SamProcessor.from_pretrained("facebook/sam-vit-base")
 
-def
-    """
-    # Convert to
-    if
-        image_pil = Image.fromarray(image)
-    else:
+def get_sam_masks(image):
+    """Get segmentation masks using SAM model"""
+    # Convert to numpy if needed
+    if isinstance(image, Image.Image):
         image_pil = image
-
-
-
-
-
-
-
+        image_np = np.array(image)
+    else:
+        image_np = image
+        image_pil = Image.fromarray(image_np)
+
+    h, w = image_np.shape[:2]
+
+    # Create a grid of points to sample the image
+    x_points = np.linspace(w//4, 3*w//4, 5, dtype=int)
+    y_points = np.linspace(h//4, 3*h//4, 5, dtype=int)
+    grid_points = []
+    for y in y_points:
+        for x in x_points:
+            grid_points.append([x, y])
+    points = [grid_points]
+
+    # Process image through SAM
+    inputs = sam_processor(
+        images=image_pil,
+        input_points=points,
+        return_tensors="pt"
+    ).to(device)
+
+    # Generate masks
     with torch.no_grad():
-        outputs =
-
-
-
-
+        outputs = sam_model(**inputs)
+    masks = sam_processor.image_processor.post_process_masks(
+        outputs.pred_masks.cpu(),
+        inputs["original_sizes"].cpu(),
+        inputs["reshaped_input_sizes"].cpu()
+    )
+
+    # Combine all masks to create importance map
+    importance_map = np.zeros((h, w), dtype=np.float32)
+    individual_masks = []
 
-
-
+    for i in range(len(masks[0])):
+        mask = masks[0][i].numpy().astype(np.float32)
+        individual_masks.append(mask)
+        importance_map += mask
 
-    #
-
-
-        detected_boxes.append({
-            'box': box,
-            'score': score.item(),
-            'label': model.config.id2label[label.item()]
-        })
+    # Normalize to 0-1
+    if importance_map.max() > 0:
+        importance_map = importance_map / importance_map.max()
 
-    return detected_boxes
+    return importance_map, individual_masks
 
-def find_optimal_crop(image, target_ratio, objects):
+def find_optimal_crop(image, target_ratio, importance_map):
     """Find the optimal crop area that preserves important content while matching target ratio"""
     # Get image dimensions
     if not isinstance(image, np.ndarray):
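A note on the mask handling in this hunk: `post_process_masks` returns one tensor per image shaped (prompt batch, masks per prompt, height, width), and SAM proposes three candidate masks per prompt by default, so `masks[0][i]` is a (3, H, W) stack rather than a single (h, w) mask, and `importance_map += mask` would fail to broadcast. A minimal sketch of the accumulation loop that collapses the candidates first, assuming the default multimask output; the `any` reduction is just one reasonable choice:

    for i in range(len(masks[0])):
        # masks[0][i] is a (masks_per_prompt, H, W) boolean stack from SAM
        mask_stack = masks[0][i].numpy()
        # Collapse the candidate masks into a single (H, W) float mask
        mask = mask_stack.any(axis=0).astype(np.float32)
        individual_masks.append(mask)
        importance_map += mask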
@@ -59,34 +77,6 @@ def find_optimal_crop(image, target_ratio, objects):
     current_ratio = w / h
     target_ratio_value = eval(target_ratio.replace(':', '/'))
 
-    # If no objects detected, use center crop
-    if not objects:
-        if current_ratio > target_ratio_value:
-            # Need to crop width
-            new_width = int(h * target_ratio_value)
-            left = (w - new_width) // 2
-            right = left + new_width
-            return (left, 0, right, h)
-        else:
-            # Need to crop height
-            new_height = int(w / target_ratio_value)
-            top = (h - new_height) // 2
-            bottom = top + new_height
-            return (0, top, w, bottom)
-
-    # Create a combined importance map from all detected objects
-    importance_map = np.zeros((h, w), dtype=np.float32)
-
-    # Add all objects to the importance map
-    for obj in objects:
-        x1, y1, x2, y2 = obj['box']
-        # Ensure box is within image boundaries
-        x1, y1 = max(0, x1), max(0, y1)
-        x2, y2 = min(w-1, x2), min(h-1, y2)
-
-        # Add object to importance map with its confidence score
-        importance_map[y1:y2, x1:x2] = max(importance_map[y1:y2, x1:x2], obj['score'])
-
     # If current ratio is wider than target, we need to crop width
     if current_ratio > target_ratio_value:
         new_width = int(h * target_ratio_value)
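One caution on a context line this hunk leaves untouched: `target_ratio_value = eval(target_ratio.replace(':', '/'))`. This works for the dropdown's fixed choices, but `eval` on user-visible input is fragile and unsafe if the ratio ever becomes free-form. A split-based parse avoids it; `parse_ratio` is an illustrative name, not part of the commit:

    def parse_ratio(ratio_str):
        # "16:9" -> 16/9 without eval(); raises ValueError on malformed input
        w_str, h_str = ratio_str.split(':')
        return float(w_str) / float(h_str)

    target_ratio_value = parse_ratio(target_ratio)

Note also that the deletion above removes the old center-crop fallback for the no-detections case; in the SAM path an all-zero importance map now flows through the same optimization code instead of short-circuiting to a centered crop.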
@@ -144,16 +134,16 @@ def apply_crop(image, crop_box):
 
 def adjust_aspect_ratio(image, target_ratio):
     """Main function to adjust aspect ratio through intelligent cropping"""
-    #
-
+    # Get segmentation masks and importance map
+    importance_map, _ = get_sam_masks(image)
 
     # Find optimal crop box
-    crop_box = find_optimal_crop(image, target_ratio,
+    crop_box = find_optimal_crop(image, target_ratio, importance_map)
 
     # Apply the crop
     result = apply_crop(image, crop_box)
 
-    return result
+    return result, importance_map
 
 def process_image(input_image, target_ratio="16:9"):
     """Process function for Gradio interface"""
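Since `adjust_aspect_ratio` now returns a pair, any caller written against the old single-value contract breaks; the next hunk updates `process_image`, the only caller this diff shows. Usage under the new contract, for illustration:

    # Both the cropped image and the SAM importance map come back
    result, importance_map = adjust_aspect_ratio(image, "16:9")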
@@ -165,7 +155,7 @@ def process_image(input_image, target_ratio="16:9"):
         image = input_image
 
         # Adjust aspect ratio
-        result = adjust_aspect_ratio(image, target_ratio)
+        result, importance_map = adjust_aspect_ratio(image, target_ratio)
 
         # Convert result to appropriate format
         if isinstance(result, np.ndarray):
@@ -173,15 +163,26 @@ def process_image(input_image, target_ratio="16:9"):
         else:
             result_pil = result
 
-        return result_pil
+        # Visualize importance map for debugging
+        if isinstance(importance_map, np.ndarray):
+            # Convert to heatmap
+            heatmap = (importance_map * 255).astype(np.uint8)
+            heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
+
+            # Convert to PIL
+            heatmap_pil = Image.fromarray(heatmap)
+
+            return [result_pil, heatmap_pil]
+
+        return [result_pil, None]
 
     except Exception as e:
         print(f"Error processing image: {e}")
-        return None
+        return [None, None]
 
 # Create the Gradio interface
-with gr.Blocks(title="Smart Crop Aspect Ratio Adjuster") as demo:
-    gr.Markdown("# Smart Crop Aspect Ratio Adjuster")
+with gr.Blocks(title="SAM-Based Smart Crop Aspect Ratio Adjuster") as demo:
+    gr.Markdown("# SAM-Based Smart Crop Aspect Ratio Adjuster")
     gr.Markdown("Upload an image, choose your target aspect ratio, and the AI will intelligently crop it to preserve important content.")
 
     with gr.Row():
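One likely wrinkle in the new debug view: `cv2.applyColorMap` returns a BGR array, while `Image.fromarray` treats three-channel input as RGB, so the heatmap's reds and blues render swapped. A small fix, reusing the variables from the hunk above:

    heatmap = (importance_map * 255).astype(np.uint8)
    heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
    # OpenCV colormaps are BGR; convert before handing the array to PIL
    heatmap = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB)
    heatmap_pil = Image.fromarray(heatmap)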
@@ -199,23 +200,24 @@
 
         with gr.Column():
             output_image = gr.Image(label="Processed Image")
+            importance_map_vis = gr.Image(label="Importance Map (Debug View)")
 
     submit_btn.click(
         process_image,
         inputs=[input_image, aspect_ratio],
-        outputs=output_image
+        outputs=[output_image, importance_map_vis]
     )
 
     gr.Markdown("""
    ## How it works
-    1. **
-    2. **Importance Mapping**:
-    3. **Smart Cropping**:
+    1. **Segmentation**: Uses Meta's Segment Anything Model (SAM) to identify important regions in your image
+    2. **Importance Mapping**: Creates a heatmap of important areas based on segmentation masks
+    3. **Smart Cropping**: Finds the optimal crop window that preserves the most important content
 
    ## Tips
-    - For best results, ensure important subjects are visible
+    - For best results, ensure important subjects are clearly visible in the image
+    - The importance map shows what the AI considers important (red/yellow = important, blue = less important)
     - Try different aspect ratios to see what works best with your image
-    - The model works best with clear, well-lit images with distinct objects
     """)
 
 # Launch the app
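With `outputs=[output_image, importance_map_vis]`, Gradio expects every return path of `process_image` to yield two values, which the updated returns ([result_pil, heatmap_pil], [result_pil, None], and [None, None]) all satisfy. A quick smoke test, not part of the commit:

    # Every return path must match the two declared outputs
    out = process_image(np.zeros((480, 640, 3), dtype=np.uint8), "1:1")
    assert isinstance(out, list) and len(out) == 2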