Spaces:

mostlycached
/

aspect-ratio-adjuster

Runtime error

App Files Files Community

mostlycached committed on Apr 28, 2025

Commit

2698a3f

verified ·

1 Parent(s): b344378

Update app.py

Browse files

Files changed (1) hide show

app.py +80 -121

app.py CHANGED Viewed

@@ -2,10 +2,11 @@ import gradio as gr
 import torch
 import numpy as np
 import cv2
-from PIL import Image, ImageOps
 from transformers import SamModel, SamProcessor
 from diffusers import StableDiffusionInpaintPipeline
-import os
 # Set up device
 device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -23,35 +24,27 @@ inpaint_model = StableDiffusionInpaintPipeline.from_pretrained(
     torch_dtype=torch.float16 if device == "cuda" else torch.float32
 ).to(device)
-def get_importance_map(image, points=None):
-    """Get importance map using SAM model to identify key content regions"""
-    # Convert to numpy if needed
-    if isinstance(image, Image.Image):
-        image_np = np.array(image)
     else:
-        image_np = image
-    h, w = image_np.shape[:2]
-    # If no points provided, use grid sampling to identify important areas
-    if points is None:
-        # Create a grid of points to sample the image
-        x_points = np.linspace(w//4, 3*w//4, 5, dtype=int)
-        y_points = np.linspace(h//4, 3*h//4, 5, dtype=int)
-        grid_points = []
-        for y in y_points:
-            for x in x_points:
-                grid_points.append([x, y])
-        points = [grid_points]
-    # Process image through SAM
     inputs = sam_processor(
-        images=image_np,
         input_points=points,
         return_tensors="pt"
     ).to(device)
-    # Generate masks
     with torch.no_grad():
         outputs = sam_model(**inputs)
         masks = sam_processor.image_processor.post_process_masks(
@@ -60,123 +53,86 @@ def get_importance_map(image, points=None):
             inputs["reshaped_input_sizes"].cpu()
         )
-    # Combine all masks to create importance map
-    importance_map = np.zeros((h, w), dtype=np.float32)
-    for i in range(len(masks[0])):
-        importance_map += masks[0][i].numpy().astype(np.float32)
-    # Normalize to 0-1
-    if importance_map.max() > 0:
-        importance_map = importance_map / importance_map.max()
-    return importance_map
def find_optimal_placement(importance_map, original_size, new_size):
    """Choose where to paste the original image inside the new canvas.

    The original image is always pasted whole, so the total importance it
    carries is the same for every offset. Candidates are therefore ranked by
    how close they bring the importance-weighted centroid of the image to the
    centre of the canvas. Content near the image border gets extra weight, so
    important content touching an edge pulls more new canvas onto that side.

    Args:
        importance_map: 2-D array with the same (height, width) as
            ``original_size``; larger values mean more important pixels.
        original_size: ``(height, width)`` of the original image.
        new_size: ``(height, width)`` of the target canvas.

    Returns:
        ``(x_offset, y_offset)`` of the image's top-left corner in the canvas.
    """
    oh, ow = original_size
    nh, nw = new_size

    # The canvas cannot hold the whole image in some dimension: just centre
    # it, clamping negative offsets to 0.
    if nh < oh or nw < ow:
        return max(0, (nw - ow) // 2), max(0, (nh - oh) // 2)

    # Free room along each axis (0 when that dimension is unchanged).
    slack_x = nw - ow
    slack_y = nh - oh

    # Border weighting: 1.0 on the outermost ring of pixels, falling to 0.0
    # at the pixels farthest from any edge.
    y_coords, x_coords = np.ogrid[:oh, :ow]
    edge_distance = np.minimum(
        np.minimum(x_coords, ow - 1 - x_coords),
        np.minimum(y_coords, oh - 1 - y_coords),
    )
    max_distance = edge_distance.max() if edge_distance.size else 0
    if max_distance > 0:
        border_weight = 1.0 - edge_distance / max_distance
    else:
        # Images at most 2 px thick consist only of border pixels; avoid 0/0.
        border_weight = np.ones((oh, ow))
    weighted_importance = np.asarray(importance_map, dtype=np.float64) * (
        1.0 + 0.5 * border_weight
    )

    total = float(weighted_importance.sum())
    if not np.isfinite(total) or total <= 0.0:
        # Nothing was marked important: centring is the neutral choice.
        return slack_x // 2, slack_y // 2

    centroid_x = float((weighted_importance * x_coords).sum()) / total
    centroid_y = float((weighted_importance * y_coords).sum()) / total
    canvas_cx = (nw - 1) / 2.0
    canvas_cy = (nh - 1) / 2.0

    # Evaluate the 9 anchor positions (corners, edge midpoints, centre) and
    # keep the one that leaves the centroid closest to the canvas centre.
    best_x, best_y = 0, 0
    best_cost = np.inf
    for x in (0, slack_x // 2, slack_x):
        for y in (0, slack_y // 2, slack_y):
            cost = (x + centroid_x - canvas_cx) ** 2 + (y + centroid_y - canvas_cy) ** 2
            if cost < best_cost:
                best_cost = cost
                best_x = x
                best_y = y

    return best_x, best_y
def adjust_aspect_ratio(image, target_ratio, prompt=""):
    """Extend an image to a target aspect ratio by outpainting the new area.

    The original pixels are kept intact; the canvas is widened or heightened
    as needed and the added area is filled by the inpainting pipeline.

    Args:
        image: PIL image or NumPy array (H x W, H x W x 3 or H x W x 4).
        target_ratio: Aspect ratio written as ``"W:H"`` (e.g. ``"16:9"``);
            ``"W/H"`` and a plain number such as ``"1.5"`` are also accepted.
        prompt: Text prompt for the outpainted area; a generic prompt is used
            when it is empty or whitespace.

    Returns:
        NumPy array holding the image at the target aspect ratio.

    Raises:
        ValueError: If ``target_ratio`` is not a positive, finite ratio.
    """
    # Convert PIL to numpy if needed
    if isinstance(image, Image.Image):
        image_np = np.array(image)
    else:
        image_np = np.asarray(image)

    # The canvas below is 3-channel: drop alpha and expand grayscale so the
    # paste does not fail on a shape mismatch.
    has_alpha = image_np.ndim == 3 and image_np.shape[2] == 4
    if image_np.ndim == 2:
        image_np = np.stack([image_np] * 3, axis=-1)
    elif has_alpha:
        image_np = image_np[:, :, :3]

    h, w = image_np.shape[:2]
    current_ratio = w / h

    # Parse the ratio explicitly instead of eval(): the value comes from the UI.
    numerator, separator, denominator = (
        str(target_ratio).strip().replace("/", ":").partition(":")
    )
    try:
        target_ratio_value = float(numerator) / (float(denominator) if separator else 1.0)
    except (ValueError, ZeroDivisionError) as err:
        raise ValueError(f"Invalid target aspect ratio: {target_ratio!r}") from err
    if not (np.isfinite(target_ratio_value) and target_ratio_value > 0):
        raise ValueError(f"Invalid target aspect ratio: {target_ratio!r}")

    # Generate importance map to identify key regions
    importance_map = get_importance_map(image_np)

    # Calculate new dimensions. round() avoids losing a pixel to float error,
    # and max() guarantees the canvas is never smaller than the original.
    if current_ratio < target_ratio_value:
        # Need to add width (outpaint left/right)
        new_width = max(w, int(round(h * target_ratio_value)))
        new_height = h
    else:
        # Need to add height (outpaint top/bottom)
        new_width = w
        new_height = max(h, int(round(w / target_ratio_value)))

    # Already at the target ratio: nothing to outpaint.
    if new_width == w and new_height == h:
        return image_np

    # Find optimal placement based on importance map
    x_offset, y_offset = find_optimal_placement(
        importance_map, (h, w), (new_height, new_width)
    )

    # Black canvas plus a mask that is 255 where new content must be generated
    # and 0 over the pasted original.
    result = np.zeros((new_height, new_width, 3), dtype=np.uint8)
    mask = np.full((new_height, new_width), 255, dtype=np.uint8)
    result[y_offset:y_offset + h, x_offset:x_offset + w] = image_np
    mask[y_offset:y_offset + h, x_offset:x_offset + w] = 0

    # Convert to PIL for inpainting
    result_pil = Image.fromarray(result)
    mask_pil = Image.fromarray(mask)

    # Use default prompt if none provided
    if not prompt or not prompt.strip():
        if has_alpha:
            prompt = "seamless extension of the image, same style and content"
        else:
            prompt = "seamless extension of the image, same style, same scene, consistent lighting"

    # Perform outpainting using Stable Diffusion
    output = inpaint_model(
        prompt=prompt,
        image=result_pil,
        mask_image=mask_pil,
        guidance_scale=7.5,
        num_inference_steps=30
    ).images[0]

    # The pipeline is called without height/width, so it may return an image
    # at its own working resolution; resize back to the requested canvas size.
    if output.size != result_pil.size:
        output = output.resize(result_pil.size)

    return np.array(output)
@@ -184,7 +140,7 @@ def adjust_aspect_ratio(image, target_ratio, prompt=""):
 def process_image(input_image, target_ratio="16:9", prompt=""):
     """Main processing function for the Gradio interface"""
     try:
-        # Convert from Gradio format if needed
         if isinstance(input_image, dict) and 'image' in input_image:
             image = input_image['image']
         else:
@@ -196,8 +152,11 @@ def process_image(input_image, target_ratio="16:9", prompt=""):
         else:
             image_np = image
         # Adjust aspect ratio while preserving content
-        result = adjust_aspect_ratio(image_np, target_ratio, prompt)
         # Convert result to PIL for visualization
         result_pil = Image.fromarray(result)
@@ -209,9 +168,9 @@ def process_image(input_image, target_ratio="16:9", prompt=""):
         return None
 # Create the Gradio interface
-with gr.Blocks(title="Smart Aspect Ratio Adjuster") as demo:
-    gr.Markdown("# Smart Aspect Ratio Adjuster")
-    gr.Markdown("Upload an image, choose your target aspect ratio, and the AI will adjust it while intelligently preserving important content.")
     with gr.Row():
         with gr.Column():
@@ -219,7 +178,7 @@ with gr.Blocks(title="Smart Aspect Ratio Adjuster") as demo:
             with gr.Row():
                 aspect_ratio = gr.Dropdown(
-                    choices=["16:9", "4:3", "1:1", "9:16", "3:4", "2:1", "1:2"],
                     value="16:9",
                     label="Target Aspect Ratio"
                 )
@@ -242,9 +201,9 @@ with gr.Blocks(title="Smart Aspect Ratio Adjuster") as demo:
     gr.Markdown("""
     ## How it works
-    1. **Content Analysis**: SAM (Segment Anything Model) identifies important regions in your image
-    2. **Smart Placement**: The algorithm calculates optimal positioning to preserve key content
-    3. **AI Outpainting**: Stable Diffusion fills in new areas with matching content
     ## Tips
     - For best results, provide a descriptive prompt that matches the scene

 import torch
 import numpy as np
 import cv2
+from PIL import Image
 from transformers import SamModel, SamProcessor
 from diffusers import StableDiffusionInpaintPipeline
+import requests
+from io import BytesIO
 # Set up device
 device = "cuda" if torch.cuda.is_available() else "cpu"
     torch_dtype=torch.float16 if device == "cuda" else torch.float32
 ).to(device)
+def get_sam_mask(image, points=None):
+    """Get segmentation mask using SAM model"""
+    if points is None:
+        # If no points provided, use center point
+        height, width = image.shape[:2]
+        points = [[[width // 2, height // 2]]]
+    # Convert to PIL if needed
+    if not isinstance(image, Image.Image):
+        image_pil = Image.fromarray(image)
     else:
+        image_pil = image
+    # Process the image and point prompts
     inputs = sam_processor(
+        images=image_pil,
         input_points=points,
         return_tensors="pt"
     ).to(device)
+    # Generate mask
     with torch.no_grad():
         outputs = sam_model(**inputs)
         masks = sam_processor.image_processor.post_process_masks(
             inputs["reshaped_input_sizes"].cpu()
         )
+    # Get the mask
+    mask = masks[0][0].numpy()
+    return mask
def adjust_aspect_ratio(image, mask, target_ratio, prompt=""):
    """Pad an image to a target aspect ratio and outpaint the padded area.

    The original image is centred on a larger canvas; the new strips on the
    left/right or top/bottom are filled by ``outpaint_regions``.

    Args:
        image: PIL image or NumPy array (H x W, H x W x 3 or H x W x 4).
        mask: Segmentation mask of the important content. It is accepted for
            interface compatibility but is not used by the centred placement.
        target_ratio: Aspect ratio written as ``"W:H"`` (e.g. ``"16:9"``);
            ``"W/H"`` and a plain number such as ``"1.5"`` are also accepted.
        prompt: Text prompt forwarded to ``outpaint_regions``.

    Returns:
        NumPy array holding the image at the target aspect ratio.

    Raises:
        ValueError: If ``target_ratio`` is not a positive, finite ratio.
    """
    # Convert PIL to numpy if needed
    if isinstance(image, Image.Image):
        image_np = np.array(image)
    else:
        image_np = np.asarray(image)

    # The canvas below is 3-channel: expand grayscale and drop alpha so the
    # paste does not fail on a shape mismatch.
    if image_np.ndim == 2:
        image_np = np.stack([image_np] * 3, axis=-1)
    elif image_np.shape[2] == 4:
        image_np = image_np[:, :, :3]

    h, w = image_np.shape[:2]
    current_ratio = w / h

    # Parse the ratio explicitly instead of eval(): the value comes from the UI.
    numerator, separator, denominator = (
        str(target_ratio).strip().replace("/", ":").partition(":")
    )
    try:
        target_ratio_value = float(numerator) / (float(denominator) if separator else 1.0)
    except (ValueError, ZeroDivisionError) as err:
        raise ValueError(f"Invalid target aspect ratio: {target_ratio!r}") from err
    if not (np.isfinite(target_ratio_value) and target_ratio_value > 0):
        raise ValueError(f"Invalid target aspect ratio: {target_ratio!r}")

    # Determine if we need to add width or height. round() avoids losing a
    # pixel to float error; max() keeps the canvas at least as large as the
    # original so the padding can never become negative.
    if current_ratio < target_ratio_value:
        # Need to add width (outpaint left/right)
        new_width = max(w, int(round(h * target_ratio_value)))
        new_height = h
    else:
        # Need to add height (outpaint top/bottom)
        new_width = w
        new_height = max(h, int(round(w / target_ratio_value)))

    # Already at the target ratio: nothing to outpaint.
    if new_width == w and new_height == h:
        return image_np

    # Centre the original; any odd leftover pixel goes to the right/bottom.
    pad_left = (new_width - w) // 2
    pad_top = (new_height - h) // 2

    # Create canvas with padding and place the original image in the centre
    result = np.zeros((new_height, new_width, 3), dtype=np.uint8)
    result[pad_top:pad_top + h, pad_left:pad_left + w, :] = image_np

    # Create mask for inpainting: 255 = generate, 0 = keep original pixels
    inpaint_mask = np.full((new_height, new_width), 255, dtype=np.uint8)
    inpaint_mask[pad_top:pad_top + h, pad_left:pad_left + w] = 0

    # Perform outpainting using Stable Diffusion
    return outpaint_regions(result, inpaint_mask, prompt)
def outpaint_regions(image, mask, prompt):
    """Use Stable Diffusion to fill the masked regions of an image.

    Args:
        image: NumPy array with the canvas to complete.
        mask: NumPy array of the same height/width as ``image``; the caller in
            this file passes 255 where content must be generated and 0 where
            the original pixels are to be kept.
        prompt: Text prompt guiding the generation; a generic prompt is used
            when it is empty or whitespace.

    Returns:
        NumPy array of the completed image, with the same width and height as
        ``image``.
    """
    # Convert to PIL images
    image_pil = Image.fromarray(image)
    mask_pil = Image.fromarray(mask)

    # If prompt is empty, use a generic one
    if not prompt or not prompt.strip():
        prompt = "seamless extension of the image, same style, same scene"

    # Generate the outpainting
    output = inpaint_model(
        prompt=prompt,
        image=image_pil,
        mask_image=mask_pil,
        guidance_scale=7.5,
        num_inference_steps=25
    ).images[0]

    # The pipeline is called without height/width, so it may return an image
    # at its own working resolution rather than the canvas size; resize back
    # so the caller gets the aspect ratio it asked for.
    if output.size != image_pil.size:
        output = output.resize(image_pil.size)

    return np.array(output)
 def process_image(input_image, target_ratio="16:9", prompt=""):
     """Main processing function for the Gradio interface"""
     try:
+        # Convert from Gradio format
         if isinstance(input_image, dict) and 'image' in input_image:
             image = input_image['image']
         else:
         else:
             image_np = image
+        # Get SAM mask to identify important regions
+        mask = get_sam_mask(image_np)
         # Adjust aspect ratio while preserving content
+        result = adjust_aspect_ratio(image_np, mask, target_ratio, prompt)
         # Convert result to PIL for visualization
         result_pil = Image.fromarray(result)
         return None
 # Create the Gradio interface
+with gr.Blocks(title="Automatic Aspect Ratio Adjuster") as demo:
+    gr.Markdown("# Automatic Aspect Ratio Adjuster")
+    gr.Markdown("Upload an image, choose your target aspect ratio, and let the AI adjust it while preserving important content.")
     with gr.Row():
         with gr.Column():
             with gr.Row():
                 aspect_ratio = gr.Dropdown(
+                    choices=["16:9", "4:3", "1:1", "9:16", "3:4"],
                     value="16:9",
                     label="Target Aspect Ratio"
                 )
     gr.Markdown("""
     ## How it works
+    1. SAM (Segment Anything Model) identifies important content in your image
+    2. The algorithm calculates how to adjust the aspect ratio while preserving this content
+    3. Stable Diffusion fills in the new areas with AI-generated content that matches the original image
     ## Tips
     - For best results, provide a descriptive prompt that matches the scene