# Hugging Face Space status header (scrape residue): Spaces: Running
| import gradio as gr | |
| import torch | |
| import torchvision.transforms as T | |
| from torchvision.models.detection import maskrcnn_resnet50_fpn | |
| from PIL import Image | |
| import numpy as np | |
| import cv2 | |
# Load a COCO-pretrained Mask R-CNN used for instance segmentation.
# NOTE(review): `pretrained=True` is deprecated in recent torchvision in
# favour of the `weights=` argument — confirm the installed version.
model = maskrcnn_resnet50_fpn(pretrained=True)
# Inference only: switch off dropout / batch-norm training behaviour.
model.eval()
# Function to segment a human from the input photo.
def segment_human(image_path):
    """Segment the highest-ranked person from an image file.

    Args:
        image_path: Path to the input photo; opened and converted to RGB.

    Returns:
        PIL.Image.Image: RGBA image containing only the masked person;
        pixels outside the mask are black with a fully transparent alpha.

    Raises:
        ValueError: If no person is detected above the confidence threshold.
    """
    input_image = Image.open(image_path).convert("RGB")
    input_tensor = T.ToTensor()(input_image)
    with torch.no_grad():
        output = model([input_tensor])[0]

    masks = output['masks']
    scores = output['scores']
    labels = output['labels']

    # BUG FIX: the original filtered only by score, so the "person" mask could
    # actually be any COCO object. Restrict to the person class (label 1) as
    # the original comment intended.
    keep = [i for i in range(len(scores)) if scores[i] > 0.5 and labels[i] == 1]
    masks = masks[keep]
    if masks.size(0) == 0:
        raise ValueError("No person found in the image.")

    # Detections are returned score-sorted, so index 0 is the best person.
    mask = masks[0, 0].cpu().numpy()
    # Threshold the soft mask into a binary {0, 1} matte.
    binary_mask = (mask > 0.5).astype(np.uint8)

    # Zero out background pixels, then attach the matte as an alpha channel.
    human_array = np.array(input_image) * binary_mask[..., np.newaxis]
    human = Image.fromarray(human_array, "RGB")
    alpha_channel = Image.fromarray(binary_mask * 255, "L")
    human.putalpha(alpha_channel)
    return human
# Function to composite the segmented human onto one eye of the stereo pair.
def overlay_human(env_img, human_img, x_offset=0, y_offset=0, scale=1.0):
    """Paste a (possibly rescaled) human cutout onto the environment image.

    The cutout is centered on the environment, then shifted by the given
    pixel offsets. ``env_img`` is modified in place and also returned.
    The cutout's own alpha channel is used as the paste mask.
    """
    bg_w, bg_h = env_img.size

    # Rescale the cutout uniformly before placement.
    fg_w = int(human_img.size[0] * scale)
    fg_h = int(human_img.size[1] * scale)
    scaled = human_img.resize((fg_w, fg_h))

    # Center, then apply the caller-supplied shift.
    paste_x = (bg_w - fg_w) // 2 + x_offset
    paste_y = (bg_h - fg_h) // 2 + y_offset

    env_img.paste(scaled, (paste_x, paste_y), scaled)
    return env_img
# Function to create a red-cyan anaglyph from left and right eye images.
def create_anaglyph(left_img, right_img):
    """Combine two equally shaped H x W x 3 channel-ordered arrays into an
    anaglyph: channel 2 is taken from the left image, channels 1 and 0 from
    the right image. Works on BGR arrays (cv2 ordering), where channel 2 is
    red and the result is a classic red/cyan anaglyph.
    """
    anaglyph = np.zeros_like(left_img)
    # Channel 2 ("red" in BGR) comes from the left eye; the remaining two
    # channels come from the right eye.
    anaglyph[..., 2] = left_img[..., 2]
    anaglyph[..., 1] = right_img[..., 1]
    anaglyph[..., 0] = right_img[..., 0]
    return anaglyph
def generate_anaglyph(human_image, background_choice, x_offset, y_offset, scale, offset):
    """Compose the segmented human onto both eyes of a stereo background and
    return a red-cyan anaglyph as an RGB numpy array.

    Args:
        human_image: Filepath of the uploaded photo (Gradio type="filepath").
        background_choice: Key into the `backgrounds` mapping below.
        x_offset, y_offset: Placement offsets in pixels; note y_offset is
            negated before use, so positive slider values move the subject up.
        scale: Uniform scale factor for the human cutout.
        offset: Stereo disparity, split evenly (// 2) between the two eyes.

    Raises:
        KeyError: If background_choice is not a known environment name.
        ValueError: Propagated from segment_human when no person is detected.
    """
    # Background files are resolved relative to the working directory.
    backgrounds = {
        "Environment 1": "env1.jpg",
        "Environment 2": "env2.jpg",
        "Environment 3": "env3.jpg"
    }
    env_img_path = backgrounds[background_choice]
    human_img = segment_human(human_image)
    # The environment file is assumed to be a side-by-side stereo pair:
    # left eye in the left half, right eye in the right half — TODO confirm.
    stereo_image = cv2.imread(env_img_path)  # cv2 loads as BGR
    height, width, _ = stereo_image.shape
    midpoint = width // 2
    left_image = stereo_image[:, :midpoint]
    right_image = stereo_image[:, midpoint:]
    # Swap to RGB so the PIL composites below have conventional ordering.
    left_image_rgb = cv2.cvtColor(left_image, cv2.COLOR_BGR2RGB)
    right_image_rgb = cv2.cvtColor(right_image, cv2.COLOR_BGR2RGB)
    # Disparity: shift the human by -offset//2 in the left eye and +offset//2
    # in the right eye; y_offset is negated (see docstring).
    left_image_rgb = overlay_human(Image.fromarray(left_image_rgb), human_img, x_offset - offset // 2, -y_offset, scale)
    right_image_rgb = overlay_human(Image.fromarray(right_image_rgb), human_img, x_offset + offset // 2, -y_offset, scale)
    # NOTE(review): COLOR_BGR2RGB is a plain channel swap, so these two calls
    # actually convert the RGB composites *back* to BGR ordering. The anaglyph
    # is then assembled with BGR indexing and swapped to RGB at the end —
    # roundabout, but the net channel ordering is consistent.
    left_image_rgb = cv2.cvtColor(np.array(left_image_rgb), cv2.COLOR_BGR2RGB)
    right_image_rgb = cv2.cvtColor(np.array(right_image_rgb), cv2.COLOR_BGR2RGB)
    anaglyph = create_anaglyph(left_image_rgb, right_image_rgb)
    # Final swap back to RGB for display in the Gradio image component.
    anaglyph_rgb = cv2.cvtColor(anaglyph, cv2.COLOR_BGR2RGB)
    return anaglyph_rgb
# Gradio Interface: wires the UI controls to generate_anaglyph.
with gr.Blocks() as demo:
    gr.Markdown("## Anaglyph Image Generator")
    with gr.Row():
        background_choice = gr.Dropdown(
            ["Environment 1", "Environment 2", "Environment 3"],
            value="Environment 1",
            label="Select Background",
        )
        # type="filepath" so the click handler receives a path string, which
        # segment_human opens itself.
        human_image = gr.Image(label="Upload Human Image", type="filepath")
    with gr.Row():
        x_offset = gr.Slider(-500, 500, value=0, step=1, label="Horizontal Offset")
        y_offset = gr.Slider(-500, 500, value=0, step=1, label="Vertical Offset")
    with gr.Row():
        scale = gr.Slider(0.1, 2.0, value=1.0, label="Scale Human")
        # FIX: original label read "Negative is towards from viewer and
        # Positive is away from viewer." — grammatically broken; reworded
        # without changing the slider's semantics or range.
        offset = gr.Slider(
            -20, 20, value=0, step=2,
            label="Depth - Negative is toward the viewer and Positive is away from the viewer.",
        )
    generate_button = gr.Button("Generate Anaglyph")
    output_image = gr.Image(label="Anaglyph Image Output")
    generate_button.click(
        generate_anaglyph,
        inputs=[human_image, background_choice, x_offset, y_offset, scale, offset],
        outputs=output_image,
    )

demo.launch()