"""Gradio app that composites a segmented person into a stereo scene as an anaglyph.

A person is cut out of an uploaded photo with Mask R-CNN, pasted into the left
and right halves of a side-by-side stereoscopic background with a horizontal
disparity, and the two views are merged into a red/cyan anaglyph.
"""

import gradio as gr
import torch
import torchvision.transforms as T
from torchvision.models.detection import maskrcnn_resnet50_fpn
from PIL import Image
import numpy as np
import cv2

# COCO category id the detection model reports for the "person" class.
_PERSON_LABEL = 1
# Minimum detection score for an instance to be considered.
_SCORE_THRESHOLD = 0.5
# Soft-mask value above which a pixel is treated as foreground.
_MASK_THRESHOLD = 0.5

# Load pretrained model for segmentation (done once, at import time).
model = maskrcnn_resnet50_fpn(pretrained=True)
model.eval()


def segment_human(image_path):
    """Cut the most confident detected person out of a photo.

    Args:
        image_path: Path (or file object) accepted by ``PIL.Image.open``.

    Returns:
        An RGBA ``PIL.Image`` the same size as the input. Pixels outside the
        person mask are black and fully transparent; pixels inside keep their
        original colour and are fully opaque.

    Raises:
        ValueError: If no person is detected with a score above the threshold.
    """
    input_image = Image.open(image_path).convert("RGB")
    input_tensor = T.ToTensor()(input_image)

    with torch.no_grad():
        output = model([input_tensor])[0]

    # Keep only confident detections whose class is "person"; filtering on
    # score alone would let any confidently detected object through.
    is_person = output["labels"] == _PERSON_LABEL
    is_confident = output["scores"] > _SCORE_THRESHOLD
    masks = output["masks"][is_person & is_confident]

    if masks.size(0) == 0:
        raise ValueError("No person found in the image.")

    # Take the first remaining instance (torchvision returns detections in
    # descending score order, so this should be the most confident person).
    mask = masks[0, 0].cpu().numpy()

    # Threshold the soft mask into a 0/1 uint8 mask.
    binary_mask = (mask > _MASK_THRESHOLD).astype(np.uint8)

    # Zero out every background pixel of the RGB image.
    human_array = np.array(input_image) * binary_mask[..., np.newaxis]
    human = Image.fromarray(human_array, "RGB")

    # Use the mask (scaled to 0/255) as the alpha channel.
    alpha_channel = Image.fromarray(binary_mask * 255, "L")
    human.putalpha(alpha_channel)

    return human


def overlay_human(env_img, human_img, x_offset=0, y_offset=0, scale=1.0):
    """Paste a cut-out onto an environment image, centred plus an offset.

    Args:
        env_img: Background ``PIL.Image``. It is modified in place.
        human_img: ``PIL.Image`` to paste; it is also used as the paste mask,
            so it needs an alpha channel (e.g. the output of ``segment_human``).
        x_offset: Horizontal shift in pixels from the centred position.
        y_offset: Vertical shift in pixels from the centred position.
        scale: Resize factor applied to ``human_img`` before pasting.

    Returns:
        The same ``env_img`` object, after pasting.
    """
    env_w, env_h = env_img.size
    human_w, human_h = human_img.size

    # Clamp to at least one pixel: a zero-sized target makes resize() fail.
    scaled_size = (
        max(1, int(human_w * scale)),
        max(1, int(human_h * scale)),
    )
    human_img = human_img.resize(scaled_size)
    human_w, human_h = human_img.size

    # paste() needs integer coordinates; UI sliders may hand over floats.
    x = (env_w - human_w) // 2 + int(x_offset)
    y = (env_h - human_h) // 2 + int(y_offset)

    env_img.paste(human_img, (x, y), human_img)
    return env_img


def create_anaglyph(left_img, right_img):
    """Merge two equally sized BGR views into one BGR red/cyan anaglyph.

    Args:
        left_img: Left-eye view as an array indexed ``[row, col, channel]``
            in BGR channel order.
        right_img: Right-eye view with the same shape and channel order.

    Returns:
        A new array shaped like ``left_img`` whose red channel (index 2) comes
        from the left view and whose green and blue channels (indices 1 and 0)
        come from the right view.

    Raises:
        ValueError: If the two views do not have the same shape.
    """
    if left_img.shape != right_img.shape:
        raise ValueError(
            f"Left and right images must have the same shape, "
            f"got {left_img.shape} and {right_img.shape}."
        )

    anaglyph = np.zeros_like(left_img)
    anaglyph[:, :, 2] = left_img[:, :, 2]   # Red channel from left image
    anaglyph[:, :, 1] = right_img[:, :, 1]  # Green channel from right image
    anaglyph[:, :, 0] = right_img[:, :, 0]  # Blue channel from right image
    return anaglyph


def generate_anaglyph(human_image, background_choice, x_offset, y_offset, scale, offset):
    """Build the anaglyph shown in the UI.

    Args:
        human_image: File path of the uploaded photo containing a person.
        background_choice: One of the keys "Environment 1".."Environment 3".
        x_offset: Horizontal shift of the person, in pixels.
        y_offset: Vertical shift of the person, in pixels; positive moves up
            (it is negated before being applied to image coordinates).
        scale: Resize factor for the person cut-out.
        offset: Total horizontal disparity in pixels between the two views;
            half is subtracted in the left view and half added in the right.

    Returns:
        The anaglyph as an RGB ``numpy`` array.

    Raises:
        ValueError: If no photo was supplied or no person is found in it.
        KeyError: If ``background_choice`` is not a known environment.
        FileNotFoundError: If the background image cannot be read.
    """
    backgrounds = {
        "Environment 1": "env1.jpg",
        "Environment 2": "env2.jpg",
        "Environment 3": "env3.jpg",
    }

    if human_image is None:
        raise ValueError("No human image provided.")

    # Read the background first so a missing file fails before the
    # comparatively expensive segmentation runs.
    env_img_path = backgrounds[background_choice]
    stereo_image = cv2.imread(env_img_path)
    if stereo_image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read background image: {env_img_path}")

    human_img = segment_human(human_image)

    # Split the side-by-side stereo image into two equally wide halves. For an
    # odd width the last column is dropped so both halves have the same shape.
    midpoint = stereo_image.shape[1] // 2
    left_image = stereo_image[:, :midpoint]
    right_image = stereo_image[:, midpoint:2 * midpoint]

    # OpenCV loads BGR; convert to RGB for PIL compositing.
    left_rgb = cv2.cvtColor(left_image, cv2.COLOR_BGR2RGB)
    right_rgb = cv2.cvtColor(right_image, cv2.COLOR_BGR2RGB)

    # Shift the person in opposite directions in each view to create disparity.
    half_disparity = int(offset) // 2
    left_composite = overlay_human(
        Image.fromarray(left_rgb), human_img, x_offset - half_disparity, -y_offset, scale
    )
    right_composite = overlay_human(
        Image.fromarray(right_rgb), human_img, x_offset + half_disparity, -y_offset, scale
    )

    # Back to BGR, which is the channel order create_anaglyph() works in.
    left_bgr = cv2.cvtColor(np.array(left_composite), cv2.COLOR_RGB2BGR)
    right_bgr = cv2.cvtColor(np.array(right_composite), cv2.COLOR_RGB2BGR)

    anaglyph = create_anaglyph(left_bgr, right_bgr)

    # The image output component is given RGB.
    return cv2.cvtColor(anaglyph, cv2.COLOR_BGR2RGB)


# Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("## Anaglyph Image Generator")

    with gr.Row():
        background_choice = gr.Dropdown(
            ["Environment 1", "Environment 2", "Environment 3"],
            value="Environment 1",
            label="Select Background",
        )
        human_image = gr.Image(label="Upload Human Image", type="filepath")

    with gr.Row():
        x_offset = gr.Slider(-500, 500, value=0, step=1, label="Horizontal Offset")
        y_offset = gr.Slider(-500, 500, value=0, step=1, label="Vertical Offset")

    with gr.Row():
        scale = gr.Slider(0.1, 2.0, value=1.0, label="Scale Human")
        offset = gr.Slider(
            -20,
            20,
            value=0,
            step=2,
            label="Depth - Negative is towards from viewer and Positive is away from viewer.",
        )

    generate_button = gr.Button("Generate Anaglyph")
    output_image = gr.Image(label="Anaglyph Image Output")

    generate_button.click(
        generate_anaglyph,
        inputs=[human_image, background_choice, x_offset, y_offset, scale, offset],
        outputs=output_image,
    )

# Launched unconditionally, as in the original script.
demo.launch()