import gradio as gr
import torch
import torchvision.transforms as T
from torchvision.models.detection import maskrcnn_resnet50_fpn
from PIL import Image
import numpy as np
import cv2

# Load the pretrained Mask R-CNN (ResNet-50 FPN) model once at import time; it is
# stored in the module-level `model` that segment_human() calls for inference.
# NOTE(review): newer torchvision releases reportedly deprecate `pretrained=` in
# favour of `weights=` — confirm against the installed torchvision version.
model = maskrcnn_resnet50_fpn(pretrained=True)
# Put the model in evaluation (inference) mode before it is used for prediction.
model.eval()

# Function to segment human from input photo
def segment_human(image_path, score_threshold=0.5, mask_threshold=0.5):
    """Cut the first confidently detected person out of a photo.

    The image is run through the module-level Mask R-CNN ``model``. Only
    detections labelled as COCO class 1 (person) whose score exceeds
    ``score_threshold`` are considered; the first of those is used.

    Args:
        image_path: Path (or file object) accepted by ``PIL.Image.open``.
        score_threshold: Minimum detection score for a person to be accepted.
        mask_threshold: Cut-off applied to the soft mask to make it binary.

    Returns:
        An RGBA ``PIL.Image`` the same size as the input. Pixels outside the
        person mask are black and fully transparent; pixels inside keep their
        original colour and are fully opaque.

    Raises:
        ValueError: If no person passes the score threshold.
    """
    # Use a context manager so the underlying file handle is released;
    # convert() returns an independent in-memory copy.
    with Image.open(image_path) as source:
        input_image = source.convert("RGB")
    input_tensor = T.ToTensor()(input_image)

    with torch.no_grad():
        prediction = model([input_tensor])[0]

    # Keep only confident *person* detections (COCO classes, person is class 1).
    # Without the label check any confident object would be returned as the
    # "human".
    is_person = prediction['labels'] == 1
    is_confident = prediction['scores'] > score_threshold
    masks = prediction['masks'][is_person & is_confident]

    if masks.size(0) == 0:
        raise ValueError("No person found in the image.")

    # Take the first remaining mask (if multiple persons are found); index 0 on
    # the second axis drops the singleton channel dimension.
    mask = masks[0, 0].cpu().numpy()

    # Threshold the soft mask into a 0/1 uint8 mask.
    binary_mask = (mask > mask_threshold).astype(np.uint8)

    # Zero out every RGB pixel that lies outside the mask.
    human_array = np.array(input_image) * binary_mask[..., np.newaxis]
    human = Image.fromarray(human_array)

    # Use the mask (scaled to 0/255) as the alpha channel for transparency.
    alpha_channel = Image.fromarray(binary_mask * 255)
    human.putalpha(alpha_channel)

    return human

# Function to add segmented human to stereoscopic image of environment
def overlay_human(env_img, human_img, x_offset=0, y_offset=0, scale=1.0):
    """Paste a scaled cut-out onto an environment image, centred plus an offset.

    Args:
        env_img: Background image; it is modified in place and also returned.
        human_img: Cut-out image. It is passed to ``paste`` as its own mask, so
            it is expected to carry transparency (e.g. an RGBA image).
        x_offset: Horizontal shift in pixels from the centred position.
        y_offset: Vertical shift in pixels from the centred position.
        scale: Factor applied to the cut-out's width and height before pasting.

    Returns:
        ``env_img`` with the cut-out pasted onto it.
    """
    env_w, env_h = env_img.size
    human_w, human_h = human_img.size

    # Resize human image. Clamp each side to at least one pixel so a small
    # scale on a small image cannot request a zero-sized resize.
    target_size = (
        max(1, int(human_w * scale)),
        max(1, int(human_h * scale)),
    )
    human_img = human_img.resize(target_size)
    human_w, human_h = human_img.size

    # Centre the cut-out, then shift by the requested offsets. Offsets are
    # coerced to int so float inputs still give integer paste coordinates.
    x = (env_w - human_w) // 2 + int(x_offset)
    y = (env_h - human_h) // 2 + int(y_offset)

    env_img.paste(human_img, (x, y), human_img)
    return env_img

# Function to create an anaglyph image from left and right images
def create_anaglyph(left_img, right_img):
    """Merge two images into one by picking channels from each.

    Channel index 2 is copied from ``left_img`` and channel indices 1 and 0
    are copied from ``right_img``. With BGR-ordered input (index 2 = red)
    this yields red from the left view and green/blue from the right view.

    The result has the shape and dtype of ``left_img``; any channel beyond
    the first three is left at zero.
    """
    anaglyph = np.zeros_like(left_img)

    # (channel index, image that supplies it)
    channel_sources = (
        (2, left_img),   # Red channel from left image
        (1, right_img),  # Green channel from right image
        (0, right_img),  # Blue channel from right image
    )
    for channel, source in channel_sources:
        anaglyph[:, :, channel] = source[:, :, channel]

    return anaglyph

def generate_anaglyph(human_image, background_choice, x_offset, y_offset, scale, offset):
    """Build an anaglyph of a segmented person placed in a stereo background.

    The chosen background file is treated as a side-by-side stereo pair: its
    left half is the left view and its right half is the right view. The
    person cut out of ``human_image`` is pasted onto both views, shifted
    horizontally by ``-offset // 2`` on the left and ``+offset // 2`` on the
    right, and the two views are merged with ``create_anaglyph``.

    Args:
        human_image: Path to the uploaded photo containing a person.
        background_choice: Key into the background table ("Environment 1" …).
        x_offset: Horizontal shift of the person from the centre, in pixels.
        y_offset: Vertical shift of the person; positive moves the person up
            (the value is negated before pasting).
        scale: Scale factor applied to the person cut-out.
        offset: Horizontal disparity between the two views, in pixels.

    Returns:
        The anaglyph as an RGB ``numpy`` array.

    Raises:
        ValueError: If no image was uploaded, or no person is found in it.
        KeyError: If ``background_choice`` is not a known background.
        FileNotFoundError: If the background file cannot be read.
    """
    backgrounds = {
        "Environment 1": "env1.jpg",
        "Environment 2": "env2.jpg",
        "Environment 3": "env3.jpg"
    }
    if human_image is None:
        raise ValueError("No human image was provided.")
    env_img_path = backgrounds[background_choice]

    # cv2.imread signals failure by returning None instead of raising, so check
    # before the (much slower) segmentation step.
    stereo_image = cv2.imread(env_img_path)
    if stereo_image is None:
        raise FileNotFoundError(f"Could not read background image: {env_img_path}")

    human_img = segment_human(human_image)

    # Split environment image into left and right for stereoscopic effect.
    # Both halves are cut to the same width; for an odd-width image the last
    # column is dropped so the two views can be merged channel by channel.
    half_width = stereo_image.shape[1] // 2
    left_bgr = stereo_image[:, :half_width]
    right_bgr = stereo_image[:, half_width:2 * half_width]

    # PIL works in RGB, OpenCV loads BGR.
    left_canvas = Image.fromarray(cv2.cvtColor(left_bgr, cv2.COLOR_BGR2RGB))
    right_canvas = Image.fromarray(cv2.cvtColor(right_bgr, cv2.COLOR_BGR2RGB))

    # Opposite horizontal shifts in the two views create the depth disparity.
    half_shift = offset // 2
    left_canvas = overlay_human(left_canvas, human_img, x_offset - half_shift, -y_offset, scale)
    right_canvas = overlay_human(right_canvas, human_img, x_offset + half_shift, -y_offset, scale)

    # Back to BGR, the channel order create_anaglyph indexes by.
    left_view = cv2.cvtColor(np.array(left_canvas), cv2.COLOR_RGB2BGR)
    right_view = cv2.cvtColor(np.array(right_canvas), cv2.COLOR_RGB2BGR)

    anaglyph = create_anaglyph(left_view, right_view)

    # Return RGB for display.
    return cv2.cvtColor(anaglyph, cv2.COLOR_BGR2RGB)

# Gradio Interface
# Declares the UI: one row for the background choice and photo upload, two rows
# of sliders for placement/scale/depth, a button, and the output image.
with gr.Blocks() as demo:
    gr.Markdown("## Anaglyph Image Generator")
    with gr.Row():
        # Choices must match the keys of the `backgrounds` dict in generate_anaglyph.
        background_choice = gr.Dropdown(["Environment 1", "Environment 2", "Environment 3"], value="Environment 1", label="Select Background")
        # type="filepath": presumably hands the handler a path string, which is
        # what segment_human passes to Image.open — confirm against Gradio docs.
        human_image = gr.Image(label="Upload Human Image", type="filepath")
    with gr.Row():
        # Pixel offsets of the person relative to the centre of each view.
        x_offset = gr.Slider(-500, 500, value=0, step=1, label="Horizontal Offset")
        y_offset = gr.Slider(-500, 500, value=0, step=1, label="Vertical Offset")
    with gr.Row():
        scale = gr.Slider(0.1, 2.0, value=1.0, label="Scale Human")
        # step=2 keeps the value even, so generate_anaglyph's `offset // 2` splits
        # it equally between the left and right views.
        # NOTE(review): the label wording "towards from viewer" looks like a typo.
        offset = gr.Slider(-20, 20, value=0, step=2, label="Depth - Negative is towards from viewer and Positive is away from viewer.")
    generate_button = gr.Button("Generate Anaglyph")
    
    output_image = gr.Image(label="Anaglyph Image Output")

    # The order of `inputs` must match generate_anaglyph's parameter order.
    generate_button.click(
        generate_anaglyph,
        inputs=[human_image, background_choice, x_offset, y_offset, scale, offset],
        outputs=output_image
    )

# Starts the app as soon as the module is executed (no __main__ guard).
demo.launch()