|
|
import gradio as gr
|
|
|
import torch
|
|
|
from torchvision import models, transforms
|
|
|
from PIL import Image
|
|
|
import numpy as np
|
|
|
import cv2
|
|
|
|
|
|
|
|
|
# DeepLabV3 with a ResNet-101 backbone, pretrained for semantic segmentation
# (Pascal VOC label set — class 15 is "person"); eval() because this script
# only runs inference, never training.
weights = models.segmentation.DeepLabV3_ResNet101_Weights.DEFAULT



model = models.segmentation.deeplabv3_resnet101(weights=weights).eval()



# Standard ImageNet normalization expected by the pretrained backbone.
# Note: no Resize step, so the predicted mask keeps the input image's
# spatial dimensions — process_images relies on that alignment.
preprocess = transforms.Compose([

    transforms.ToTensor(),

    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),

])



# Fallback scene used when the caller supplies no background.
# NOTE(review): assumes environment1024.jpg lives in the working directory —
# confirm deployment layout.
default_background = Image.open("environment1024.jpg").convert("RGB")
|
|
|
|
|
|
def process_images(person_image, background_image=None):
    """Cut the person out of *person_image* and composite them onto
    *background_image*, centered on the background's most salient point.

    Parameters
    ----------
    person_image : numpy.ndarray
        RGB image of a person, as delivered by the Gradio ``Image`` input.
    background_image : PIL.Image.Image, optional
        Background scene. Defaults to the module-level ``default_background``.

    Returns
    -------
    PIL.Image.Image
        The composited RGB image, sized like the background.
    """
    if background_image is None:
        background_image = default_background

    input_image = Image.fromarray(person_image).convert("RGB")

    # Shrink the person photo (aspect-preserving) so it always fits
    # inside the background frame.
    bg_width, bg_height = background_image.size
    input_image.thumbnail((bg_width, bg_height), Image.Resampling.LANCZOS)

    input_batch = preprocess(input_image).unsqueeze(0)

    # DeepLabV3 inference; class 15 is "person" in the Pascal VOC labels.
    with torch.no_grad():
        output = model(input_batch)['out'][0]
    output_predictions = output.argmax(0)
    mask = (output_predictions == 15).cpu().numpy().astype(np.uint8)

    background_image_cv = cv2.cvtColor(np.array(background_image), cv2.COLOR_RGB2BGR)

    # Cheap saliency proxy: blurred Canny edges. The brightest pixel of the
    # blurred edge map is taken as the most visually interesting anchor.
    gray = cv2.cvtColor(background_image_cv, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, threshold1=30, threshold2=100)
    saliencyMap = cv2.GaussianBlur(edges, (5, 5), 0)
    (_, _, _, maxLoc) = cv2.minMaxLoc(saliencyMap)
    print(f"Punto de mayor interés visual: {maxLoc}")

    height, width = background_image_cv.shape[:2]

    # Person pixels in BGR (to match the OpenCV background buffer — the
    # original pasted RGB into BGR, swapping red/blue on the person),
    # with everything outside the mask zeroed.
    person_bgr = cv2.cvtColor(np.array(input_image), cv2.COLOR_RGB2BGR)
    person_array = person_bgr * mask[:, :, np.newaxis]
    person_height, person_width = person_array.shape[:2]

    # Center the person on the saliency peak, clamped inside the frame.
    x, y = maxLoc
    x = max(0, min(x - person_width // 2, width - person_width))
    y = max(0, min(y - person_height // 2, height - person_height))

    combined_image = background_image_cv.copy()

    # Vectorized paste of masked person pixels into the background ROI
    # (replaces the original per-pixel Python double loop). Defensive
    # slicing keeps shapes aligned even at the frame edge.
    roi = combined_image[y:y + person_height, x:x + person_width]
    rh, rw = roi.shape[:2]
    region_mask = mask[:rh, :rw].astype(bool)
    roi[region_mask] = person_array[:rh, :rw][region_mask]

    # Back to RGB for PIL / Gradio display.
    return Image.fromarray(cv2.cvtColor(combined_image, cv2.COLOR_BGR2RGB))
|
|
|
|
|
|
|
|
|
# Gradio UI wiring: exposes only the person-image input, so
# process_images always falls back to the module-level default background.
iface = gr.Interface(

    fn=process_images,

    inputs=[gr.Image(type="numpy", label="Imagen de la persona")],

    outputs=gr.Image(type="numpy", label="Imagen final"),

    title="Integración de Persona en Entorno",

    description="Sube una imagen de una persona y se integrará en el entorno predeterminado."

)



# Start the local Gradio server (blocks until shut down).
iface.launch()
|
|
|
|