# Env_mixer / envmixer.py
# (Hugging Face Hub page header preserved as a comment: uploaded by
# "Inmental" via huggingface_hub, revision 4c62147 verified. The raw
# header lines were not valid Python and broke the script.)
import torch
import numpy as np
import os
import sys
from diffusers import EulerDiscreteScheduler
from huggingface_hub import hf_hub_download
from rembg import remove
from PIL import Image
import cv2
from photomaker import PhotoMakerStableDiffusionXLPipeline
# Style dictionary: style name -> (positive-prompt template, negative terms).
# The template's "{prompt}" placeholder is filled in by apply_style().
styles = {
    "Cinematic HD": ("cinematic HD {prompt}", "low quality"),
    "Photographic (Default)": ("photographic {prompt}", "low quality"),
    # More styles can be added here
}

# Global configuration
base_model_path = 'SG161222/RealVisXL_V3.0'  # SDXL base checkpoint on the Hub
person_image_folder = r'D:\I+D\ia\Env mixer\in'  # folder of input person photos
environment_image_path = r'D:\I+D\ia\Env mixer\environment10241.jpg'  # optional environment image
face_w = 512  # width of the transparent canvas each face is centered on
face_h = 512  # height of that canvas
output_w = 512  # Set desired output width
output_h = 512  # Set desired output height
border = 100  # margin in pixels added around a detected face when cropping
# Pick the best available torch device: CUDA, then Apple MPS (macOS only),
# then CPU. If the capability probes themselves raise, fall back to CPU.
try:
    if torch.cuda.is_available():
        device = "cuda"
    elif sys.platform == "darwin" and torch.backends.mps.is_available():
        device = "mps"
    else:
        device = "cpu"
except Exception:  # narrowed from bare `except:` so Ctrl-C / SystemExit still propagate
    device = "cpu"
MAX_SEED = np.iinfo(np.int32).max  # largest valid 32-bit seed
DEFAULT_STYLE_NAME = "Photographic (Default)"
# Download the PhotoMaker checkpoint into the local Hub cache (network I/O
# at import time; returns the cached file path).
photomaker_ckpt = hf_hub_download(repo_id="TencentARC/PhotoMaker", filename="photomaker-v1.bin", repo_type="model")
# Pick the weight dtype per device. NOTE(review): float16 on MPS is the
# safe choice, but bfloat16 on CPU (the `else` branch also covers cpu) may
# be slow or unsupported depending on hardware -- confirm for the target.
if device == "mps":
    torch_dtype = torch.float16
else:
    torch_dtype = torch.bfloat16
# Build the PhotoMaker SDXL pipeline from the base model and move it to the
# selected device.
pipe = PhotoMakerStableDiffusionXLPipeline.from_pretrained(
    base_model_path,
    torch_dtype=torch_dtype,
    use_safetensors=True,
    variant="fp16"
).to(device)
# Attach the PhotoMaker identity adapter downloaded above; "img" is the
# trigger word that must appear in the positive prompt.
pipe.load_photomaker_adapter(
    os.path.dirname(photomaker_ckpt),
    subfolder="",
    weight_name=os.path.basename(photomaker_ckpt),
    trigger_word="img"
)
pipe.id_encoder.to(device)
# Replace the default scheduler with an Euler scheduler built from the
# same configuration.
pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config)
# Fuse the adapter's LoRA weights into the base weights.
pipe.fuse_lora()
def remove_background(image_path):
    """Strip the background from the image at *image_path* with rembg.

    A PNG copy named ``removed_bg_<original basename>.png`` is written to
    the working directory for manual inspection. Returns the
    background-removed PIL image, or None when anything fails.
    """
    try:
        source = Image.open(image_path)
        cutout = remove(source)
        # Keep a copy on disk so the cutout can be inspected.
        cutout.save(f"removed_bg_{os.path.basename(image_path)}.png")
    except Exception as e:
        print(f"Error in remove_background: {e}")
        return None
    return cutout
def detect_face(image, image_path):
    """Detect frontal faces in a PIL *image* with OpenCV's Haar cascade.

    Writes ``faces_detected_<basename>.png`` with the detections drawn for
    inspection, and returns the detections as an array of (x, y, w, h)
    boxes (empty when no face is found).

    Fixes over the previous version:
    - The image is converted to RGB first: rembg produces RGBA images, and
      cv2.cvtColor with a 3-channel conversion code fails on 4 channels.
    - The rectangles are drawn on a kept array; before, they were drawn on
      a throwaway ``np.array(image)`` copy, so the saved debug image never
      showed them.
    """
    rgb = np.array(image.convert("RGB"))
    gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)
    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
    faces = face_cascade.detectMultiScale(gray, 1.1, 4)
    annotated = rgb.copy()
    for (x, y, w, h) in faces:
        cv2.rectangle(annotated, (x, y), (x + w, y + h), (255, 0, 0), 2)
    # cv2.imwrite expects BGR channel order.
    cv2.imwrite(f"faces_detected_{os.path.basename(image_path)}.png", cv2.cvtColor(annotated, cv2.COLOR_RGB2BGR))
    return faces
def crop_and_resize_face(image, face):
    """Cut *face* (an ``(x, y, w, h)`` box) out of *image* with a margin
    and center it on a transparent ``face_w`` x ``face_h`` canvas.

    The crop box is widened by the module-level ``border`` on every side;
    PIL pads any part of the box that falls outside the image. The crop is
    shrunk (aspect-preserving) to fit the canvas and pasted centered,
    using its own alpha channel as the paste mask.
    """
    x, y, w, h = face
    crop_box = (x - border, y - border, x + w + border, y + h + border)
    face_img = image.crop(crop_box)
    # Shrink in place so the face fits inside the canvas.
    face_img.thumbnail((face_w, face_h), Image.Resampling.LANCZOS)
    canvas = Image.new('RGBA', (face_w, face_h), (0, 0, 0, 0))
    offset = ((face_w - face_img.width) // 2, (face_h - face_img.height) // 2)
    canvas.paste(face_img, offset, face_img)
    return canvas
def apply_style(style_name: str, positive: str, negative: str = ""):
    """Expand the named style around the prompts.

    Looks *style_name* up in the module-level ``styles`` dict (falling
    back to ``DEFAULT_STYLE_NAME``), substitutes *positive* into the
    style's "{prompt}" template, and prepends the style's negative terms
    to *negative*. Returns ``(styled_positive, styled_negative)``.
    """
    template, style_negative = styles.get(style_name, styles[DEFAULT_STYLE_NAME])
    styled_positive = template.replace("{prompt}", positive)
    styled_negative = f"{style_negative} {negative}"
    return styled_positive, styled_negative
def process_image(image_path):
    """Per-image preprocessing entry point: currently just background
    removal (returns the cut-out PIL image, or None on failure)."""
    return remove_background(image_path)
def main():
    """Run the full pipeline: validate the prompt, background-strip and
    face-crop every input photo, then generate a styled PhotoMaker image
    saved as ``result.png``."""
    prompt = "cinematic photo of a person img sniffing cocaine, 35mm photograph, film, bokeh, professional, 4k, highly detailed,"
    negative_prompt = "nsfw, lowres, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality, normal quality, jpeg artifacts, signature, watermark, username, blurry"
    num_steps = 25  # more inference steps for better quality
    style_strength_ratio = 4  # NOTE(review): defined but never passed to the pipeline
    num_outputs = 1
    guidance_scale = 5
    seed = 1700
    start_merge_step = 2  # step at which identity merging starts
    style_name = "Cinematic HD"  # example style name

    # The trigger word must appear exactly once in the positive prompt...
    image_token_id = pipe.tokenizer.convert_tokens_to_ids(pipe.trigger_word)
    input_ids = pipe.tokenizer.encode(prompt)
    if image_token_id not in input_ids:
        raise ValueError(f"Cannot find the trigger word '{pipe.trigger_word}' in text prompt!")
    if input_ids.count(image_token_id) > 1:
        raise ValueError(f"Cannot use multiple trigger words '{pipe.trigger_word}' in text prompt!")
    # ...and never in the negative prompt.
    if negative_prompt:
        negative_prompt_ids = pipe.tokenizer.encode(negative_prompt)
        if image_token_id in negative_prompt_ids:
            raise ValueError(f"Cannot use trigger word '{pipe.trigger_word}' in negative prompt!")

    # Apply the chosen style template to both prompts.
    styled_prompt, styled_negative_prompt = apply_style(style_name, prompt, negative_prompt)

    # Load the environment image when available.
    # NOTE(review): environment_image is loaded but never used below.
    environment_image = None
    if os.path.exists(environment_image_path):
        try:
            environment_image = Image.open(environment_image_path)
        except Exception as e:
            print(f"Error loading environment image: {e}")

    # Background-strip each person image, keeping every surviving image
    # paired with its path. BUGFIX: the previous code filtered the image
    # list but not the path list, so a single failed image misaligned all
    # later image/path pairs.
    try:
        person_image_paths = [os.path.join(person_image_folder, filename) for filename in os.listdir(person_image_folder) if filename.lower().endswith(('.png', '.jpg', '.jpeg'))]
        person_pairs = [(path, process_image(path)) for path in person_image_paths]
        person_pairs = [(path, img) for path, img in person_pairs if img is not None]  # drop failures
    except Exception as e:
        print(f"Error processing person images: {e}")
        return

    # Detect a face in each image, crop it with a margin, and center it on
    # a transparent canvas.
    processed_faces = []
    for img_path, img in person_pairs:
        faces = detect_face(img, img_path)
        if len(faces) > 0:
            face_image = crop_and_resize_face(img, faces[0])
            processed_faces.append(face_image)
        else:
            print(f"No face detected in {img_path}, skipping this image")

    # Convert the processed faces to tensors.
    # NOTE(review): these tensors (and conditioning_tensor below) feed
    # only the debug prints; the pipeline consumes the PIL images.
    face_tensors = []
    for face_img in processed_faces:
        print(f"Face Image Size: {face_img.size}")
        face_tensor = torch.tensor(np.array(face_img.convert("RGB"))).permute(2, 0, 1).to(device, dtype=torch_dtype)
        face_tensors.append(face_tensor)
    if not face_tensors:
        print("No faces processed")
        return

    # Stack into (batch_size, num_inputs, channels, height, width).
    conditioning_tensor = torch.stack(face_tensors).unsqueeze(0)

    # Echo the run parameters for verification.
    print(f"Prompt: {styled_prompt}")
    print(f"Negative Prompt: {styled_negative_prompt}")
    print(f"Output Width: {output_w}")
    print(f"Output Height: {output_h}")
    print(f"Number of Outputs: {num_outputs}")
    print(f"Number of Inference Steps: {num_steps}")
    print(f"Start Merge Step: {start_merge_step}")
    print(f"Guidance Scale: {guidance_scale}")
    print(f"Device: {device}")
    print(f"Number of Processed Faces: {len(processed_faces)}")
    print(f"Conditioning Tensor Shape: {conditioning_tensor.shape}")

    # Generate the final image with a fixed seed for reproducibility.
    generator = torch.manual_seed(seed)
    try:
        result = pipe(
            prompt=styled_prompt,
            width=output_w,
            height=output_h,
            input_id_images=processed_faces,  # PIL reference faces
            negative_prompt=styled_negative_prompt,
            num_images_per_prompt=num_outputs,
            num_inference_steps=num_steps,
            start_merge_step=start_merge_step,
            generator=generator,
            guidance_scale=guidance_scale,
        ).images
        if result:
            final_image = result[0]
            final_image.save("result.png")
            print("Image saved as result.png")
        else:
            print("No image generated")
    except Exception as e:
        print(f"Error during image generation: {e}")
# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()