| import torch
|
| import numpy as np
|
| import os
|
| import sys
|
| from diffusers import EulerDiscreteScheduler
|
| from huggingface_hub import hf_hub_download
|
| from rembg import remove
|
| from PIL import Image
|
| import cv2
|
| from photomaker import PhotoMakerStableDiffusionXLPipeline
|
|
|
|
|
# Prompt style presets: name -> (positive template with "{prompt}" slot,
# negative prompt fragment prepended to the user's negative prompt).
styles = {
    "Cinematic HD": ("cinematic HD {prompt}", "low quality"),
    "Photographic (Default)": ("photographic {prompt}", "low quality"),
}
|
|
|
|
|
# --- Configuration -------------------------------------------------------
# Base SDXL checkpoint used for generation.
base_model_path = 'SG161222/RealVisXL_V3.0'

# Folder of person photos and an optional environment/backdrop image.
person_image_folder = r'D:\I+D\ia\Env mixer\in'
environment_image_path = r'D:\I+D\ia\Env mixer\environment10241.jpg'

# Canvas size for cropped faces, final output size, and the extra margin
# (in pixels) kept around each detected face box when cropping.
face_w = 512
face_h = 512
output_w = 512
output_h = 512
border = 100
|
|
|
# Pick the best available torch backend: CUDA GPU, then Apple MPS, else CPU.
try:
    if torch.cuda.is_available():
        device = "cuda"
    elif sys.platform == "darwin" and torch.backends.mps.is_available():
        device = "mps"
    else:
        device = "cpu"
except Exception:
    # Device probing can raise on partial/exotic torch builds; fall back to
    # CPU. (Was a bare `except:`, which also swallowed KeyboardInterrupt /
    # SystemExit — narrowed to Exception.)
    device = "cpu"
|
|
|
# Largest value representable as a signed 32-bit seed.
MAX_SEED = np.iinfo(np.int32).max

# Style used by apply_style() when an unknown style name is requested.
DEFAULT_STYLE_NAME = "Photographic (Default)"
|
|
|
|
|
# Download (or reuse the cached copy of) the PhotoMaker v1 identity-adapter
# checkpoint from the Hugging Face Hub; returns the local file path.
photomaker_ckpt = hf_hub_download(repo_id="TencentARC/PhotoMaker", filename="photomaker-v1.bin", repo_type="model")
|
|
|
# Half-precision choice per backend: float16 on Apple MPS, bfloat16 elsewhere.
torch_dtype = torch.float16 if device == "mps" else torch.bfloat16
|
|
|
# Build the PhotoMaker SDXL pipeline from the base checkpoint in half
# precision, loading the fp16 safetensors variant, and move it to the device.
pipe = PhotoMakerStableDiffusionXLPipeline.from_pretrained(
    base_model_path,
    torch_dtype=torch_dtype,
    use_safetensors=True,
    variant="fp16"
).to(device)

# Attach the identity adapter; "img" is the trigger word that marks where in
# the prompt the identity embedding is injected (validated in main()).
pipe.load_photomaker_adapter(
    os.path.dirname(photomaker_ckpt),
    subfolder="",
    weight_name=os.path.basename(photomaker_ckpt),
    trigger_word="img"
)
pipe.id_encoder.to(device)
# Replace the checkpoint's default scheduler with Euler, reusing its config.
pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config)
# Merge the adapter's LoRA weights into the base weights before inference.
pipe.fuse_lora()
|
|
|
def remove_background(image_path):
    """Strip the background from the image at *image_path* using rembg.

    Saves a debug copy named ``removed_bg_<stem>.png`` in the working
    directory and returns the RGBA result, or ``None`` when loading or
    segmentation fails (callers filter ``None`` out).
    """
    try:
        input_image = Image.open(image_path)
        output_image = remove(input_image)
        # BUGFIX: drop the source extension so the debug file is not saved
        # with a double extension such as "removed_bg_photo.jpg.png".
        stem = os.path.splitext(os.path.basename(image_path))[0]
        output_image.save(f"removed_bg_{stem}.png")
        return output_image
    except Exception as e:
        # Best-effort: log and skip this image instead of aborting the batch.
        print(f"Error in remove_background: {e}")
        return None
|
|
|
def detect_face(image, image_path):
    """Detect faces with OpenCV's Haar cascade and save an annotated copy.

    Writes ``faces_detected_<basename>.png`` with rectangles drawn around the
    detections and returns the raw (x, y, w, h) boxes (empty when no face is
    found).
    """
    # BUGFIX: convert to RGB explicitly. PIL arrays are RGB(A), not BGR, and
    # rembg output is 4-channel RGBA, on which COLOR_BGR2GRAY raises.
    rgb = np.array(image.convert("RGB"))
    gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)
    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
    faces = face_cascade.detectMultiScale(gray, 1.1, 4)
    # BUGFIX: draw all rectangles on one persistent array. The original drew
    # each rectangle on a fresh throwaway np.array(image), so the saved debug
    # image never contained any annotations.
    annotated = rgb.copy()
    for (x, y, w, h) in faces:
        cv2.rectangle(annotated, (x, y), (x + w, y + h), (255, 0, 0), 2)
    Image.fromarray(annotated).save(f"faces_detected_{os.path.basename(image_path)}.png")
    return faces
|
|
|
def crop_and_resize_face(image, face):
    """Crop the detected face box (plus a `border`-pixel margin) and center
    it on a transparent face_w x face_h RGBA canvas, preserving aspect ratio.

    Assumes *image* has an alpha channel (rembg output), since it is used as
    its own paste mask.
    """
    x, y, w, h = face
    crop_box = (x - border, y - border, x + w + border, y + h + border)
    face_img = image.crop(crop_box)

    # Shrink in place to fit the canvas, then compute a centering offset.
    face_img.thumbnail((face_w, face_h), Image.Resampling.LANCZOS)
    canvas = Image.new('RGBA', (face_w, face_h), (0, 0, 0, 0))
    offset = (
        (canvas.width - face_img.width) // 2,
        (canvas.height - face_img.height) // 2,
    )

    # Paste using the face's own alpha as the mask so transparency survives.
    canvas.paste(face_img, offset, face_img)
    return canvas
|
|
|
def apply_style(style_name: str, positive: str, negative: str = ""):
    """Expand the named style around *positive* and prepend the style's
    negative fragment to *negative*; unknown names fall back to the default
    style."""
    template, style_negative = styles.get(style_name, styles[DEFAULT_STYLE_NAME])
    return template.replace("{prompt}", positive), f"{style_negative} {negative}"
|
|
|
def process_image(image_path):
    """Per-image preprocessing step; currently just background removal.

    Returns the RGBA image or None on failure (see remove_background).
    """
    return remove_background(image_path)
|
|
|
def main():
    """Run the PhotoMaker pipeline end to end.

    Validates the trigger word in the prompts, applies a named style, strips
    backgrounds from every photo in ``person_image_folder``, detects and
    crops faces, then runs the SDXL PhotoMaker pipeline and saves the first
    output as ``result.png``.
    """
    # Generation settings (edit here; there is no CLI).
    prompt = "cinematic photo of a person img sniffing cocaine, 35mm photograph, film, bokeh, professional, 4k, highly detailed,"
    negative_prompt = "nsfw, lowres, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality, normal quality, jpeg artifacts, signature, watermark, username, blurry"
    num_steps = 25
    style_strength_ratio = 4  # NOTE(review): currently unused below
    num_outputs = 1
    guidance_scale = 5
    seed = 1700
    start_merge_step = 2
    style_name = "Cinematic HD"

    # The identity embedding is injected where the trigger word appears, so
    # it must occur exactly once in the positive prompt and never in the
    # negative prompt.
    image_token_id = pipe.tokenizer.convert_tokens_to_ids(pipe.trigger_word)
    input_ids = pipe.tokenizer.encode(prompt)
    if image_token_id not in input_ids:
        raise ValueError(f"Cannot find the trigger word '{pipe.trigger_word}' in text prompt!")
    if input_ids.count(image_token_id) > 1:
        raise ValueError(f"Cannot use multiple trigger words '{pipe.trigger_word}' in text prompt!")

    if negative_prompt:
        negative_prompt_ids = pipe.tokenizer.encode(negative_prompt)
        if image_token_id in negative_prompt_ids:
            raise ValueError(f"Cannot use trigger word '{pipe.trigger_word}' in negative prompt!")

    styled_prompt, styled_negative_prompt = apply_style(style_name, prompt, negative_prompt)

    # Optional backdrop image; loaded but not consumed by the pipe call yet.
    environment_image = None
    if os.path.exists(environment_image_path):
        try:
            environment_image = Image.open(environment_image_path)
        except Exception as e:
            print(f"Error loading environment image: {e}")

    try:
        person_image_paths = [
            os.path.join(person_image_folder, filename)
            for filename in os.listdir(person_image_folder)
            if filename.lower().endswith(('.png', '.jpg', '.jpeg'))
        ]
        # BUGFIX: keep every image paired with its own path. The original
        # filtered None results out of the image list and then zipped it
        # against the unfiltered path list, misaligning images and paths
        # whenever a single background removal failed.
        path_image_pairs = [(path, process_image(path)) for path in person_image_paths]
        path_image_pairs = [(path, img) for path, img in path_image_pairs if img is not None]
    except Exception as e:
        print(f"Error processing person images: {e}")
        return

    processed_faces = []
    for img_path, img in path_image_pairs:
        faces = detect_face(img, img_path)
        if len(faces) > 0:
            # Only the first detected face per photo is used.
            processed_faces.append(crop_and_resize_face(img, faces[0]))
        else:
            print(f"No face detected in {img_path}, skipping this image")

    face_tensors = []
    for face_img in processed_faces:
        print(f"Face Image Size: {face_img.size}")
        face_tensor = torch.tensor(np.array(face_img.convert("RGB"))).permute(2, 0, 1).to(device, dtype=torch_dtype)
        face_tensors.append(face_tensor)

    if not face_tensors:
        print("No faces processed")
        return

    # NOTE(review): this tensor is only logged below -- the pipeline consumes
    # the PIL images in processed_faces directly, not this tensor.
    conditioning_tensor = torch.stack(face_tensors).unsqueeze(0)

    print(f"Prompt: {styled_prompt}")
    print(f"Negative Prompt: {styled_negative_prompt}")
    print(f"Output Width: {output_w}")
    print(f"Output Height: {output_h}")
    print(f"Number of Outputs: {num_outputs}")
    print(f"Number of Inference Steps: {num_steps}")
    print(f"Start Merge Step: {start_merge_step}")
    print(f"Guidance Scale: {guidance_scale}")
    print(f"Device: {device}")
    print(f"Number of Processed Faces: {len(processed_faces)}")
    print(f"Conditioning Tensor Shape: {conditioning_tensor.shape}")

    generator = torch.manual_seed(seed)
    try:
        result = pipe(
            prompt=styled_prompt,
            width=output_w,
            height=output_h,
            input_id_images=processed_faces,
            negative_prompt=styled_negative_prompt,
            num_images_per_prompt=num_outputs,
            num_inference_steps=num_steps,
            start_merge_step=start_merge_step,
            generator=generator,
            guidance_scale=guidance_scale,
        ).images

        if result:
            final_image = result[0]
            final_image.save("result.png")
            print("Image saved as result.png")
        else:
            print("No image generated")
    except Exception as e:
        print(f"Error during image generation: {e}")
|
|
|
# Script entry point: run only when executed directly, not on import.
if __name__ == "__main__":
    main()
|
|
|