| import torch
|
| import numpy as np
|
| import os
|
| import sys
|
| from concurrent.futures import ThreadPoolExecutor
|
| from diffusers.utils import load_image
|
| from diffusers import EulerDiscreteScheduler
|
| from huggingface_hub import hf_hub_download
|
| from photomaker import PhotoMakerStableDiffusionXLPipeline
|
| from rembg import remove
|
| from PIL import Image
|
|
|
|
|
# Prompt style templates: name -> (positive template with "{prompt}" slot,
# negative-prompt terms appended to the caller's negative prompt).
styles = {
    "Cinematic HD": ("cinematic HD {prompt}", "low quality"),
}
|
|
|
|
|
# Hugging Face model id for the SDXL base checkpoint PhotoMaker runs on.
base_model_path = 'SG161222/RealVisXL_V3.0'
# Local directory holding the input person photos to cut out and composite.
person_image_folder = 'in'
# Background/environment image the person cut-outs are pasted onto.
environment_image_path = 'environment1024.jpg'
|
|
|
# Select the best available compute device: CUDA first, then Apple-Silicon
# MPS (macOS only), otherwise CPU.
try:
    if torch.cuda.is_available():
        device = "cuda"
    elif sys.platform == "darwin" and torch.backends.mps.is_available():
        device = "mps"
    else:
        device = "cpu"
except Exception as exc:
    # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt still
    # propagate; report the failure instead of swallowing it silently.
    print(f"Device detection failed ({exc}); falling back to CPU")
    device = "cpu"
|
|
|
# Largest value usable as a 32-bit RNG seed.
MAX_SEED = np.iinfo(np.int32).max
# Style applied to every prompt; must be a key of `styles`.
DEFAULT_STYLE_NAME = "Cinematic HD"
|
|
|
|
|
# Download the PhotoMaker ID-adapter checkpoint from the Hub (cached locally).
photomaker_ckpt = hf_hub_download(repo_id="TencentARC/PhotoMaker", filename="photomaker-v1.bin", repo_type="model")

# Pick the model dtype per backend: float16 on MPS, bfloat16 everywhere else.
# NOTE(review): the non-MPS branch also covers plain CPU, where bfloat16 can
# be slow or unsupported on older hardware — confirm this is intended.
if device == "mps":
    torch_dtype = torch.float16
else:
    torch_dtype = torch.bfloat16

# Load the SDXL pipeline with PhotoMaker support and move it to the device.
# NOTE(review): variant="fp16" loads fp16 weight files even when torch_dtype
# is bfloat16; weights are cast on load — presumably intentional, verify.
pipe = PhotoMakerStableDiffusionXLPipeline.from_pretrained(
    base_model_path,
    torch_dtype=torch_dtype,
    use_safetensors=True,
    variant="fp16",
).to(device)

# Attach the PhotoMaker adapter. "img" is the trigger word that must appear
# exactly once in every prompt (enforced in generate_combined_image).
pipe.load_photomaker_adapter(
    os.path.dirname(photomaker_ckpt),
    subfolder="",
    weight_name=os.path.basename(photomaker_ckpt),
    trigger_word="img"
)
# Ensure the ID encoder lives on the same device as the rest of the pipeline.
pipe.id_encoder.to(device)

# Swap in an Euler scheduler and fuse the adapter's LoRA weights into the
# base weights so inference skips the separate LoRA forward pass.
pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config)
pipe.fuse_lora()
|
|
|
def remove_background(image_path):
    """Open the image at *image_path* and strip its background via rembg.

    Returns the cut-out PIL image, or None (after printing the error)
    if opening or background removal fails.
    """
    try:
        return remove(Image.open(image_path))
    except Exception as e:
        print(f"Error in remove_background: {e}")
        return None
|
|
|
def overlay_images(background, person_images):
    """Paste every image in *person_images*, centered, onto *background*.

    Each cut-out's own alpha channel is used as the paste mask so removed
    backgrounds stay transparent. Returns an RGB composite; on failure the
    error is printed and the background (as converted so far) is returned.
    """
    try:
        background = background.convert("RGBA")
        for cutout in person_images:
            layer = cutout.convert("RGBA")
            offset = (
                (background.width - layer.width) // 2,
                (background.height - layer.height) // 2,
            )
            background.paste(layer, offset, layer)
        return background.convert("RGB")
    except Exception as e:
        print(f"Error in overlay_images: {e}")
        return background
|
|
|
def generate_combined_image(combined_image, prompt, negative_prompt, num_steps, style_strength_ratio, num_outputs, guidance_scale, seed):
    """Run the PhotoMaker pipeline using *combined_image* as the identity reference.

    Args:
        combined_image: PIL image fed to the ID encoder.
        prompt: text prompt; must contain the trigger word "img" exactly once.
        negative_prompt: text to steer generation away from.
        num_steps: number of denoising steps.
        style_strength_ratio: percentage (0-100) of steps before ID merging starts.
        num_outputs: images generated per prompt (only the first is returned).
        guidance_scale: classifier-free guidance strength.
        seed: RNG seed for reproducible generation.

    Returns:
        The first generated PIL image, or None (after printing the error)
        if validation or generation fails.
    """
    try:
        # The adapter requires the trigger word exactly once in the prompt.
        person_token_id = pipe.tokenizer.convert_tokens_to_ids("img")
        input_ids = pipe.tokenizer.encode(prompt)
        if person_token_id not in input_ids:
            # Fixed: these messages had a pointless `f` prefix (no placeholders).
            raise ValueError("Cannot find the trigger word 'img' in text prompt!")
        if input_ids.count(person_token_id) > 1:
            raise ValueError("Cannot use multiple trigger words 'img' in text prompt!")

        # Wrap prompt/negative prompt in the default style template.
        prompt, negative_prompt = apply_style(DEFAULT_STYLE_NAME, prompt, negative_prompt)

        input_id_images = [combined_image]
        generator = torch.Generator(device=device).manual_seed(seed)

        # Begin merging identity features part-way through denoising,
        # capped at step 30.
        start_merge_step = min(int(float(style_strength_ratio) / 100 * num_steps), 30)

        images = pipe(
            prompt=prompt,
            width=1280,
            height=720,
            input_id_images=input_id_images,
            negative_prompt=negative_prompt,
            num_images_per_prompt=num_outputs,
            num_inference_steps=num_steps,
            start_merge_step=start_merge_step,
            generator=generator,
            guidance_scale=guidance_scale,
        ).images
        return images[0]
    except Exception as e:
        print(f"Error in generate_combined_image: {e}")
        return None
|
|
|
def apply_style(style_name: str, positive: str, negative: str = ""):
    """Expand the prompts through the named style template.

    Unknown style names fall back to the "Cinematic HD" entry. The style's
    own negative terms are prefixed (space-separated) to *negative*.
    """
    template, style_negative = styles.get(style_name, styles["Cinematic HD"])
    styled_positive = template.replace("{prompt}", positive)
    styled_negative = style_negative + ' ' + negative
    return styled_positive, styled_negative
|
|
|
def process_image(image_path):
    # Thin wrapper so ThreadPoolExecutor.map can dispatch background removal
    # per image path; returns the cut-out image or None on failure.
    return remove_background(image_path)
|
|
|
def main():
    """End-to-end driver: cut out each person photo, composite the cut-outs
    onto the environment image, then run PhotoMaker and save the result."""
    prompt = "cinematic person img vicking, 35mm photograph, film, bokeh, professional, 4k, highly detailed,"
    negative_prompt = "nsfw, lowres, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality, normal quality, jpeg artifacts, signature, watermark, username, blurry"
    num_steps = 12
    style_strength_ratio = 48
    num_outputs = 1
    guidance_scale = 7.8
    seed = 52

    # Load the backdrop; abort on failure.
    try:
        environment_image = load_image(environment_image_path)
    except Exception as e:
        print(f"Error loading environment image: {e}")
        return

    # Strip backgrounds from every person photo in parallel, dropping failures.
    try:
        person_image_paths = [
            os.path.join(person_image_folder, filename)
            for filename in os.listdir(person_image_folder)
            if filename.lower().endswith(('.png', '.jpg', '.jpeg'))
        ]
        with ThreadPoolExecutor() as executor:
            cutouts = executor.map(process_image, person_image_paths)
            person_images = [img for img in cutouts if img is not None]
    except Exception as e:
        print(f"Error processing person images: {e}")
        return

    # Composite, then round-trip through disk before feeding the pipeline.
    combined_image = overlay_images(environment_image, person_images)
    combined_image_path = "combined_image.jpg"
    try:
        combined_image.save(combined_image_path)
    except Exception as e:
        print(f"Error saving combined image: {e}")
        return
    try:
        combined_image = load_image(combined_image_path)
    except Exception as e:
        print(f"Error loading combined image: {e}")
        return

    result_image = generate_combined_image(
        combined_image,
        prompt,
        negative_prompt,
        num_steps,
        style_strength_ratio,
        num_outputs,
        guidance_scale,
        seed,
    )

    if result_image is None:
        print("No image generated")
        return
    try:
        result_image.save("result.jpg")
        print("Image saved as result.jpg")
    except Exception as e:
        print(f"Error saving result image: {e}")


if __name__ == "__main__":
    main()
|
|
|