Spaces:

jolieee206
/

ComfyUI-Style-IPAdapterGenerator

Runtime error

App Files Files Community

ComfyUI-Style-IPAdapterGenerator / app.py

JoJoMonroe

Fix Gradio compatibility issues

c15729e 5 months ago

raw

history blame

15.6 kB

	import gradio as gr
	import torch
	from PIL import Image
	import numpy as np
	from diffusers import StableDiffusionPipeline, StableDiffusionXLPipeline, DPMSolverMultistepScheduler
	from diffusers.utils import load_image
	import cv2
	import os
	from typing import Optional, Tuple
	import warnings
	import random
	from huggingface_hub import hf_hub_download
	warnings.filterwarnings("ignore")

	# IPAdapter is an optional dependency. HAS_IP_ADAPTER records whether the
	# import worked so the rest of the module can branch on it.
	try:
	    from ip_adapter import IPAdapter
	except ImportError:
	    HAS_IP_ADAPTER = False
	    print("IPAdapter not found, using fallback implementation")
	else:
	    HAS_IP_ADAPTER = True

	# Mutable module-level state, rebound by load_model().
	pipe = None           # currently loaded diffusers pipeline, if any
	ip_adapter = None     # adapter object wrapping `pipe`, if any
	current_model = None  # MODELS key that `pipe` was loaded from

	# Use the GPU whenever torch reports one, otherwise fall back to the CPU.
	if torch.cuda.is_available():
	    device = "cuda"
	else:
	    device = "cpu"

	# Display name shown in the UI -> Hugging Face model id.
	MODELS = {
	    "Stable Diffusion 1.5": "runwayml/stable-diffusion-v1-5",
	    "Stable Diffusion XL": "stabilityai/stable-diffusion-xl-base-1.0",
	}

	# Output sizes offered in the UI, formatted as "<width>x<height>".
	RESOLUTIONS = ["512x512", "768x768", "1024x1024", "512x768", "768x512"]

	class FallbackIPAdapter:
	    """Stand-in used when the real IPAdapter cannot be imported or loaded.

	    It offers the two methods the app calls (``set_scale`` and ``generate``)
	    but runs the wrapped pipeline as plain text-to-image: neither the
	    reference image nor the stored scale influences the output.
	    """

	    def __init__(self, pipe, device):
	        """Remember the pipeline to call and the device used for seeding."""
	        self.pipe = pipe
	        self.device = device
	        self.scale = 1.0

	    def set_scale(self, scale):
	        """Store ``scale``; kept for API parity, ``generate`` does not read it."""
	        self.scale = scale

	    def generate(self, pil_image, prompt, negative_prompt="", **kwargs):
	        """Generate images from ``prompt`` with the wrapped pipeline.

	        Args:
	            pil_image: Reference image. Accepted for interface compatibility
	                only; this fallback does not condition on it.
	            prompt: Text prompt forwarded to the pipeline.
	            negative_prompt: Negative prompt forwarded to the pipeline.
	            **kwargs: Optional ``width``/``height`` (default 512),
	                ``num_inference_steps`` (default 20), ``guidance_scale``
	                (default 7.5) and ``seed`` (random when missing or ``None``).

	        Returns:
	            The pipeline's ``images``. If the pipeline raises, a one-element
	            list holding a gray placeholder image of the requested size.
	        """
	        # Resolved outside the try block so the placeholder branch can
	        # always rely on them.
	        width = kwargs.get('width', 512)
	        height = kwargs.get('height', 512)

	        # Only draw a random seed when the caller did not supply one; an
	        # explicit seed=None is treated the same as a missing seed.
	        seed = kwargs.get('seed')
	        if seed is None:
	            seed = random.randint(0, 2**32 - 1)

	        try:
	            generator = torch.Generator(device=self.device).manual_seed(seed)
	            result = self.pipe(
	                prompt=prompt,
	                negative_prompt=negative_prompt,
	                num_inference_steps=kwargs.get('num_inference_steps', 20),
	                guidance_scale=kwargs.get('guidance_scale', 7.5),
	                width=width,
	                height=height,
	                generator=generator,
	            )
	            return result.images

	        except Exception as e:
	            print(f"Fallback generation error: {e}")
	            # Return a blank image as last resort
	            return [Image.new('RGB', (width, height), (128, 128, 128))]

	def parse_resolution(resolution_str: str) -> Tuple[int, int]:
	    """Parse a ``"<width>x<height>"`` string into a ``(width, height)`` tuple.

	    Surrounding whitespace is ignored and the separator may be ``x`` or ``X``.

	    Args:
	        resolution_str: Text such as ``"512x768"``.

	    Returns:
	        ``(width, height)`` as positive integers.

	    Raises:
	        ValueError: If the text is not two positive integers separated by ``x``.
	    """
	    parts = resolution_str.strip().lower().split('x')
	    if len(parts) != 2:
	        raise ValueError(
	            f"Invalid resolution {resolution_str!r}: expected '<width>x<height>'"
	        )

	    try:
	        width, height = (int(part) for part in parts)
	    except ValueError:
	        raise ValueError(
	            f"Invalid resolution {resolution_str!r}: width and height must be integers"
	        ) from None

	    if width <= 0 or height <= 0:
	        raise ValueError(
	            f"Invalid resolution {resolution_str!r}: width and height must be positive"
	        )
	    return width, height

	def load_model(model_name: str):
	    """Load the pipeline for ``model_name`` and attach an adapter to it.

	    Rebinds the module-level ``pipe``, ``ip_adapter`` and ``current_model``.
	    Nothing is reloaded when the requested model is already active.

	    Args:
	        model_name: A key of ``MODELS``.

	    Returns:
	        A human-readable status string. Failures are reported as a string
	        containing ``"Error"`` (generate_image tests for that substring)
	        instead of being raised.
	    """
	    global pipe, ip_adapter, current_model

	    if current_model == model_name and pipe is not None:
	        return "Model already loaded"

	    # Validate the name before touching the loaded model, so a bad request
	    # does not throw away a working pipeline.
	    if model_name not in MODELS:
	        return f"❌ Error loading model: unknown model {model_name!r}"
	    model_id = MODELS[model_name]

	    try:
	        # Release the previous model before loading the next one. The globals
	        # are rebound to None rather than deleted: `del` would remove the
	        # names themselves, and after a failed load every later
	        # `pipe is not None` check would raise NameError.
	        pipe = None
	        ip_adapter = None
	        current_model = None
	        if torch.cuda.is_available():
	            torch.cuda.empty_cache()

	        is_xl = "xl" in model_id.lower()
	        on_gpu = device == "cuda"
	        dtype = torch.float16 if on_gpu else torch.float32

	        # Load pipeline based on model type
	        if is_xl:
	            pipe = StableDiffusionXLPipeline.from_pretrained(
	                model_id,
	                torch_dtype=dtype,
	                use_safetensors=True,
	                variant="fp16" if on_gpu else None,
	            )
	        else:
	            pipe = StableDiffusionPipeline.from_pretrained(
	                model_id,
	                torch_dtype=dtype,
	                use_safetensors=True,
	            )

	        pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
	        pipe = pipe.to(device)

	        if on_gpu:
	            # Best-effort memory optimisations: each is attempted only if the
	            # pipeline exposes it, and a failure is logged, never fatal.
	            for method_name in (
	                "enable_memory_efficient_attention",
	                "enable_xformers_memory_efficient_attention",
	            ):
	                enable = getattr(pipe, method_name, None)
	                if enable is None:
	                    continue
	                try:
	                    enable()
	                except Exception as e:
	                    print(f"{method_name} unavailable: {e}")

	        # Prefer the real IPAdapter; fall back when it is missing or fails.
	        if HAS_IP_ADAPTER:
	            weights_name = "ip-adapter_sdxl.bin" if is_xl else "ip-adapter_sd15.bin"
	            try:
	                # NOTE(review): argument meaning depends on the installed
	                # ip_adapter package - confirm against its constructor.
	                ip_adapter = IPAdapter(pipe, "h94/IP-Adapter", weights_name, device)
	            except Exception as e:
	                print(f"IPAdapter loading failed, using fallback: {e}")
	        if ip_adapter is None:
	            ip_adapter = FallbackIPAdapter(pipe, device)

	        current_model = model_name
	        return f"✅ {model_name} loaded successfully"

	    except Exception as e:
	        # Leave the module in a clean "nothing loaded" state so the next
	        # call starts from scratch.
	        pipe = None
	        ip_adapter = None
	        current_model = None
	        return f"❌ Error loading model: {str(e)}"

	def enhance_face(image: Image.Image, use_codeformer: bool = False) -> Image.Image:
	    """Run the selected face-restoration backend over ``image``.

	    Neither backend is implemented yet, so both choices currently hand the
	    input image back unchanged.

	    Args:
	        image: Image to enhance.
	        use_codeformer: Select the CodeFormer branch instead of GFPGAN.

	    Returns:
	        The enhanced image (currently ``image`` itself).
	    """
	    try:
	        if not use_codeformer:
	            # GFPGAN placeholder - needs a real implementation.
	            enhanced = image
	        else:
	            # CodeFormer placeholder - needs a real implementation.
	            enhanced = image
	    except Exception as e:
	        print(f"Face enhancement failed: {e}")
	        enhanced = image
	    return enhanced

	def apply_lora(pipe, lora_path: str, lora_scale: float = 1.0) -> bool:
	    """Load LoRA weights from ``lora_path`` and fuse them into ``pipe``.

	    Args:
	        pipe: Pipeline exposing ``load_lora_weights`` and ``fuse_lora``.
	        lora_path: Filesystem path of the LoRA weights. Empty or missing
	            paths are ignored.
	        lora_scale: Strength passed to ``fuse_lora``.

	    Returns:
	        ``True`` when the weights were loaded and fused, ``False`` when the
	        path is empty, does not exist, or loading/fusing raised.
	    """
	    if not lora_path or not os.path.exists(lora_path):
	        return False

	    try:
	        pipe.load_lora_weights(lora_path)
	        # Pass the scale by keyword: the first positional parameter of
	        # fuse_lora is not the scale.
	        pipe.fuse_lora(lora_scale=lora_scale)
	    except Exception as e:
	        print(f"LoRA application failed: {e}")
	        return False
	    return True

	def _remove_lora(pipeline):
	    """Undo a fused LoRA so later requests start from the base weights."""
	    try:
	        pipeline.unfuse_lora()
	        pipeline.unload_lora_weights()
	    except Exception as e:
	        print(f"LoRA cleanup failed: {e}")


	def generate_image(
	    prompt: str,
	    reference_image: Image.Image,
	    model_name: str,
	    guidance_scale: float,
	    resolution: str,
	    num_steps: int,
	    ip_adapter_scale: float,
	    seed: int,
	    enable_face_enhancement: bool,
	    use_codeformer: bool,
	    lora_path: str,
	    lora_scale: float
	) -> Tuple[Optional[Image.Image], str]:
	    """Generate an image from a prompt and a reference image.

	    Args:
	        prompt: Text prompt; must contain non-whitespace text.
	        reference_image: Reference image; must not be ``None``.
	        model_name: Key of ``MODELS`` handed to ``load_model``.
	        guidance_scale: Guidance scale forwarded to the adapter.
	        resolution: ``"<width>x<height>"`` string.
	        num_steps: Number of inference steps.
	        ip_adapter_scale: Scale set on the adapter before generating.
	        seed: Seed; a value of 0 or less, or ``None``, selects a random seed.
	        enable_face_enhancement: Run ``enhance_face`` on the result.
	        use_codeformer: Backend choice forwarded to ``enhance_face``.
	        lora_path: Optional LoRA weights path; blank disables LoRA.
	        lora_scale: LoRA strength forwarded to ``apply_lora``.

	    Returns:
	        ``(comparison_image, status)`` on success or ``(None, error_message)``
	        on failure. Errors are reported through the status string, not raised.
	    """
	    # Guard clauses run before any model work. `not prompt` also covers None.
	    if not prompt or not prompt.strip():
	        return None, "❌ Please enter a text prompt"

	    if reference_image is None:
	        return None, "❌ Please upload a reference image"

	    lora_applied = False
	    try:
	        # Load model if needed
	        load_status = load_model(model_name)
	        if "Error" in load_status:
	            return None, load_status

	        width, height = parse_resolution(resolution)

	        # Coerce numeric inputs defensively: a missing seed is treated like 0
	        # (random) and both values are forced to int before use.
	        seed = int(seed) if seed else 0
	        if seed <= 0:
	            seed = random.randint(0, 2**32-1)
	        num_steps = int(num_steps)

	        # Set seed for reproducibility
	        torch.manual_seed(seed)
	        if torch.cuda.is_available():
	            torch.cuda.manual_seed(seed)

	        # Apply LoRA if specified
	        if lora_path and lora_path.strip():
	            lora_applied = apply_lora(pipe, lora_path.strip(), lora_scale)

	        # Prepare reference image
	        ref_image = reference_image.convert("RGB")
	        ref_image = ref_image.resize((width, height), Image.Resampling.LANCZOS)

	        # Generate image with IPAdapter
	        with torch.autocast(device):
	            ip_adapter.set_scale(ip_adapter_scale)
	            generated_images = ip_adapter.generate(
	                pil_image=ref_image,
	                prompt=prompt,
	                negative_prompt="blurry, low quality, distorted, deformed, ugly, bad anatomy",
	                num_inference_steps=num_steps,
	                guidance_scale=guidance_scale,
	                width=width,
	                height=height,
	                seed=seed
	            )

	        generated_image = generated_images[0]

	        # Apply face enhancement if enabled
	        if enable_face_enhancement:
	            generated_image = enhance_face(generated_image, use_codeformer)

	        # Create side-by-side comparison
	        comparison = create_comparison(ref_image, generated_image)

	        status = f"✅ Image generated successfully (seed: {seed})"
	        if lora_applied:
	            status += f" (LoRA applied: {lora_scale:.2f})"

	        return comparison, status

	    except Exception as e:
	        error_msg = f"❌ Generation failed: {str(e)}"
	        print(error_msg)
	        return None, error_msg

	    finally:
	        # The pipeline is shared across requests; without this the fused
	        # LoRA would keep affecting requests that did not ask for it.
	        if lora_applied:
	            _remove_lora(pipe)

	def create_comparison(reference: Image.Image, generated: Image.Image) -> Image.Image:
	    """Place ``reference`` and ``generated`` next to each other on one canvas.

	    Both images are scaled to a shared height (the smaller of the two
	    heights, capped at 512 px) while keeping their aspect ratios, then pasted
	    left-to-right on a white background with a 10 px gap between them.

	    Args:
	        reference: Image shown on the left.
	        generated: Image shown on the right.

	    Returns:
	        A new RGB image containing both panels.
	    """
	    gap = 10  # horizontal space between the two panels, in pixels
	    panel_height = min(reference.size[1], generated.size[1], 512)

	    panels = []
	    for source in (reference, generated):
	        src_width, src_height = source.size
	        panel_width = int(panel_height * (src_width / src_height))
	        panels.append(
	            source.resize((panel_width, panel_height), Image.Resampling.LANCZOS)
	        )
	    left, right = panels

	    canvas = Image.new(
	        'RGB',
	        (left.width + right.width + gap, panel_height),
	        (255, 255, 255),
	    )
	    canvas.paste(left, (0, 0))
	    canvas.paste(right, (left.width + gap, 0))
	    return canvas

	# Create Gradio interface
	def create_interface():
	    """Build the Gradio Blocks UI and wire the generate button.

	    The left column holds all inputs (model, prompt, reference image,
	    sampling settings, enhancement and LoRA options); the right column shows
	    the status text and the side-by-side result.

	    Returns:
	        The constructed ``gr.Blocks`` app, not yet launched.
	    """
	    with gr.Blocks(title="ComfyUI-Style IPAdapter Generator", theme=gr.themes.Soft()) as demo:
	        gr.Markdown("""
	        # 🎨 ComfyUI-Style IPAdapter Generator
	        Generate images using text prompts and reference images with IPAdapter technology.
	        Upload a reference image (face or style guide) and describe what you want to create!
	        """)

	        with gr.Row():
	            with gr.Column(scale=1):
	                gr.Markdown("### 📝 Input Controls")

	                # Model selection
	                model_dropdown = gr.Dropdown(
	                    choices=list(MODELS.keys()),
	                    value="Stable Diffusion 1.5",
	                    label="Model"
	                )

	                # Text prompt
	                prompt_input = gr.Textbox(
	                    label="Text Prompt",
	                    placeholder="Describe the image you want to generate...",
	                    lines=3
	                )

	                # Reference image
	                gr.Markdown("Reference Image - Upload a face or style reference image")
	                reference_input = gr.Image(
	                    label="Reference Image",
	                    type="pil"
	                )

	                with gr.Row():
	                    guidance_scale = gr.Slider(
	                        minimum=1.0,
	                        maximum=20.0,
	                        value=7.5,
	                        step=0.5,
	                        label="Guidance Scale"
	                    )

	                    ip_adapter_scale = gr.Slider(
	                        minimum=0.0,
	                        maximum=2.0,
	                        value=1.0,
	                        step=0.1,
	                        label="IPAdapter Scale"
	                    )

	                with gr.Row():
	                    resolution_dropdown = gr.Dropdown(
	                        choices=RESOLUTIONS,
	                        value="512x512",
	                        label="Resolution"
	                    )

	                    num_steps = gr.Slider(
	                        minimum=10,
	                        maximum=50,
	                        value=20,
	                        step=1,
	                        label="Inference Steps"
	                    )

	                seed_input = gr.Number(
	                    label="Seed (0 for random)",
	                    value=0,
	                    precision=0
	                )

	                # Enhancement options
	                gr.Markdown("### 🔧 Enhancement Options")

	                enable_face_enhancement = gr.Checkbox(
	                    label="Enable Face Enhancement",
	                    value=False
	                )

	                use_codeformer = gr.Checkbox(
	                    label="Use CodeFormer (vs GFPGAN)",
	                    value=False
	                )

	                # LoRA options
	                gr.Markdown("### 🎭 LoRA Style Options")

	                lora_path = gr.Textbox(
	                    label="LoRA Model Path (optional)",
	                    placeholder="/path/to/lora/model.safetensors"
	                )

	                lora_scale = gr.Slider(
	                    minimum=0.0,
	                    maximum=2.0,
	                    value=1.0,
	                    step=0.1,
	                    label="LoRA Scale"
	                )

	                generate_btn = gr.Button("🚀 Generate Image", variant="primary", size="lg")

	            with gr.Column(scale=1):
	                gr.Markdown("### 🖼️ Results")

	                status_output = gr.Textbox(
	                    label="Status",
	                    interactive=False,
	                    value="Ready to generate..."
	                )

	                # Plain "|" in the label: "\|" is an invalid string escape.
	                output_image = gr.Image(
	                    label="Reference | Generated",
	                    type="pil"
	                )

	        # Event handlers. The input order must match the positional
	        # parameters of generate_image.
	        generate_btn.click(
	            fn=generate_image,
	            inputs=[
	                prompt_input,
	                reference_input,
	                model_dropdown,
	                guidance_scale,
	                resolution_dropdown,
	                num_steps,
	                ip_adapter_scale,
	                seed_input,
	                enable_face_enhancement,
	                use_codeformer,
	                lora_path,
	                lora_scale
	            ],
	            outputs=[output_image, status_output]
	        )

	        # Examples fill in the prompt only, so choosing one does not clear a
	        # reference image the user has already uploaded.
	        gr.Markdown("### 📚 Example Prompts")
	        gr.Examples(
	            examples=[
	                ["A professional headshot photo, studio lighting, high quality"],
	                ["An oil painting portrait in the style of Renaissance masters"],
	                ["A cyberpunk character with neon lighting and futuristic elements"],
	                ["A fantasy warrior in medieval armor, dramatic lighting"],
	                ["An anime-style character with vibrant colors"]
	            ],
	            inputs=[prompt_input]
	        )

	    return demo

	if __name__ == "__main__":
	    # Script entry point: report the device, build the UI and serve it.
	    # No model is loaded here; load_model() runs on the first generate request.
	    print("🚀 Starting ComfyUI-Style IPAdapter Generator...")
	    print(f"Device: {device}")

	    # NOTE(review): share=True asks Gradio for a public share link and
	    # 0.0.0.0 listens on all interfaces - confirm both are intended.
	    launch_options = {
	        "server_name": "0.0.0.0",
	        "server_port": 7860,
	        "share": True,
	        "show_error": True,
	    }

	    demo = create_interface()
	    demo.launch(**launch_options)