"""Gradio front end for text + reference-image generation with Stable Diffusion.

The app loads a Stable Diffusion 1.5 or SDXL pipeline on demand, wraps it in
an IP-Adapter when the optional ``ip_adapter`` package can be imported (and in
``FallbackIPAdapter`` otherwise), and shows the reference image next to the
generated image.
"""

import gc
import os
import random
import warnings
from typing import Optional, Tuple

import gradio as gr
import torch
from PIL import Image
import numpy as np
from diffusers import (
    StableDiffusionPipeline,
    StableDiffusionXLPipeline,
    DPMSolverMultistepScheduler,
)
from diffusers.utils import load_image
import cv2
from huggingface_hub import hf_hub_download

warnings.filterwarnings("ignore")

# Try to import IPAdapter, fall back to the local stand-in when unavailable.
try:
    from ip_adapter import IPAdapter
    HAS_IP_ADAPTER = True
except ImportError:
    HAS_IP_ADAPTER = False
    print("IPAdapter not found, using fallback implementation")

# Module-level model state. These names are rebound by load_model() and read
# by generate_image(); they must always stay bound (None when nothing loaded).
pipe = None
ip_adapter = None
device = "cuda" if torch.cuda.is_available() else "cpu"
current_model = None

# Display name -> Hugging Face model id.
MODELS = {
    "Stable Diffusion 1.5": "runwayml/stable-diffusion-v1-5",
    "Stable Diffusion XL": "stabilityai/stable-diffusion-xl-base-1.0",
}

# Choices offered in the UI, formatted as "<width>x<height>".
RESOLUTIONS = [
    "512x512",
    "768x768",
    "1024x1024",
    "512x768",
    "768x512",
]


class FallbackIPAdapter:
    """Stand-in used when the real IPAdapter cannot be imported or loaded.

    It exposes the two methods this file calls on an adapter (``set_scale``
    and ``generate``) but performs plain text-to-image generation: the
    reference image and the stored scale do not influence the result.
    """

    def __init__(self, pipe, device):
        self.pipe = pipe
        self.device = device
        self.scale = 1.0

    def set_scale(self, scale):
        """Store ``scale``; kept for interface parity, not used by generate()."""
        self.scale = scale

    def generate(self, pil_image, prompt, negative_prompt="", **kwargs):
        """Run the wrapped pipeline and return its list of images.

        Args:
            pil_image: Reference image. Accepted for interface parity only;
                this fallback does not condition on it.
            prompt: Text prompt forwarded to the pipeline.
            negative_prompt: Negative prompt forwarded to the pipeline.
            **kwargs: Optional ``width``, ``height`` (default 512),
                ``num_inference_steps`` (default 20), ``guidance_scale``
                (default 7.5) and ``seed`` (random when missing or None).

        Returns:
            ``result.images`` from the pipeline, or a one-element list holding
            a flat gray image of the requested size if the pipeline raises.
        """
        # Resolved outside the try so the except branch can always use them.
        width = kwargs.get('width', 512)
        height = kwargs.get('height', 512)
        seed = kwargs.get('seed')
        if seed is None:
            seed = random.randint(0, 2**32 - 1)

        try:
            generator = torch.Generator(device=self.device).manual_seed(seed)
            result = self.pipe(
                prompt=prompt,
                negative_prompt=negative_prompt,
                num_inference_steps=kwargs.get('num_inference_steps', 20),
                guidance_scale=kwargs.get('guidance_scale', 7.5),
                width=width,
                height=height,
                generator=generator,
            )
            return result.images
        except Exception as e:
            print(f"Fallback generation error: {e}")
            # Return a blank image as last resort
            return [Image.new('RGB', (width, height), (128, 128, 128))]


def parse_resolution(resolution_str: str) -> Tuple[int, int]:
    """Parse a ``"<width>x<height>"`` string into a ``(width, height)`` tuple.

    The separator is matched case-insensitively and surrounding whitespace is
    ignored. Raises ``ValueError`` when the string does not contain exactly
    two integer fields.
    """
    width, height = map(int, resolution_str.strip().lower().split('x'))
    return width, height


def _enable_memory_optimizations(pipeline) -> None:
    """Best-effort: turn on whichever attention optimizations the pipeline has."""
    candidates = (
        "enable_memory_efficient_attention",
        "enable_xformers_memory_efficient_attention",
    )
    for method_name in candidates:
        enable = getattr(pipeline, method_name, None)
        if enable is None:
            continue
        try:
            enable()
        except Exception as e:
            # Optional speed-up only; generation still works without it.
            print(f"{method_name} unavailable: {e}")


def load_model(model_name: str):
    """Load the pipeline for ``model_name`` and wrap it in an adapter.

    Rebinds the module globals ``pipe``, ``ip_adapter`` and ``current_model``.
    If the requested model is already loaded nothing is reloaded.

    Args:
        model_name: A key of ``MODELS``.

    Returns:
        A status string. Failures are reported as a string containing
        ``"Error"`` rather than raised, because generate_image() checks for
        that substring.
    """
    global pipe, ip_adapter, current_model

    if current_model == model_name and pipe is not None:
        return "Model already loaded"

    try:
        # Drop the previous models by rebinding to None. Using `del` on these
        # globals would unbind the names, so a failed load would leave every
        # later call raising NameError on `pipe is not None`.
        pipe = None
        ip_adapter = None
        current_model = None
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        model_id = MODELS[model_name]
        is_xl = "xl" in model_id.lower()
        on_cuda = device == "cuda"
        dtype = torch.float16 if on_cuda else torch.float32

        # Build into locals and publish to the globals only on success.
        if is_xl:
            new_pipe = StableDiffusionXLPipeline.from_pretrained(
                model_id,
                torch_dtype=dtype,
                use_safetensors=True,
                variant="fp16" if on_cuda else None,
            )
        else:
            new_pipe = StableDiffusionPipeline.from_pretrained(
                model_id,
                torch_dtype=dtype,
                use_safetensors=True,
            )

        new_pipe.scheduler = DPMSolverMultistepScheduler.from_config(
            new_pipe.scheduler.config
        )
        new_pipe = new_pipe.to(device)

        if on_cuda:
            _enable_memory_optimizations(new_pipe)

        # Load IPAdapter, degrading to the fallback on any failure.
        if HAS_IP_ADAPTER:
            checkpoint = "ip-adapter_sdxl.bin" if is_xl else "ip-adapter_sd15.bin"
            try:
                new_adapter = IPAdapter(new_pipe, "h94/IP-Adapter", checkpoint, device)
            except Exception as e:
                print(f"IPAdapter loading failed, using fallback: {e}")
                new_adapter = FallbackIPAdapter(new_pipe, device)
        else:
            new_adapter = FallbackIPAdapter(new_pipe, device)

        pipe = new_pipe
        ip_adapter = new_adapter
        current_model = model_name
        return f"✅ {model_name} loaded successfully"

    except Exception as e:
        return f"❌ Error loading model: {str(e)}"


def enhance_face(image: Image.Image, use_codeformer: bool = False) -> Image.Image:
    """Placeholder for face enhancement; currently returns ``image`` unchanged.

    ``use_codeformer`` is meant to select CodeFormer over GFPGAN, but neither
    backend is implemented yet, so both choices are a no-op.
    """
    # TODO: wire up CodeFormer (use_codeformer=True) and GFPGAN (False).
    return image


def apply_lora(pipe, lora_path: str, lora_scale: float = 1.0):
    """Load LoRA weights from ``lora_path`` and fuse them into ``pipe``.

    Returns:
        True when the weights were loaded and fused; False when the path is
        empty, does not exist, or loading/fusing raised.
    """
    try:
        if lora_path and os.path.exists(lora_path):
            pipe.load_lora_weights(lora_path)
            # Pass the scale by keyword: the first positional parameter of
            # fuse_lora is not the scale.
            pipe.fuse_lora(lora_scale=lora_scale)
            return True
    except Exception as e:
        print(f"LoRA application failed: {e}")
    return False


def generate_image(
    prompt: str,
    reference_image: Image.Image,
    model_name: str,
    guidance_scale: float,
    resolution: str,
    num_steps: int,
    ip_adapter_scale: float,
    seed: int,
    enable_face_enhancement: bool,
    use_codeformer: bool,
    lora_path: str,
    lora_scale: float
) -> Tuple[Optional[Image.Image], str]:
    """Gradio click handler: generate an image and build the comparison view.

    Args:
        prompt: Text prompt; must be non-blank.
        reference_image: Reference picture; must not be None.
        model_name: Key of ``MODELS``; loaded on demand via load_model().
        guidance_scale: Forwarded to the adapter's ``generate``.
        resolution: ``"<width>x<height>"`` string.
        num_steps: Number of inference steps.
        ip_adapter_scale: Passed to the adapter's ``set_scale``.
        seed: Values <= 0 (or None) select a random seed.
        enable_face_enhancement: Whether to run enhance_face() on the result.
        use_codeformer: Forwarded to enhance_face().
        lora_path: Optional path to LoRA weights; blank to skip.
        lora_scale: Scale used when fusing the LoRA.

    Returns:
        ``(comparison_image, status)``; the image is None when validation or
        generation fails, and ``status`` then carries the error text.
    """
    if not prompt or not prompt.strip():
        return None, "❌ Please enter a text prompt"

    if reference_image is None:
        return None, "❌ Please upload a reference image"

    try:
        # Load model if needed
        load_status = load_model(model_name)
        if "Error" in load_status:
            return None, load_status

        width, height = parse_resolution(resolution)

        # The Number field may hand back None or a float; normalize to int.
        seed = int(seed) if seed is not None else 0
        if seed <= 0:
            seed = random.randint(0, 2**32 - 1)
        num_steps = int(num_steps)

        torch.manual_seed(seed)
        if torch.cuda.is_available():
            torch.cuda.manual_seed(seed)

        # Apply LoRA if specified
        lora_applied = False
        if lora_path and lora_path.strip():
            lora_applied = apply_lora(pipe, lora_path.strip(), lora_scale)

        # Prepare reference image
        ref_image = reference_image.convert("RGB")
        ref_image = ref_image.resize((width, height), Image.Resampling.LANCZOS)

        with torch.autocast(device):
            ip_adapter.set_scale(ip_adapter_scale)
            generated_images = ip_adapter.generate(
                pil_image=ref_image,
                prompt=prompt,
                negative_prompt="blurry, low quality, distorted, deformed, ugly, bad anatomy",
                num_inference_steps=num_steps,
                guidance_scale=guidance_scale,
                width=width,
                height=height,
                seed=seed,
            )

        generated_image = generated_images[0]

        if enable_face_enhancement:
            generated_image = enhance_face(generated_image, use_codeformer)

        comparison = create_comparison(ref_image, generated_image)

        status = f"✅ Image generated successfully (seed: {seed})"
        if lora_applied:
            status += f" (LoRA applied: {lora_scale:.2f})"

        return comparison, status

    except Exception as e:
        # Top-level UI boundary: report the failure in the status box.
        error_msg = f"❌ Generation failed: {str(e)}"
        print(error_msg)
        return None, error_msg


def create_comparison(reference: Image.Image, generated: Image.Image) -> Image.Image:
    """Return ``reference`` and ``generated`` side by side on a white canvas.

    Both images are scaled, keeping their aspect ratio, to a common height of
    at most 512 px and separated by a 10 px gap.
    """
    gap = 10
    ref_width, ref_height = reference.size
    gen_width, gen_height = generated.size

    # Limit height for display
    target_height = min(ref_height, gen_height, 512)

    # max(1, ...) keeps extremely narrow images from collapsing to width 0.
    ref_target_width = max(1, int(target_height * (ref_width / ref_height)))
    gen_target_width = max(1, int(target_height * (gen_width / gen_height)))

    ref_resized = reference.resize(
        (ref_target_width, target_height), Image.Resampling.LANCZOS
    )
    gen_resized = generated.resize(
        (gen_target_width, target_height), Image.Resampling.LANCZOS
    )

    total_width = ref_resized.width + gen_resized.width + gap
    comparison = Image.new('RGB', (total_width, target_height), (255, 255, 255))
    comparison.paste(ref_resized, (0, 0))
    comparison.paste(gen_resized, (ref_resized.width + gap, 0))

    return comparison


# Create Gradio interface
def create_interface():
    """Build and return the ``gr.Blocks`` UI wired to generate_image()."""
    with gr.Blocks(title="ComfyUI-Style IPAdapter Generator", theme=gr.themes.Soft()) as demo:
        gr.Markdown("""
        # 🎨 ComfyUI-Style IPAdapter Generator

        Generate images using text prompts and reference images with IPAdapter technology.
        Upload a reference image (face or style guide) and describe what you want to create!
        """)

        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### 📝 Input Controls")

                # Model selection
                model_dropdown = gr.Dropdown(
                    choices=list(MODELS.keys()),
                    value="Stable Diffusion 1.5",
                    label="Model"
                )

                # Text prompt
                prompt_input = gr.Textbox(
                    label="Text Prompt",
                    placeholder="Describe the image you want to generate...",
                    lines=3
                )

                # Reference image
                gr.Markdown("**Reference Image** - Upload a face or style reference image")
                reference_input = gr.Image(
                    label="Reference Image",
                    type="pil"
                )

                with gr.Row():
                    guidance_scale = gr.Slider(
                        minimum=1.0,
                        maximum=20.0,
                        value=7.5,
                        step=0.5,
                        label="Guidance Scale"
                    )
                    ip_adapter_scale = gr.Slider(
                        minimum=0.0,
                        maximum=2.0,
                        value=1.0,
                        step=0.1,
                        label="IPAdapter Scale"
                    )

                with gr.Row():
                    resolution_dropdown = gr.Dropdown(
                        choices=RESOLUTIONS,
                        value="512x512",
                        label="Resolution"
                    )
                    num_steps = gr.Slider(
                        minimum=10,
                        maximum=50,
                        value=20,
                        step=1,
                        label="Inference Steps"
                    )

                seed_input = gr.Number(
                    label="Seed (0 for random)",
                    value=0,
                    precision=0
                )

                # Enhancement options
                gr.Markdown("### 🔧 Enhancement Options")
                enable_face_enhancement = gr.Checkbox(
                    label="Enable Face Enhancement",
                    value=False
                )
                use_codeformer = gr.Checkbox(
                    label="Use CodeFormer (vs GFPGAN)",
                    value=False
                )

                # LoRA options
                gr.Markdown("### 🎭 LoRA Style Options")
                lora_path = gr.Textbox(
                    label="LoRA Model Path (optional)",
                    placeholder="/path/to/lora/model.safetensors"
                )
                lora_scale = gr.Slider(
                    minimum=0.0,
                    maximum=2.0,
                    value=1.0,
                    step=0.1,
                    label="LoRA Scale"
                )

                generate_btn = gr.Button("🚀 Generate Image", variant="primary", size="lg")

            with gr.Column(scale=1):
                gr.Markdown("### 🖼️ Results")

                status_output = gr.Textbox(
                    label="Status",
                    interactive=False,
                    value="Ready to generate..."
                )
                output_image = gr.Image(
                    label="Reference | Generated",
                    type="pil"
                )

        # Event handlers. The input order must match generate_image's
        # positional parameters.
        generate_btn.click(
            fn=generate_image,
            inputs=[
                prompt_input,
                reference_input,
                model_dropdown,
                guidance_scale,
                resolution_dropdown,
                num_steps,
                ip_adapter_scale,
                seed_input,
                enable_face_enhancement,
                use_codeformer,
                lora_path,
                lora_scale
            ],
            outputs=[output_image, status_output]
        )

        # Examples
        gr.Markdown("### 📚 Example Prompts")
        gr.Examples(
            examples=[
                ["A professional headshot photo, studio lighting, high quality", None],
                ["An oil painting portrait in the style of Renaissance masters", None],
                ["A cyberpunk character with neon lighting and futuristic elements", None],
                ["A fantasy warrior in medieval armor, dramatic lighting", None],
                ["An anime-style character with vibrant colors", None]
            ],
            inputs=[prompt_input, reference_input]
        )

    return demo


if __name__ == "__main__":
    # Models are loaded lazily on the first generation request.
    print("🚀 Starting ComfyUI-Style IPAdapter Generator...")
    print(f"Device: {device}")

    demo = create_interface()
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True,
        show_error=True
    )