import numpy as np
import torch
from diffusers import DDIMScheduler, StableDiffusionInpaintPipeline
from PIL import Image


def _resolve_device(device):
    """Return the device to run on, preferring CUDA when none is given.

    Args:
        device (str | None): Explicit device string ('cuda' or 'cpu'), or
            None to auto-detect.

    Returns:
        str: The resolved device string.
    """
    if device is None:
        return "cuda" if torch.cuda.is_available() else "cpu"
    return device


class LEDITSModel:
    """
    Implementation of LEDITS++ model for localized image editing using
    Stable Diffusion inpainting.
    """

    def __init__(self, model_id="runwayml/stable-diffusion-inpainting", device=None):
        """
        Initialize the LEDITS++ model.

        Args:
            model_id (str): Hugging Face model ID for the Stable Diffusion
                inpainting model.
            device (str, optional): Device to run the model on
                ('cuda' or 'cpu'). Auto-detected when None.
        """
        self.model_id = model_id
        self.device = _resolve_device(device)
        # Pipeline is loaded lazily on first use to save memory.
        self.pipe = None

    def load_model(self):
        """
        Load the Stable Diffusion inpainting pipeline (idempotent).
        """
        if self.pipe is not None:
            return

        # DDIM scheduler gives better quality for inpainting edits.
        scheduler = DDIMScheduler.from_pretrained(
            self.model_id,
            subfolder="scheduler"
        )

        self.pipe = StableDiffusionInpaintPipeline.from_pretrained(
            self.model_id,
            scheduler=scheduler,
            safety_checker=None  # Disable safety checker for NSFW content as per user request
        )
        self.pipe = self.pipe.to(self.device)

        # Attention slicing trades a little speed for much lower VRAM use.
        if self.device == "cuda":
            self.pipe.enable_attention_slicing()

    @staticmethod
    def _to_pil(data, grayscale=False):
        """Normalize an input to a PIL image.

        Args:
            data (numpy.ndarray | PIL.Image.Image): Float arrays are assumed
                to be normalized to [0, 1] and are scaled to uint8.
            grayscale (bool): When True, ensure the result is mode 'L'.
                (Fixes a bug where a PIL-supplied mask skipped the
                grayscale conversion that ndarray masks received.)

        Returns:
            PIL.Image.Image: The converted image.
        """
        if isinstance(data, np.ndarray):
            if data.dtype in (np.float32, np.float64):
                pil = Image.fromarray((data * 255).astype(np.uint8))
            else:
                pil = Image.fromarray(data)
        else:
            pil = data
        if grayscale and pil.mode != 'L':
            pil = pil.convert('L')
        return pil

    def edit_image(self, image, mask, prompt, intensity=0.5, guidance_scale=7.5, num_inference_steps=30):
        """
        Edit an image using the LEDITS++ approach.

        Args:
            image (numpy.ndarray): Input image (normalized to [0, 1]).
            mask (numpy.ndarray): Mask indicating the region to edit
                (values in [0, 1]).
            prompt (str): Text prompt describing the desired edit.
            intensity (float): Strength of the edit (0.0 to 1.0).
            guidance_scale (float): Guidance scale for the diffusion model.
            num_inference_steps (int): Number of denoising steps.

        Returns:
            numpy.ndarray: Edited image, float values in [0, 1].
        """
        self.load_model()

        image_pil = self._to_pil(image)
        mask_pil = self._to_pil(mask, grayscale=True)

        # Stable Diffusion requires dimensions divisible by 8; floor to the
        # nearest multiple, but never below 8 (tiny inputs would hit 0).
        width, height = image_pil.size
        new_width = max(8, width - (width % 8))
        new_height = max(8, height - (height % 8))
        if (new_width, new_height) != (width, height):
            image_pil = image_pil.resize((new_width, new_height), Image.LANCZOS)
            # NEAREST keeps the mask hard-edged; LANCZOS would introduce
            # intermediate gray values that shift the binarized edit boundary.
            mask_pil = mask_pil.resize((new_width, new_height), Image.NEAREST)

        with torch.no_grad():
            output = self.pipe(
                prompt=prompt,
                image=image_pil,
                mask_image=mask_pil,
                guidance_scale=guidance_scale,
                num_inference_steps=num_inference_steps,
                strength=intensity,
            ).images[0]

        # Back to a float array in [0, 1] for callers.
        return np.array(output) / 255.0

    def __del__(self):
        """
        Best-effort cleanup of GPU memory when the object is deleted.
        """
        # getattr guards: __del__ can run even if __init__ raised before
        # these attributes were assigned.
        if getattr(self, "pipe", None) is not None and getattr(self, "device", None) == "cuda":
            try:
                torch.cuda.empty_cache()
            except Exception:
                # torch may already be partially torn down at interpreter
                # shutdown; there is nothing useful to do here.
                pass


class StableDiffusionModel:
    """
    Implementation of Stable Diffusion model for image generation and editing.
    """

    def __init__(self, model_id="runwayml/stable-diffusion-v1-5", device=None):
        """
        Initialize the Stable Diffusion model.

        Args:
            model_id (str): Hugging Face model ID for the Stable Diffusion
                model.
            device (str, optional): Device to run the model on
                ('cuda' or 'cpu'). Auto-detected when None.
        """
        self.model_id = model_id
        self.device = _resolve_device(device)
        # Pipeline is loaded lazily on first use to save memory.
        self.pipe = None

    def load_model(self):
        """
        Load the Stable Diffusion text-to-image pipeline (idempotent).
        """
        if self.pipe is not None:
            return

        # Imported here (as in the original) so the heavy txt2img pipeline
        # is only pulled in when this class is actually used.
        from diffusers import StableDiffusionPipeline

        self.pipe = StableDiffusionPipeline.from_pretrained(
            self.model_id,
            safety_checker=None  # Disable safety checker for NSFW content as per user request
        )
        self.pipe = self.pipe.to(self.device)

        # Attention slicing trades a little speed for much lower VRAM use.
        if self.device == "cuda":
            self.pipe.enable_attention_slicing()

    def generate_image(self, prompt, negative_prompt="", width=512, height=512, guidance_scale=7.5, num_inference_steps=30):
        """
        Generate an image using Stable Diffusion.

        Args:
            prompt (str): Text prompt describing the desired image.
            negative_prompt (str): Text prompt describing what to avoid.
            width (int): Width of the generated image.
            height (int): Height of the generated image.
            guidance_scale (float): Guidance scale for the diffusion model.
            num_inference_steps (int): Number of denoising steps.

        Returns:
            numpy.ndarray: Generated image, float values in [0, 1].
        """
        self.load_model()

        with torch.no_grad():
            output = self.pipe(
                prompt=prompt,
                negative_prompt=negative_prompt,
                width=width,
                height=height,
                guidance_scale=guidance_scale,
                num_inference_steps=num_inference_steps,
            ).images[0]

        # Back to a float array in [0, 1] for callers.
        return np.array(output) / 255.0

    def __del__(self):
        """
        Best-effort cleanup of GPU memory when the object is deleted.
        """
        # getattr guards: __del__ can run even if __init__ raised before
        # these attributes were assigned.
        if getattr(self, "pipe", None) is not None and getattr(self, "device", None) == "cuda":
            try:
                torch.cuda.empty_cache()
            except Exception:
                # torch may already be partially torn down at interpreter
                # shutdown; there is nothing useful to do here.
                pass