Spaces:

c-broderick-225
/

PortraitPerfectAI

Sleeping

File size: 7,667 Bytes

68e4b96

import torch
import numpy as np
from diffusers import StableDiffusionInpaintPipeline, DDIMScheduler
from PIL import Image

class LEDITSModel:
    """
    Implementation of LEDITS++ model for localized image editing using Stable Diffusion.
    """
    
    def __init__(self, model_id="runwayml/stable-diffusion-inpainting", device=None):
        """
        Initialize the LEDITS++ model.
        
        Args:
            model_id (str): Hugging Face model ID for the Stable Diffusion inpainting model
            device (str, optional): Device to run the model on ('cuda' or 'cpu')
        """
        self.model_id = model_id
        
        # Determine device
        if device is None:
            self.device = "cuda" if torch.cuda.is_available() else "cpu"
        else:
            self.device = device
            
        # Model will be loaded on first use to save memory
        self.pipe = None
        
    def load_model(self):
        """
        Load the Stable Diffusion inpainting model.
        """
        if self.pipe is None:
            # Load the pipeline with DDIM scheduler for better quality
            scheduler = DDIMScheduler.from_pretrained(
                self.model_id,
                subfolder="scheduler"
            )
            
            self.pipe = StableDiffusionInpaintPipeline.from_pretrained(
                self.model_id,
                scheduler=scheduler,
                safety_checker=None  # Disable safety checker for NSFW content as per user request
            )
            
            # Move to device
            self.pipe = self.pipe.to(self.device)
            
            # Enable memory optimization if on CUDA
            if self.device == "cuda":
                self.pipe.enable_attention_slicing()
    
    def edit_image(self, image, mask, prompt, intensity=0.5, guidance_scale=7.5, num_inference_steps=30):
        """
        Edit an image using the LEDITS++ approach.
        
        Args:
            image (numpy.ndarray): Input image (normalized to [0, 1])
            mask (numpy.ndarray): Mask indicating the region to edit (values in [0, 1])
            prompt (str): Text prompt describing the desired edit
            intensity (float): Strength of the edit (0.0 to 1.0)
            guidance_scale (float): Guidance scale for diffusion model
            num_inference_steps (int): Number of denoising steps
            
        Returns:
            numpy.ndarray: Edited image
        """
        # Load model if not already loaded
        self.load_model()
        
        # Convert numpy arrays to PIL Images
        if isinstance(image, np.ndarray):
            # Convert to uint8 if the image is float
            if image.dtype == np.float32 or image.dtype == np.float64:
                image_pil = Image.fromarray((image * 255).astype(np.uint8))
            else:
                image_pil = Image.fromarray(image)
        else:
            image_pil = image
            
        if isinstance(mask, np.ndarray):
            # Convert to uint8 if the mask is float
            if mask.dtype == np.float32 or mask.dtype == np.float64:
                mask_pil = Image.fromarray((mask * 255).astype(np.uint8))
            else:
                mask_pil = Image.fromarray(mask)
                
            # Ensure mask is grayscale
            if mask_pil.mode != 'L':
                mask_pil = mask_pil.convert('L')
        else:
            mask_pil = mask
            
        # Resize images to multiples of 8 (required by Stable Diffusion)
        width, height = image_pil.size
        new_width = width - (width % 8)
        new_height = height - (height % 8)
        
        if (new_width, new_height) != image_pil.size:
            image_pil = image_pil.resize((new_width, new_height), Image.LANCZOS)
            mask_pil = mask_pil.resize((new_width, new_height), Image.LANCZOS)
        
        # Run the inpainting pipeline
        with torch.no_grad():
            output = self.pipe(
                prompt=prompt,
                image=image_pil,
                mask_image=mask_pil,
                guidance_scale=guidance_scale,
                num_inference_steps=num_inference_steps,
                strength=intensity,
            ).images[0]
        
        # Convert back to numpy array
        output_np = np.array(output) / 255.0
        
        return output_np
    
    def __del__(self):
        """
        Clean up resources when the object is deleted.
        """
        if self.pipe is not None and self.device == "cuda":
            try:
                # Clear CUDA cache
                torch.cuda.empty_cache()
            except:
                pass


class StableDiffusionModel:
    """
    Implementation of Stable Diffusion model for image generation and editing.
    """
    
    def __init__(self, model_id="runwayml/stable-diffusion-v1-5", device=None):
        """
        Initialize the Stable Diffusion model.
        
        Args:
            model_id (str): Hugging Face model ID for the Stable Diffusion model
            device (str, optional): Device to run the model on ('cuda' or 'cpu')
        """
        self.model_id = model_id
        
        # Determine device
        if device is None:
            self.device = "cuda" if torch.cuda.is_available() else "cpu"
        else:
            self.device = device
            
        # Model will be loaded on first use to save memory
        self.pipe = None
        
    def load_model(self):
        """
        Load the Stable Diffusion model.
        """
        if self.pipe is None:
            from diffusers import StableDiffusionPipeline
            
            self.pipe = StableDiffusionPipeline.from_pretrained(
                self.model_id,
                safety_checker=None  # Disable safety checker for NSFW content as per user request
            )
            
            # Move to device
            self.pipe = self.pipe.to(self.device)
            
            # Enable memory optimization if on CUDA
            if self.device == "cuda":
                self.pipe.enable_attention_slicing()
    
    def generate_image(self, prompt, negative_prompt="", width=512, height=512, guidance_scale=7.5, num_inference_steps=30):
        """
        Generate an image using Stable Diffusion.
        
        Args:
            prompt (str): Text prompt describing the desired image
            negative_prompt (str): Text prompt describing what to avoid
            width (int): Width of the generated image
            height (int): Height of the generated image
            guidance_scale (float): Guidance scale for diffusion model
            num_inference_steps (int): Number of denoising steps
            
        Returns:
            numpy.ndarray: Generated image
        """
        # Load model if not already loaded
        self.load_model()
        
        # Run the pipeline
        with torch.no_grad():
            output = self.pipe(
                prompt=prompt,
                negative_prompt=negative_prompt,
                width=width,
                height=height,
                guidance_scale=guidance_scale,
                num_inference_steps=num_inference_steps,
            ).images[0]
        
        # Convert to numpy array
        output_np = np.array(output) / 255.0
        
        return output_np
    
    def __del__(self):
        """
        Clean up resources when the object is deleted.
        """
        if self.pipe is not None and self.device == "cuda":
            try:
                # Clear CUDA cache
                torch.cuda.empty_cache()
            except:
                pass