PortraitPerfectAI / models /ledits_model.py
Your Name
Implement initial project structure and setup
68e4b96
import torch
import numpy as np
from diffusers import StableDiffusionInpaintPipeline, DDIMScheduler
from PIL import Image
class LEDITSModel:
    """
    Localized image editing built on a Stable Diffusion inpainting pipeline.

    The diffusers pipeline is created lazily on the first call to
    ``load_model`` / ``edit_image`` so that constructing this object is cheap.
    """

    def __init__(self, model_id="runwayml/stable-diffusion-inpainting", device=None):
        """
        Initialize the LEDITS++ model.

        Args:
            model_id (str): Hugging Face model ID for the Stable Diffusion inpainting model
            device (str, optional): Device to run the model on ('cuda' or 'cpu').
                When omitted, 'cuda' is chosen if available, otherwise 'cpu'.
        """
        self.model_id = model_id
        # Determine device
        if device is None:
            self.device = "cuda" if torch.cuda.is_available() else "cpu"
        else:
            self.device = device
        # Model will be loaded on first use to save memory
        self.pipe = None

    def _uses_cuda(self):
        """Return True for any CUDA device spelling ('cuda', 'cuda:0', torch.device)."""
        return str(self.device).startswith("cuda")

    @staticmethod
    def _to_uint8(array):
        """
        Convert a numpy array to something ``Image.fromarray`` accepts.

        Floating-point arrays are treated as normalized to [0, 1]: they are
        clipped to that range (so out-of-range values cannot wrap around in
        the uint8 cast) and scaled to [0, 255]. Other dtypes are returned
        unchanged.
        """
        if np.issubdtype(array.dtype, np.floating):
            return (np.clip(array, 0.0, 1.0) * 255).astype(np.uint8)
        return array

    @classmethod
    def _to_pil(cls, data):
        """Return ``data`` as a PIL image; non-numpy inputs are passed through unchanged."""
        if not isinstance(data, np.ndarray):
            return data
        # A trailing singleton channel (H, W, 1) is collapsed to (H, W).
        if data.ndim == 3 and data.shape[2] == 1:
            data = data[:, :, 0]
        return Image.fromarray(cls._to_uint8(data))

    def load_model(self):
        """
        Load the Stable Diffusion inpainting model.

        Does nothing if the pipeline is already loaded. ``self.pipe`` is only
        assigned once the pipeline has been moved to the target device, so a
        failure part-way through does not leave a half-configured pipeline
        cached for later calls.
        """
        if self.pipe is not None:
            return
        # Load the pipeline with the DDIM scheduler taken from the same model repo
        scheduler = DDIMScheduler.from_pretrained(
            self.model_id,
            subfolder="scheduler",
        )
        pipe = StableDiffusionInpaintPipeline.from_pretrained(
            self.model_id,
            scheduler=scheduler,
            safety_checker=None,  # Safety checker deliberately disabled (project requirement)
        )
        # Move to device
        pipe = pipe.to(self.device)
        # Enable memory optimization if on CUDA
        if self._uses_cuda():
            pipe.enable_attention_slicing()
        self.pipe = pipe

    def edit_image(self, image, mask, prompt, intensity=0.5, guidance_scale=7.5, num_inference_steps=30):
        """
        Edit the masked region of an image according to a text prompt.

        Args:
            image (numpy.ndarray or PIL.Image.Image): Input image. Float arrays
                are expected to be normalized to [0, 1]; values outside that
                range are clipped.
            mask (numpy.ndarray or PIL.Image.Image): Mask indicating the region
                to edit (float values in [0, 1], or an 8-bit image). It is
                converted to single-channel grayscale and resized to match the
                image.
            prompt (str): Text prompt describing the desired edit
            intensity (float): Strength of the edit (0.0 to 1.0), forwarded to
                the pipeline as ``strength``
            guidance_scale (float): Guidance scale for diffusion model
            num_inference_steps (int): Number of denoising steps

        Returns:
            numpy.ndarray: Edited image with pixel values divided by 255. Its
            size is the input size rounded down to multiples of 8.

        Raises:
            ValueError: If the image is smaller than 8 pixels in either dimension.
        """
        # Load model if not already loaded
        self.load_model()

        image_pil = self._to_pil(image)
        mask_pil = self._to_pil(mask)
        # Ensure mask is grayscale, whether it arrived as an array or a PIL image
        if mask_pil.mode != 'L':
            mask_pil = mask_pil.convert('L')

        # Round the working size down to multiples of 8 (required by Stable Diffusion)
        width, height = image_pil.size
        new_width = width - (width % 8)
        new_height = height - (height % 8)
        if new_width == 0 or new_height == 0:
            raise ValueError(
                f"Image must be at least 8x8 pixels, got {width}x{height}"
            )
        target_size = (new_width, new_height)
        if image_pil.size != target_size:
            image_pil = image_pil.resize(target_size, Image.LANCZOS)
        # Checked independently of the image: the mask may have a different
        # size even when the image itself needs no resizing.
        if mask_pil.size != target_size:
            mask_pil = mask_pil.resize(target_size, Image.LANCZOS)

        # Run the inpainting pipeline
        with torch.no_grad():
            output = self.pipe(
                prompt=prompt,
                image=image_pil,
                mask_image=mask_pil,
                # Pass the size explicitly; when omitted the pipeline falls back
                # to its own default resolution instead of the input size.
                height=new_height,
                width=new_width,
                guidance_scale=guidance_scale,
                num_inference_steps=num_inference_steps,
                strength=intensity,
            ).images[0]

        # Convert back to numpy array
        return np.array(output) / 255.0

    def __del__(self):
        """
        Clean up resources when the object is deleted.

        Best-effort only: drops the pipeline reference and asks PyTorch to
        release cached CUDA memory. Never raises.
        """
        # getattr: __init__ may have failed before these attributes existed.
        pipe = getattr(self, "pipe", None)
        device = getattr(self, "device", None)
        if pipe is None or not str(device).startswith("cuda"):
            return
        try:
            # Release our references first, otherwise the cache has nothing to free.
            self.pipe = None
            del pipe
            torch.cuda.empty_cache()
        except Exception:
            # Finalizers can run during interpreter shutdown when torch is
            # already torn down; cleanup failure must not propagate.
            pass
class StableDiffusionModel:
    """
    Text-to-image generation using a Stable Diffusion pipeline.

    The diffusers pipeline is created lazily on the first call to
    ``load_model`` / ``generate_image`` so that constructing this object is
    cheap.
    """

    def __init__(self, model_id="runwayml/stable-diffusion-v1-5", device=None):
        """
        Initialize the Stable Diffusion model.

        Args:
            model_id (str): Hugging Face model ID for the Stable Diffusion model
            device (str, optional): Device to run the model on ('cuda' or 'cpu').
                When omitted, 'cuda' is chosen if available, otherwise 'cpu'.
        """
        self.model_id = model_id
        # Determine device
        if device is None:
            self.device = "cuda" if torch.cuda.is_available() else "cpu"
        else:
            self.device = device
        # Model will be loaded on first use to save memory
        self.pipe = None

    def _uses_cuda(self):
        """Return True for any CUDA device spelling ('cuda', 'cuda:0', torch.device)."""
        return str(self.device).startswith("cuda")

    def load_model(self):
        """
        Load the Stable Diffusion model.

        Does nothing if the pipeline is already loaded. ``self.pipe`` is only
        assigned once the pipeline has been moved to the target device, so a
        failure part-way through does not leave a half-configured pipeline
        cached for later calls.
        """
        if self.pipe is not None:
            return
        # Imported here so the dependency is only needed once a model is loaded.
        from diffusers import StableDiffusionPipeline

        pipe = StableDiffusionPipeline.from_pretrained(
            self.model_id,
            safety_checker=None,  # Safety checker deliberately disabled (project requirement)
        )
        # Move to device
        pipe = pipe.to(self.device)
        # Enable memory optimization if on CUDA
        if self._uses_cuda():
            pipe.enable_attention_slicing()
        self.pipe = pipe

    def generate_image(self, prompt, negative_prompt="", width=512, height=512, guidance_scale=7.5, num_inference_steps=30):
        """
        Generate an image using Stable Diffusion.

        Args:
            prompt (str): Text prompt describing the desired image
            negative_prompt (str): Text prompt describing what to avoid
            width (int): Width of the generated image (positive multiple of 8)
            height (int): Height of the generated image (positive multiple of 8)
            guidance_scale (float): Guidance scale for diffusion model
            num_inference_steps (int): Number of denoising steps

        Returns:
            numpy.ndarray: Generated image with pixel values divided by 255

        Raises:
            ValueError: If ``width`` or ``height`` is not a positive multiple of 8.
        """
        # Validate before loading: the pipeline rejects such sizes anyway, and
        # failing here avoids loading the whole model just to raise.
        if width <= 0 or height <= 0 or width % 8 != 0 or height % 8 != 0:
            raise ValueError(
                f"width and height must be positive multiples of 8, got {width}x{height}"
            )

        # Load model if not already loaded
        self.load_model()

        # Run the pipeline
        with torch.no_grad():
            output = self.pipe(
                prompt=prompt,
                negative_prompt=negative_prompt,
                width=width,
                height=height,
                guidance_scale=guidance_scale,
                num_inference_steps=num_inference_steps,
            ).images[0]

        # Convert to numpy array
        return np.array(output) / 255.0

    def __del__(self):
        """
        Clean up resources when the object is deleted.

        Best-effort only: drops the pipeline reference and asks PyTorch to
        release cached CUDA memory. Never raises.
        """
        # getattr: __init__ may have failed before these attributes existed.
        pipe = getattr(self, "pipe", None)
        device = getattr(self, "device", None)
        if pipe is None or not str(device).startswith("cuda"):
            return
        try:
            # Release our references first, otherwise the cache has nothing to free.
            self.pipe = None
            del pipe
            torch.cuda.empty_cache()
        except Exception:
            # Finalizers can run during interpreter shutdown when torch is
            # already torn down; cleanup failure must not propagate.
            pass