File size: 2,285 Bytes
da88359
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6e079a3
da88359
 
 
 
 
 
 
 
6e079a3
da88359
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75fd45d
728dd1f
da88359
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import torch
import numpy as np
import gradio as gr
from transformers import pipeline
from diffusers import StableDiffusionControlNetImg2ImgPipeline, ControlNetModel, UniPCMultistepScheduler
from diffusers.utils import load_image, make_image_grid
from PIL import Image

# Function to get depth map
def get_depth_map(image, depth_estimator):
    """Run *depth_estimator* on *image* and return a (3, H, W) float tensor.

    The estimator's single-channel depth prediction is replicated across
    three channels (ControlNet expects an RGB-like control image) and
    rescaled from [0, 255] to [0, 1].
    """
    depth = np.array(depth_estimator(image)["depth"])
    # Replicate the single channel three times -> (H, W, 3).
    depth_rgb = np.stack([depth, depth, depth], axis=-1)
    normalized = torch.from_numpy(depth_rgb).float() / 255.0
    # Channels-first layout for the diffusion pipeline: (3, H, W).
    return normalized.permute(2, 0, 1)

# Main function to process the image and prompt
def process_image_and_prompt(input_image, prompt):
    """Generate a new image from *input_image* guided by *prompt*.

    Estimates a depth map from the input, then runs Stable Diffusion v1.5
    with a depth-conditioned ControlNet in img2img mode.

    Parameters:
        input_image: PIL.Image.Image uploaded by the user.
        prompt: text prompt for the diffusion model.

    Returns:
        (original PIL image, generated PIL image) tuple for Gradio display.

    NOTE: models are (re)loaded on every call; first invocation downloads
    several GB of weights from the Hugging Face Hub.
    """
    # Convert PIL Image to the format expected by the pipeline
    input_image = input_image.convert("RGB")

    # fp16 is not supported by CPU kernels — pick dtype/device together.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    dtype = torch.float16 if device == "cuda" else torch.float32

    # Load depth estimator and compute the control image.
    depth_estimator = pipeline("depth-estimation")
    depth_map = get_depth_map(input_image, depth_estimator).unsqueeze(0).to(device, dtype)

    # BUG FIX: this is a *depth* map, so use the depth-conditioned ControlNet
    # (previously the surface-normal checkpoint "sd-controlnet-normal" was used).
    controlnet = ControlNetModel.from_pretrained(
        "lllyasviel/sd-controlnet-depth", torch_dtype=dtype, use_safetensors=True
    )
    pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5",
        controlnet=controlnet,
        torch_dtype=dtype,
        use_safetensors=True,
    )
    pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
    if device == "cuda":
        # Offloading only makes sense with an accelerator present; it also
        # manages device placement itself, so no explicit .to() here.
        pipe.enable_model_cpu_offload()
    else:
        pipe.to(device)

    # Generate the image. The pipeline already returns PIL images, so no
    # Image.fromarray conversion is needed (calling it on a PIL image raises).
    output_image = pipe(
        prompt,
        image=input_image,
        control_image=depth_map,
    ).images[0]

    return input_image, output_image

# Create the Gradio interface: one image upload + one text prompt in,
# two images out (the original echoed back next to the generated result).
# `type="pil"` makes Gradio hand the upload to process_image_and_prompt
# as a PIL.Image, which the function converts to RGB itself.
iface = gr.Interface(
    fn=process_image_and_prompt,
    inputs=[gr.Image(type="pil"), gr.Textbox(label="Prompt")],
    outputs=[gr.Image(label="Original Image"), gr.Image(label="Generated Image")],
    title="Image and Prompt Processing with Stable Diffusion",
    description="Upload an image and enter a prompt to generate a new image."
)

# Launch the Gradio app (blocking call; serves the UI on the default port).
iface.launch()