ombhojane committed on
Commit
da88359
·
verified ·
1 Parent(s): ebbed2d

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -0
app.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import numpy as np
3
+ import gradio as gr
4
+ from transformers import pipeline
5
+ from diffusers import StableDiffusionControlNetImg2ImgPipeline, ControlNetModel, UniPCMultistepScheduler
6
+ from diffusers.utils import load_image, make_image_grid
7
+ from PIL import Image
8
+
9
+ # Function to get depth map
10
def get_depth_map(image, depth_estimator):
    """Build a 3-channel, channels-first depth tensor for ``image``.

    Args:
        image: Input passed unchanged to ``depth_estimator``.
        depth_estimator: Callable returning a mapping whose ``"depth"`` entry
            is convertible to a 2-D array (the indexing below requires rank 2).

    Returns:
        A float ``torch.Tensor`` of shape ``(3, H, W)`` holding the depth
        values divided by 255.0, with the same plane repeated on each channel.
    """
    depth = np.asarray(depth_estimator(image)["depth"])
    # Add a trailing channel axis and replicate it: (H, W) -> (H, W, 3).
    three_channel = np.repeat(depth[:, :, None], 3, axis=2)
    scaled = torch.from_numpy(three_channel).float().div(255.0)
    # Reorder to channels-first: (H, W, 3) -> (3, H, W).
    return scaled.permute(2, 0, 1)
18
+
19
+ # Main function to process the image and prompt
20
+ def process_image_and_prompt(input_image, prompt):
21
+ # Convert PIL Image to the format expected by the pipeline
22
+ input_image = input_image.convert("RGB")
23
+
24
+ # Load depth estimator
25
+ depth_estimator = pipeline("depth-estimation")
26
+
27
+ # Get depth map
28
+ depth_map = get_depth_map(input_image, depth_estimator).unsqueeze(0).half().to("cuda")
29
+
30
+ # Load the ControlNet model and the StableDiffusionControlNetImg2ImgPipeline
31
+ controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-normal", torch_dtype=torch.float16, use_safetensors=True)
32
+ pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
33
+ "runwayml/stable-diffusion-v1-5",
34
+ controlnet=controlnet,
35
+ torch_dtype=torch.float16,
36
+ use_safetensors=True
37
+ ).to("cuda")
38
+ pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
39
+ pipe.enable_model_cpu_offload()
40
+
41
+ # Generate the image
42
+ output = pipe(
43
+ prompt,
44
+ image=input_image,
45
+ control_image=depth_map,
46
+ ).images[0]
47
+
48
+ # Convert output to PIL Image for Gradio display
49
+ output_image = Image.fromarray(output)
50
+
51
+ return input_image, output_image
52
+
53
+ # Create the Gradio interface
54
# Build the UI with the top-level component classes; the legacy
# ``gr.inputs`` / ``gr.outputs`` namespaces are deprecated in Gradio 3 and
# removed in Gradio 4.
iface = gr.Interface(
    fn=process_image_and_prompt,
    inputs=[gr.Image(type="pil"), gr.Textbox(label="Prompt")],
    outputs=[
        gr.Image(label="Original Image"),
        gr.Image(label="Generated Image"),
    ],
    title="Image and Prompt Processing with Stable Diffusion",
    description="Upload an image and enter a prompt to generate a new image.",
)

# Launch the Gradio app
iface.launch()