File size: 4,125 Bytes
61e8157
defabb3
61e8157
1b829a0
 
eb48411
 
 
1b829a0
61e8157
1b829a0
6e5055d
38f60df
6e5055d
61e8157
38f60df
bfc70f6
03d11bb
 
 
 
bfc70f6
456a8a0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b7751ed
456a8a0
11cf435
caed254
64b679f
 
 
 
 
 
 
 
 
03d11bb
0d402b9
28405c9
58d6d77
28405c9
737c688
1e19bab
38f60df
61e8157
 
38f60df
bfc70f6
361d5a5
ea6de43
 
61e8157
 
737c688
03d11bb
bfc70f6
2894805
61e8157
38f60df
64b679f
38f60df
ea6de43
 
 
 
 
 
 
61e8157
 
ab6e3a1
bec9471
61e8157
bfc70f6
 
055c2c9
bfc70f6
 
 
 
38f60df
03d11bb
bfc70f6
 
 
0d402b9
bfc70f6
 
ea6de43
bfc70f6
5672e0b
bfc70f6
 
 
 
 
 
2d7cd24
737c688
2d7cd24
 
2ff3a11
61e8157
1e44444
2ff3a11
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import gradio as gr
import spaces
import os
import sys
import subprocess
import numpy as np
from PIL import Image
import cv2

import torch

from diffusers import StableDiffusion3ControlNetPipeline
from diffusers.models import SD3ControlNetModel
from diffusers.utils import load_image

# Load pipeline
# The weights are requested in float16 at load time so that neither model is
# first materialized in the default (higher) precision and then cast; the
# final device and dtype are the same as before ("cuda", float16).
controlnet_canny = SD3ControlNetModel.from_pretrained(
    "InstantX/SD3-Controlnet-Canny",
    torch_dtype=torch.float16,
)
pipe = StableDiffusion3ControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-3-medium-diffusers",
    controlnet=controlnet_canny,
    torch_dtype=torch.float16,
).to("cuda", torch.float16)

def resize_image(input_path, output_path, target_height):
    """Resize an image to a fixed height, keeping its aspect ratio, and save it.

    Args:
        input_path: Location of the image to read (anything ``Image.open``
            accepts).
        output_path: Destination the resized image is written to.
        target_height: Height, in pixels, of the resized image.

    Returns:
        A tuple ``(output_path, new_width, target_height)`` where
        ``new_width`` is the width that was derived from the aspect ratio.
    """
    # The context manager guarantees the source file handle is released even
    # if resizing fails.
    with Image.open(input_path) as img:
        original_width, original_height = img.size
        original_aspect_ratio = original_width / original_height

        # Truncation can yield 0 for extremely narrow images, which is not a
        # valid image dimension; clamp to a single pixel.
        new_width = max(1, int(target_height * original_aspect_ratio))

        # resize() returns a new, fully loaded image, so it stays usable
        # after the source file is closed.
        resized = img.resize((new_width, target_height), Image.LANCZOS)

    # Pillow's JPEG encoder rejects alpha/palette modes (e.g. RGBA, LA, P);
    # convert those to RGB so saving to a .jpg/.jpeg path does not raise.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
    is_jpeg_target = str(output_path).lower().endswith((".jpg", ".jpeg"))
    if is_jpeg_target and resized.mode not in jpeg_modes:
        resized = resized.convert("RGB")

    resized.save(output_path)

    return output_path, new_width, target_height


@spaces.GPU(duration=90)
def infer(
    image_in, 
    prompt, 
    negative_prompt="",
    inference_steps=25,
    guidance_scale=7.0,
    control_weight=0.7,
    progress=gr.Progress(track_tqdm=True)
):
    """Generate an image with the SD3 ControlNet pipeline from a reference image.

    Args:
        image_in: Reference image, passed to diffusers' ``load_image``
            (the UI supplies a file path).
        prompt: Text prompt forwarded to the pipeline.
        negative_prompt: Negative text prompt forwarded to the pipeline.
        inference_steps: Number of denoising steps.
        guidance_scale: Guidance scale forwarded to the pipeline.
        control_weight: Value passed as ``controlnet_conditioning_scale``.
        progress: Gradio progress tracker (tracks tqdm bars).

    Returns:
        A tuple ``(image, update)``: the generated image resized to a height
        of 1024 px with the reference image's aspect ratio, and a
        ``gr.update`` that shows the conditioning image that was used.

    Raises:
        gr.Error: If no reference image was provided.
    """
    # Fail early with a message the UI can display instead of an opaque
    # error from load_image(None).
    if image_in is None:
        raise gr.Error("Please upload a reference image first.")

    # Build the conditioning image: grayscale, replicated to three channels.
    # NOTE(review): despite the "Canny" naming used around this app, no edge
    # detection is applied here -- confirm whether cv2.Canny was intended.
    source_image = load_image(image_in)
    source_width, source_height = source_image.size
    gray = np.array(source_image.convert('L'))
    control_image = Image.fromarray(np.stack([gray] * 3, axis=-1))

    # Infer
    image = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        control_image=control_image,
        controlnet_conditioning_scale=control_weight,
        # Sliders may deliver the step count as a float; the pipeline needs
        # an integer number of steps.
        num_inference_steps=int(inference_steps),
        guidance_scale=guidance_scale,
    ).images[0]

    # Restore the reference image's aspect ratio at a fixed 1024 px height.
    # The size is derived from the image already in memory, so nothing is
    # re-read from disk and no scratch file is written to a path shared by
    # concurrent requests.
    target_height = 1024
    target_width = max(1, int(target_height * (source_width / source_height)))
    image = image.resize((target_width, target_height), Image.LANCZOS)

    return image, gr.update(value=control_image, visible=True)



# Page-level CSS: centers the main column and caps its width.
css = """
#col-container{
    margin: 0 auto;
    max-width: 1080px;
}
"""
# UI definition: inputs (reference image, prompts, advanced sliders) on the
# left, outputs (result and the conditioning image) on the right.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("""
        # SD3 ControlNet
        Experiment with Stable Diffusion 3 ControlNet models proposed and maintained by the InstantX team.<br />
        Model card: [InstantX/SD3-Controlnet-Canny](https://huggingface.co/InstantX/SD3-Controlnet-Canny)
        """)
        
        with gr.Column():
            
            with gr.Row():
                # Left column: user inputs.
                with gr.Column():
                    # type="filepath" means `infer` receives a path string.
                    image_in = gr.Image(label="Image reference", sources=["upload"], type="filepath")
                    prompt = gr.Textbox(label="Prompt")
                    negative_prompt = gr.Textbox(label="Negative Prompt", placeholder="Enter negative prompts here")
                    
                    with gr.Accordion("Advanced settings", open=False):
                        with gr.Column():
                            with gr.Row():
                                # NOTE(review): the UI default of 50 steps differs from the
                                # default of 25 in `infer`'s signature; the UI value is the
                                # one sent when the button is clicked.
                                inference_steps = gr.Slider(label="Inference steps", minimum=1, maximum=100, step=1, value=50)
                                guidance_scale = gr.Slider(label="Guidance scale", minimum=1.0, maximum=10.0, step=0.1, value=7.0)
                            control_weight = gr.Slider(label="Control Weight", minimum=0.0, maximum=1.0, step=0.01, value=0.7)
                    
                    submit_canny_btn = gr.Button("Submit")
                    
                # Right column: outputs.
                with gr.Column():
                    result = gr.Image(label="Result")
                    # Hidden until `infer` returns an update that makes it visible.
                    canny_used = gr.Image(label="Preprocessed Canny", visible=False)


    # Wire the button to `infer`; the input order matches the function's
    # positional parameters, and the two outputs match its returned tuple.
    submit_canny_btn.click(
        fn=infer,
        inputs=[image_in, prompt, negative_prompt, inference_steps, guidance_scale, control_weight],
        outputs=[result, canny_used],
        api_name="predict",
        show_api=True
    )

# Enable request queuing and start the app.
demo.queue().launch(show_api=True)