File size: 4,552 Bytes
4dfe4bb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import os
import random
from typing import Optional

import gradio as gr
import spaces
import torch
from diffusers import DiffusionPipeline

# Repackaged Stable Diffusion 2.1 unCLIP checkpoint hosted by Comfy-Org.
MODEL_ID = "Comfy-Org/stable_diffusion_2.1_unclip_repackaged"
# Half precision keeps VRAM usage manageable on the Space GPU.
DTYPE = torch.float16
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Fail fast at import time: this app is GPU-only by design.
if DEVICE != "cuda":
    raise EnvironmentError("This Space requires a GPU runtime to run Stable Diffusion 2.1 UNCLIP.")

# Load the pipeline once at module import; reused by every request.
# safety_checker=None disables the NSFW filter stage entirely.
pipe = DiffusionPipeline.from_pretrained(
    MODEL_ID,
    torch_dtype=DTYPE,
    safety_checker=None,
    use_safetensors=True,
)
# Enable memory-efficient attention only if xformers is available on the
# installed diffusers/pipeline combination (hence the hasattr guard).
if hasattr(pipe, "enable_xformers_memory_efficient_attention"):
    pipe.enable_xformers_memory_efficient_attention()
pipe.to(DEVICE)
# Suppress per-step progress bars in the server logs.
pipe.set_progress_bar_config(disable=True)


@spaces.GPU(duration=1500)
def compile_transformer():
    """
    Ahead-of-time compile the pipeline's denoiser for faster inference.

    Stable Diffusion 2.1 unCLIP pipelines denoise with a UNet exposed as
    ``pipe.unet`` — there is no ``transformer`` attribute on this pipeline
    class, so capture and export must target the UNet.

    Returns:
        The ``spaces`` AOT-compiled artifact for the denoiser, suitable for
        ``spaces.aoti_apply``.
    """
    # Run one small representative generation under capture so the capture
    # context records the exact args/kwargs the denoiser is called with.
    with spaces.aoti_capture(pipe.unet) as call:
        pipe(
            prompt="high quality photo of a futuristic city skyline at sunset",
            negative_prompt="low quality, blurry",
            num_inference_steps=4,
            guidance_scale=5.0,
            width=512,
            height=512,
        )
    # Export the denoiser with the captured example inputs, then compile.
    exported = torch.export.export(
        pipe.unet,
        args=call.args,
        kwargs=call.kwargs,
    )
    return spaces.aoti_compile(exported)


# Compile the denoiser once at startup, then splice the compiled module back
# into the pipeline. The target must match what compile_transformer captured:
# SD 2.1 unCLIP exposes its denoiser as `pipe.unet` (no `transformer` attr).
compiled_transformer = compile_transformer()
spaces.aoti_apply(compiled_transformer, pipe.unet)


@spaces.GPU(duration=60)
def generate_image(
    prompt: str,
    negative_prompt: str,
    guidance_scale: float,
    num_inference_steps: int,
    width: int,
    height: int,
    seed: int,
):
    """
    Run Stable Diffusion 2.1 UNCLIP to create an image.

    Args:
        prompt (str): Text prompt describing the desired image.
        negative_prompt (str): Undesired attributes to avoid; blank means none.
        guidance_scale (float): CFG guidance strength.
        num_inference_steps (int): Number of denoising steps.
        width (int): Output image width in pixels.
        height (int): Output image height in pixels.
        seed (int): Random seed for reproducibility.

    Returns:
        PIL.Image.Image: The first generated image. (diffusers pipelines
        return PIL images by default, not torch tensors — the previous
        ``torch.Tensor`` annotation was incorrect.)
    """
    # Gradio sliders can deliver floats even with step=1; normalize to int
    # so the pipeline and torch.Generator.manual_seed get what they expect.
    num_inference_steps = int(num_inference_steps)
    width = int(width)
    height = int(height)
    # Treat a blank/whitespace negative prompt as "no negative prompt".
    cleaned_negative = negative_prompt.strip() or None
    # Dedicated generator makes the run reproducible for a given seed.
    generator = torch.Generator(device=DEVICE)
    generator.manual_seed(int(seed))
    result = pipe(
        prompt=prompt,
        negative_prompt=cleaned_negative,
        guidance_scale=guidance_scale,
        num_inference_steps=num_inference_steps,
        width=width,
        height=height,
        generator=generator,
    )
    return result.images[0]


# --- Gradio UI -------------------------------------------------------------
# Left column: prompt inputs and sampling controls; right column: output.
with gr.Blocks(title="Stable Diffusion 2.1 UNCLIP Tester") as demo:
    gr.Markdown(
        """
        # Stable Diffusion 2.1 UNCLIP (Comfy-Org)
        [Built with anycoder](https://huggingface.co/spaces/akhaliq/anycoder)

        Experiment with prompts using the repackaged SD 2.1 UNCLIP model.
        """
    )

    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(
                label="Prompt",
                value="A hyper-detailed matte painting of a floating city above the clouds, cinematic lighting",
                lines=3,
                placeholder="Describe what you want to generate...",
            )
            negative_prompt = gr.Textbox(
                label="Negative Prompt",
                value="low quality, blurry, distorted, watermark",
                lines=3,
                placeholder="Describe what to avoid...",
            )
            with gr.Row():
                guidance_scale = gr.Slider(1.0, 15.0, value=7.5, step=0.1, label="Guidance Scale")
                steps = gr.Slider(10, 60, value=30, step=1, label="Inference Steps")
            with gr.Row():
                # 64-pixel steps keep dimensions compatible with the UNet's
                # downsampling factor.
                width = gr.Slider(512, 1024, value=768, step=64, label="Width")
                height = gr.Slider(512, 1024, value=768, step=64, label="Height")
            # Upper bound is the max signed 32-bit int.
            seed = gr.Slider(0, 2_147_483_647, value=42, step=1, label="Seed")
            random_seed_btn = gr.Button("Randomize Seed", variant="secondary")
            generate_btn = gr.Button("Generate", variant="primary")

        with gr.Column():
            output_image = gr.Image(label="Generated Image", show_download_button=True)

    # Wire events: the randomize button only rewrites the seed slider value.
    random_seed_btn.click(
        fn=lambda: random.randint(0, 2_147_483_647),
        inputs=None,
        outputs=seed,
    )
    generate_btn.click(
        fn=generate_image,
        inputs=[prompt, negative_prompt, guidance_scale, steps, width, height, seed],
        outputs=output_image,
    )

# queue() serializes GPU requests so concurrent users don't contend for VRAM.
demo.queue()
demo.launch()