import os
import random
from typing import Optional

import gradio as gr
import spaces
import torch
from diffusers import DiffusionPipeline

# Model / runtime configuration.
MODEL_ID = "Comfy-Org/stable_diffusion_2.1_unclip_repackaged"
DTYPE = torch.float16
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# This Space is GPU-only: fp16 inference + AoT compilation below assume CUDA.
if DEVICE != "cuda":
    raise EnvironmentError("This Space requires a GPU runtime to run Stable Diffusion 2.1 UNCLIP.")

# Load the pipeline once at import time (standard pattern for a Gradio Space).
pipe = DiffusionPipeline.from_pretrained(
    MODEL_ID,
    torch_dtype=DTYPE,
    safety_checker=None,  # disabled deliberately; remove at your own risk
    use_safetensors=True,
)
# xformers attention is optional; only enable it when the install provides it.
if hasattr(pipe, "enable_xformers_memory_efficient_attention"):
    pipe.enable_xformers_memory_efficient_attention()
pipe.to(DEVICE)
pipe.set_progress_bar_config(disable=True)


@spaces.GPU(duration=1500)
def compile_transformer():
    """
    Ahead-of-time compile the pipeline's denoiser for faster inference.

    Runs one short warm-up generation under ``spaces.aoti_capture`` to record
    the denoiser's call args, exports it with ``torch.export``, and returns the
    AoT-compiled artifact for ``spaces.aoti_apply``.

    FIX: Stable Diffusion 2.1 pipelines expose their denoiser as ``pipe.unet``
    (a UNet2DConditionModel), not ``pipe.transformer``; the original code
    raised ``AttributeError`` here. The function keeps its original name so
    any external references remain valid.
    """
    # Capture one real denoiser call; 4 steps at 512x512 keeps warm-up cheap.
    with spaces.aoti_capture(pipe.unet) as call:
        pipe(
            prompt="high quality photo of a futuristic city skyline at sunset",
            negative_prompt="low quality, blurry",
            num_inference_steps=4,
            guidance_scale=5.0,
            width=512,
            height=512,
        )

    exported = torch.export.export(
        pipe.unet,
        args=call.args,
        kwargs=call.kwargs,
    )
    return spaces.aoti_compile(exported)


# Compile once at startup and swap the compiled denoiser into the pipeline.
compiled_transformer = compile_transformer()
spaces.aoti_apply(compiled_transformer, pipe.unet)


@spaces.GPU(duration=60)
def generate_image(
    prompt: str,
    negative_prompt: str,
    guidance_scale: float,
    num_inference_steps: int,
    width: int,
    height: int,
    seed: int,
):
    """
    Run Stable Diffusion 2.1 UNCLIP to create an image.

    Args:
        prompt (str): Text prompt describing the desired image.
        negative_prompt (str): Undesired attributes to avoid; blank means none.
        guidance_scale (float): CFG guidance strength.
        num_inference_steps (int): Number of denoising steps.
        width (int): Output image width in pixels.
        height (int): Output image height in pixels.
        seed (int): Random seed for reproducibility.

    Returns:
        PIL.Image.Image: The generated image (diffusers pipelines return PIL
        images, not tensors, by default).
    """
    # An all-whitespace negative prompt means "no negative prompt".
    cleaned_negative = negative_prompt.strip() or None

    # FIX: gr.Slider delivers a float; torch.Generator.manual_seed requires
    # an int and raises TypeError otherwise.
    generator = torch.Generator(device=DEVICE)
    generator.manual_seed(int(seed))

    result = pipe(
        prompt=prompt,
        negative_prompt=cleaned_negative,
        guidance_scale=guidance_scale,
        num_inference_steps=num_inference_steps,
        width=width,
        height=height,
        generator=generator,
    )
    return result.images[0]


# ---------------------------------------------------------------------------
# Gradio UI
# ---------------------------------------------------------------------------
with gr.Blocks(title="Stable Diffusion 2.1 UNCLIP Tester") as demo:
    gr.Markdown(
        """
        # Stable Diffusion 2.1 UNCLIP (Comfy-Org)
        [Built with anycoder](https://huggingface.co/spaces/akhaliq/anycoder)
        Experiment with prompts using the repackaged SD 2.1 UNCLIP model.
        """
    )
    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(
                label="Prompt",
                value="A hyper-detailed matte painting of a floating city above the clouds, cinematic lighting",
                lines=3,
                placeholder="Describe what you want to generate...",
            )
            negative_prompt = gr.Textbox(
                label="Negative Prompt",
                value="low quality, blurry, distorted, watermark",
                lines=3,
                placeholder="Describe what to avoid...",
            )
            with gr.Row():
                guidance_scale = gr.Slider(1.0, 15.0, value=7.5, step=0.1, label="Guidance Scale")
                steps = gr.Slider(10, 60, value=30, step=1, label="Inference Steps")
            with gr.Row():
                width = gr.Slider(512, 1024, value=768, step=64, label="Width")
                height = gr.Slider(512, 1024, value=768, step=64, label="Height")
            seed = gr.Slider(0, 2_147_483_647, value=42, step=1, label="Seed")
            random_seed_btn = gr.Button("Randomize Seed", variant="secondary")
            generate_btn = gr.Button("Generate", variant="primary")
        with gr.Column():
            output_image = gr.Image(label="Generated Image", show_download_button=True)

    # Seed randomizer: stateless lambda, no inputs.
    random_seed_btn.click(
        fn=lambda: random.randint(0, 2_147_483_647),
        inputs=None,
        outputs=seed,
    )
    generate_btn.click(
        fn=generate_image,
        inputs=[prompt, negative_prompt, guidance_scale, steps, width, height, seed],
        outputs=output_image,
    )

demo.queue()
demo.launch()