import gradio as gr import torch import spaces from diffusers import DiffusionPipeline from PIL import Image from typing import List, Optional, Any # --- Model Configuration --- MODEL_V1 = "CompVis/stable-diffusion-v1-4" MODEL_V2 = "Manojb/stable-diffusion-2-1-base" DEVICE = "cuda" # Use bfloat16 for optimized performance on modern GPUs (H200/A100/H100) DTYPE = torch.bfloat16 # Default prompts for generation when user input is empty DEFAULT_PROMPT_V1 = "A stunning photorealistic image of a golden retriever wearing a crown, in a grand hall, cinematic lighting, masterpiece, 4k" DEFAULT_PROMPT_V2 = "A detailed matte painting of an ancient ruined city overgrown with vines, dramatic sunset, fantasy art, 8k, cinematic" print("Loading Models...") pipe_v1 = DiffusionPipeline.from_pretrained( MODEL_V1, torch_dtype=DTYPE, safety_checker=None, requires_safety_checker=False, # Use from_single_file=True if loading .ckpt or .safetensors files directly ).to(DEVICE) pipe_v2 = DiffusionPipeline.from_pretrained( MODEL_V2, torch_dtype=DTYPE, safety_checker=None, requires_safety_checker=False, ).to(DEVICE) print("Models Loaded.") @spaces.GPU(duration=1500) def compile_optimized_models(): """ Performs Ahead-of-Time (AoT) compilation for improved ZeroGPU performance. """ # --- Compilation for SD 1.4 (pipe_v1) --- print(f"Compiling UNet for {MODEL_V1} (SD 1.4)...") try: with spaces.aoti_capture(pipe_v1.unet) as call: # Run a quick example call (512x512, low steps) to capture inputs pipe_v1( prompt="compilation test", num_inference_steps=2, height=512, width=512 ) exported_v1 = torch.export.export(pipe_v1.unet, args=call.args, kwargs=call.kwargs) compiled_v1 = spaces.aoti_compile(exported_v1) spaces.aoti_apply(compiled_v1, pipe_v1.unet) print(f"Compilation for {MODEL_V1} complete.") except Exception as e: print(f"Warning: AoT compilation failed for SD 1.4. Running unoptimized. Error: {e}") # --- Compilation for SD 2.1 Base (pipe_v2) --- print(f"Compiling UNet for {MODEL_V2} (SD 2.1 Base)...") try: with spaces.aoti_capture(pipe_v2.unet) as call: # Run a quick example call (512x512, low steps) to capture inputs pipe_v2( prompt="compilation test", num_inference_steps=2, height=512, width=512 ) exported_v2 = torch.export.export(pipe_v2.unet, args=call.args, kwargs=call.kwargs) compiled_v2 = spaces.aoti_compile(exported_v2) spaces.aoti_apply(compiled_v2, pipe_v2.unet) print(f"Compilation for {MODEL_V2} complete.") except Exception as e: print(f"Warning: AoT compilation failed for SD 2.1 Base. Running unoptimized. Error: {e}") # Run compilation once at startup compile_optimized_models() @spaces.GPU def generate( model_choice: str, prompt: str, guidance_scale: float, num_inference_steps: int ) -> List[Image.Image]: """Generates images using the selected Stable Diffusion model.""" if model_choice == MODEL_V1: pipe = pipe_v1 if not prompt: prompt = DEFAULT_PROMPT_V1 elif model_choice == MODEL_V2: pipe = pipe_v2 if not prompt: prompt = DEFAULT_PROMPT_V2 else: raise gr.Error("Invalid model selection.") # We must use the resolution used during AoT compilation (512x512) # for best performance. result = pipe( prompt=prompt, guidance_scale=guidance_scale, num_inference_steps=num_inference_steps, num_images_per_prompt=4, # Generate 4 images as implied by gallery output height=512, width=512 ).images return result def display_uploads(files: Optional[List[Any]]) -> List[str]: """Converts uploaded FileData objects to displayable paths.""" if files: # FileData objects have a .path attribute pointing to the temporary file location return [f.path for f in files] return [] # --- Gradio Interface --- with gr.Blocks(title="Stable Diffusion Models Demo") as demo: gr.HTML( """
Select a model and enter a prompt to generate up to 4 images. Empty prompts use a powerful default prompt.