# app.py
# Text-to-Image Space using Diffusers + Gradio
# Works on CPU (slow) and GPU (recommended). Choose a model in the UI.

import os
import math
import torch
import gradio as gr

from typing import List, Optional

from PIL import Image
from diffusers import (
    DiffusionPipeline,
    StableDiffusionPipeline,
    AutoPipelineForText2Image,
)

# --------- Config ---------
MODEL_CHOICES = {
    # Solid baseline, license-free to use after accepting on HF if required.
    "Stable Diffusion 1.5 (runwayml/stable-diffusion-v1-5)": "runwayml/stable-diffusion-v1-5",
    # Very fast for prototyping; outputs can be less detailed. Best with GPU.
    "SDXL Turbo (stabilityai/sdxl-turbo)": "stabilityai/sdxl-turbo",
}

DEFAULT_MODEL_LABEL = "Stable Diffusion 1.5 (runwayml/stable-diffusion-v1-5)"

# Disable safety checker by default (your responsibility). Toggle in UI.
DISABLE_SAFETY_DEFAULT = True


# --------- Runtime helpers ---------
def get_device() -> str:
    """Return the preferred torch device string: 'cuda', 'mps', or 'cpu'."""
    if torch.cuda.is_available():
        return "cuda"
    # Spaces don't use Apple MPS; leaving for completeness.
    if getattr(torch.backends, "mps", None) and torch.backends.mps.is_available():
        return "mps"
    return "cpu"


def nearest_multiple_of_8(x: int) -> int:
    """Round *x* to the nearest multiple of 8, clamped to a minimum of 64.

    Diffusion UNets require spatial dims divisible by 8.
    """
    if x < 64:
        return 64
    return int(round(x / 8) * 8)


# Cache pipelines per (model_id, device, fp16) to avoid reloading on each call.
_PIPE_CACHE = {}


def load_pipe(model_id: str, device: str, fp16: bool) -> DiffusionPipeline:
    """Load (or return a cached) text-to-image pipeline for *model_id*.

    fp16 weights are used only on CUDA; CPU/MPS always get float32.
    The loaded pipeline is moved to *device* and cached for reuse.
    """
    key = (model_id, device, fp16)
    if key in _PIPE_CACHE:
        return _PIPE_CACHE[key]

    dtype = torch.float16 if (fp16 and device == "cuda") else torch.float32

    # AutoPipeline works for many models; we fall back to SD pipeline for v1-5.
    # FIX: the class is AutoPipelineForText2Image (matching the import above);
    # the previous misspelling raised NameError and silently forced the
    # legacy fallback for every model, including SDXL Turbo.
    try:
        pipe = AutoPipelineForText2Image.from_pretrained(
            model_id,
            torch_dtype=dtype,
            use_safetensors=True,
            trust_remote_code=False,
        )
    except Exception:
        # Legacy fallback for SD 1.5
        pipe = StableDiffusionPipeline.from_pretrained(
            model_id,
            torch_dtype=dtype,
            use_safetensors=True,
        )

    # Send to device
    pipe = pipe.to(device)

    # Remember the original safety checker so the per-request toggle in
    # generate() can re-enable it later (the pipeline object is cached,
    # so plain `= None` would otherwise be a one-way switch).
    if hasattr(pipe, "safety_checker"):
        pipe._original_safety_checker = pipe.safety_checker

    # Try memory-efficient attention if available (best-effort; xformers
    # may not be installed, in which case we silently continue without it).
    if device == "cuda":
        try:
            pipe.enable_xformers_memory_efficient_attention()
        except Exception:
            pass

    _PIPE_CACHE[key] = pipe
    return pipe


# --------- Inference ---------
def generate(
    prompt: str,
    negative: str,
    model_label: str,
    steps: int,
    guidance: float,
    width: int,
    height: int,
    seed: Optional[str],  # comes from a gr.Textbox, so it is a string (often "")
    batch_size: int,
    disable_safety: bool,
) -> List[Image.Image]:
    """Run text-to-image inference and return a list of PIL images.

    Raises gr.Error for an empty prompt. Turbo models get their steps
    clamped to 1-6 and guidance forced to 0, matching how SDXL Turbo
    is meant to be sampled.
    """
    prompt = (prompt or "").strip()
    if not prompt:
        raise gr.Error("Enter a non-empty prompt.")

    model_id = MODEL_CHOICES[model_label]
    device = get_device()

    # SDXL Turbo ignores CFG and uses very low steps; keep sensible defaults.
    is_turbo = "sdxl-turbo" in model_id.lower()
    if is_turbo:
        steps = max(1, min(steps, 6))  # turbo is usually 1-6 steps
        guidance = 0.0  # turbo uses guidance-free sampling; CFG does nothing

    width = nearest_multiple_of_8(width)
    height = nearest_multiple_of_8(height)
    batch_size = max(1, min(batch_size, 8))

    pipe = load_pipe(model_id, device, fp16=(device == "cuda"))

    # Safety checker: toggle per request. Restore the stashed original when
    # re-enabled; previously, disabling once on the cached pipeline made it
    # impossible to turn back on.
    if hasattr(pipe, "safety_checker"):
        if disable_safety:
            pipe.safety_checker = None
        else:
            pipe.safety_checker = getattr(
                pipe, "_original_safety_checker", pipe.safety_checker
            )

    # Determinism. Normalize the textbox value to Optional[int] first.
    # FIX: previously an empty-string seed (the UI default) slipped past the
    # parse step and reached manual_seed("") -> TypeError on every default run.
    seed_int: Optional[int] = None
    if seed is not None and str(seed).strip() != "":
        try:
            seed_int = int(str(seed).strip())
        except ValueError:
            seed_int = None  # non-integer text is treated as "no seed"

    generator = None
    if seed_int is not None:
        # torch.Generator on MPS is unsupported; use a CPU generator there.
        gen_device = "cuda" if device == "cuda" else "cpu"
        generator = torch.Generator(device=gen_device).manual_seed(seed_int)

    prompts = [prompt] * batch_size
    negative_prompts = [negative] * batch_size if negative else None

    # Run. autocast is a no-op when not on CUDA (enabled=False).
    with torch.autocast("cuda", enabled=(device == "cuda")):
        out = pipe(
            prompt=prompts,
            negative_prompt=negative_prompts,
            num_inference_steps=int(steps),
            guidance_scale=float(guidance),
            width=int(width),
            height=int(height),
            generator=generator,
        )

    images = out.images
    return images


# --------- UI ---------
with gr.Blocks(css="footer {visibility: hidden}") as demo:
    gr.Markdown(
        """ # Text-to-Image (Diffusers) - **Models:** SD 1.5 and SDXL Turbo - **Tip:** SD 1.5 = better detail on CPU; Turbo = very fast on GPU, fewer steps. """
    )

    with gr.Row():
        model_dd = gr.Dropdown(
            label="Model",
            choices=list(MODEL_CHOICES.keys()),
            value=DEFAULT_MODEL_LABEL,
        )
        steps = gr.Slider(1, 75, value=30, step=1, label="Steps")
        guidance = gr.Slider(0.0, 15.0, value=7.5, step=0.1, label="Guidance (CFG)")

    with gr.Row():
        width = gr.Slider(256, 1024, value=768, step=8, label="Width (multiple of 8)")
        height = gr.Slider(256, 1024, value=768, step=8, label="Height (multiple of 8)")
        batch_size = gr.Slider(1, 4, value=1, step=1, label="Batch size")

    prompt = gr.Textbox(
        label="Prompt",
        lines=2,
        placeholder="a cozy cabin at twilight beside a lake, cinematic lighting",
    )
    negative = gr.Textbox(
        label="Negative Prompt",
        lines=1,
        placeholder="blurry, low quality, distorted",
    )

    with gr.Row():
        seed = gr.Textbox(label="Seed (optional integer)", value="")
        disable_safety = gr.Checkbox(
            label="Disable safety checker (you are responsible)",
            value=DISABLE_SAFETY_DEFAULT,
        )

    run_btn = gr.Button("Generate", variant="primary")
    gallery = gr.Gallery(label="Results", columns=2, height=512, preview=True)

    def _on_change_model(label):
        # If Turbo selected, nudge UI to sane defaults
        if "Turbo" in label:
            return gr.update(value=4), gr.update(value=0.0)
        else:
            return gr.update(value=30), gr.update(value=7.5)

    model_dd.change(_on_change_model, inputs=model_dd, outputs=[steps, guidance])

    run_btn.click(
        fn=generate,
        inputs=[
            prompt,
            negative,
            model_dd,
            steps,
            guidance,
            width,
            height,
            seed,
            batch_size,
            disable_safety,
        ],
        outputs=[gallery],
        api_name="generate",
        scroll_to_output=True,
        concurrency_limit=2,
    )

if __name__ == "__main__":
    # In Spaces, just running the file starts the app. Debug on for clearer stack traces.
    demo.launch(
        server_name="0.0.0.0",
        server_port=int(os.environ.get("PORT", 7860)),
        debug=True,
    )