import os

import torch
import spaces
import gradio as gr
from diffusers import DiffusionPipeline, EulerDiscreteScheduler

# ---------------------------------------------------------------------
# Model setup (maps roughly to UNETLoader + VAELoader + CLIPLoader)
# ---------------------------------------------------------------------
# Change this to your preferred SD3 model or a local path.
# For example, you can replace with a local snapshot inside the Space repo.
MODEL_ID = os.getenv("MODEL_ID", "Tongyi-MAI/Z-Image-Turbo")

device = "cuda" if torch.cuda.is_available() else "cpu"

pipe = DiffusionPipeline.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    use_safetensors=True,
)
# KSampler → choose a scheduler (Euler is close to your Comfy euler/simple)
pipe.to(device)

# ZeroGPU ahead-of-time compiled transformer blocks.
# NOTE(review): `_repeated_blocks` is a private attribute consumed by
# `spaces.aoti_blocks_load` — confirm against the spaces/aoti docs if the
# library version changes.
pipe.transformer.layers._repeated_blocks = ["ZImageTransformerBlock"]
spaces.aoti_blocks_load(pipe.transformer.layers, "zerogpu-aoti/Z-Image", variant="fa3")


# ---------------------------------------------------------------------
# Inference function (maps to CLIPTextEncode + EmptySD3LatentImage +
# KSampler + VAEDecode)
# ---------------------------------------------------------------------
@spaces.GPU  # BUGFIX: decorator was applied twice; one application suffices.
def generate_images(
    positive: str,
    negative: str,
    width: int,
    height: int,
    steps: int,
    cfg: float,
    seed: int,
    num_images: int,
):
    """Generate `num_images` images from the prompt pair.

    Seed contract:
      * seed >= 0 -> deterministic series: seed, seed+1, seed+2, ...
      * seed < 0  -> a fresh random seed per image.

    Returns a list of PIL images (one per generated sample), suitable for
    a gr.Gallery output.
    """
    run_device = "cuda" if torch.cuda.is_available() else "cpu"
    pipe.to(run_device)

    # Gradio sliders may deliver floats; normalise to ints up front.
    num_images = int(num_images)
    width = int(width)
    height = int(height)
    steps = int(steps)

    images = []
    fixed_base_seed = int(seed) if seed >= 0 else None

    for i in range(num_images):
        if fixed_base_seed is None:
            # Random seed for this image (int64 range, exclusive upper bound).
            this_seed = torch.randint(0, 2**63 - 1, (1,), device=run_device).item()
        else:
            # Deterministic offset so each image in the batch differs.
            this_seed = fixed_base_seed + i

        generator = torch.Generator(device=run_device).manual_seed(int(this_seed))

        out = pipe(
            prompt=positive,
            negative_prompt=negative or None,  # empty string -> no negative prompt
            width=width,
            height=height,
            num_inference_steps=steps,
            guidance_scale=float(cfg),
            num_images_per_prompt=1,
            generator=generator,
        ).images[0]
        images.append(out)

    return images


# ---------------------------------------------------------------------
# Gradio UI (inputs correspond to Comfy node widgets_values)
# ---------------------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("# SD3 Text-to-Image – ComfyUI Workflow Port")

    with gr.Row():
        with gr.Column():
            positive = gr.Textbox(
                label="Positive Prompt",
                value="masterpiece, best quality, extremely detailed, high resolution.",  # from CLIP Text Encode (Positive Prompt)
                lines=5,
            )
            negative = gr.Textbox(
                label="Negative Prompt",
                value="watermark, blurry, ugly, bad anatomy",  # from CLIP Text Encode (Negative Prompt)
                lines=4,
            )
            width = gr.Slider(
                label="Width",
                minimum=256,
                maximum=1536,
                step=64,
                value=512,  # EmptySD3LatentImage width
            )
            height = gr.Slider(
                label="Height",
                minimum=256,
                maximum=1536,
                step=64,
                value=768,  # EmptySD3LatentImage height
            )
            steps = gr.Slider(
                label="Steps (KSampler)",
                minimum=1,
                maximum=50,
                step=1,
                value=12,  # KSampler steps
            )
            cfg = gr.Slider(
                label="CFG (Guidance Scale)",
                minimum=1.0,
                maximum=20.0,
                step=0.1,
                value=1.5,  # KSampler cfg in your graph
            )
            num_images = gr.Slider(
                label="Batch Size",
                minimum=1,
                maximum=8,
                step=1,
                value=6,  # EmptySD3LatentImage batch_size
            )
            seed = gr.Number(
                label="Seed (negative for random)",
                value=-1,  # "randomize" in Comfy
                precision=0,
            )
            run_btn = gr.Button("Generate")

        with gr.Column():
            gallery = gr.Gallery(
                label="Output Images",
                show_label=True,
                columns=3,
                height=768,
                object_fit="contain",  # keep full image visible in cell
                preview=False,         # do not start in zoomed preview mode
                allow_preview=True,    # still allow zoom when clicked
            )

    run_btn.click(
        fn=generate_images,
        inputs=[positive, negative, width, height, steps, cfg, seed, num_images],
        outputs=[gallery],
    )


if __name__ == "__main__":
    demo.launch()