# ZImageTest / app.py
# Hugging Face Space app (author: akiortagem, commit d7d90ba, verified)
import os
import torch
import spaces
import gradio as gr
from diffusers import DiffusionPipeline, EulerDiscreteScheduler
# ---------------------------------------------------------------------
# Model setup (maps roughly to UNETLoader + VAELoader + CLIPLoader)
# ---------------------------------------------------------------------
# Change this to your preferred SD3 model or a local path.
# For example, you can replace with a local snapshot inside the Space repo.
# MODEL_ID can be overridden via the environment for easy model swaps.
MODEL_ID = os.getenv("MODEL_ID", "Tongyi-MAI/Z-Image-Turbo")

# NOTE(review): on a ZeroGPU Space, CUDA is typically NOT available at import
# time, so this resolves to "cpu" here; generate_images() re-checks and moves
# the pipeline at call time — confirm against the Space hardware config.
device = "cuda" if torch.cuda.is_available() else "cpu"

# bfloat16 halves memory vs fp32; safetensors avoids pickle-based weights.
pipe = DiffusionPipeline.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    use_safetensors=True,
)
# KSampler → choose a scheduler (Euler is close to your Comfy euler/simple)
pipe.to(device)

# Mark the transformer blocks that repeat, then load ahead-of-time compiled
# kernels for them (presumably FlashAttention-3 variant, per "fa3") from the
# zerogpu-aoti/Z-Image repo — TODO confirm artifact availability for this model.
pipe.transformer.layers._repeated_blocks = ["ZImageTransformerBlock"]
spaces.aoti_blocks_load(pipe.transformer.layers, "zerogpu-aoti/Z-Image", variant="fa3")
# ---------------------------------------------------------------------
# Inference function (maps to CLIPTextEncode + EmptySD3LatentImage + KSampler + VAEDecode)
# ---------------------------------------------------------------------
@spaces.GPU  # fixed: decorator was applied twice, double-wrapping the GPU allocation
def generate_images(
    positive: str,
    negative: str,
    width: int,
    height: int,
    steps: int,
    cfg: float,
    seed: int,
    num_images: int,
):
    """Generate a batch of text-to-image results with the module-level pipeline.

    Maps the ComfyUI graph onto one diffusers call per image:
    CLIPTextEncode (prompts) + EmptySD3LatentImage (size) +
    KSampler (steps/cfg/seed) + VAEDecode (pipeline output).

    Args:
        positive: positive text prompt.
        negative: negative prompt; an empty string is forwarded as None.
        width: output width in pixels.
        height: output height in pixels.
        steps: number of denoising steps.
        cfg: classifier-free guidance scale.
        seed: base seed. seed >= 0 yields the deterministic series
            seed, seed+1, ...; seed < 0 draws a fresh random seed per image.
        num_images: number of images to generate (one pipeline call each).

    Returns:
        List of generated images (pipeline ``.images`` elements), in order.
    """
    # Re-resolve the device at call time: under ZeroGPU, CUDA only becomes
    # available inside a @spaces.GPU-decorated call.
    run_device = "cuda" if torch.cuda.is_available() else "cpu"
    pipe.to(run_device)
    # Gradio sliders may deliver floats; coerce to the ints the pipeline expects.
    num_images = int(num_images)
    width = int(width)
    height = int(height)
    steps = int(steps)
    images = []
    # seed >= 0 -> deterministic series: seed, seed+1, ...
    # seed < 0  -> fully random seeds per image
    fixed_base_seed = int(seed) if seed >= 0 else None
    for i in range(num_images):
        if fixed_base_seed is None:
            # Random seed for this image. Drawn on CPU: allocating a CUDA
            # tensor just to sample one scalar is wasted GPU work, and the
            # value is random either way.
            this_seed = torch.randint(0, 2**63 - 1, (1,)).item()
        else:
            # Deterministic offset so each image in the batch differs.
            this_seed = fixed_base_seed + i
        # Generator lives on the sampling device so the noise is drawn there.
        generator = torch.Generator(device=run_device).manual_seed(int(this_seed))
        out = pipe(
            prompt=positive,
            negative_prompt=negative or None,
            width=width,
            height=height,
            num_inference_steps=steps,
            guidance_scale=float(cfg),
            num_images_per_prompt=1,
            generator=generator,
        ).images[0]
        images.append(out)
    return images
# ---------------------------------------------------------------------
# Gradio UI (inputs correspond to Comfy node widgets_values)
# ---------------------------------------------------------------------
# ---------------------------------------------------------------------
# Gradio UI (inputs correspond to Comfy node widgets_values)
# ---------------------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("# SD3 Text-to-Image – ComfyUI Workflow Port")
    with gr.Row():
        # Left column: all generation controls.
        with gr.Column():
            positive = gr.Textbox(
                label="Positive Prompt",
                value="masterpiece, best quality, extremely detailed, high resolution.",  # from CLIP Text Encode (Positive Prompt)
                lines=5,
            )
            negative = gr.Textbox(
                label="Negative Prompt",
                value="watermark, blurry, ugly, bad anatomy",  # from CLIP Text Encode (Negative Prompt)
                lines=4,
            )
            # Step of 64 keeps dimensions compatible with the latent grid.
            width = gr.Slider(
                label="Width",
                minimum=256,
                maximum=1536,
                step=64,
                value=512,  # EmptySD3LatentImage width
            )
            height = gr.Slider(
                label="Height",
                minimum=256,
                maximum=1536,
                step=64,
                value=768,  # EmptySD3LatentImage height
            )
            steps = gr.Slider(
                label="Steps (KSampler)",
                minimum=1,
                maximum=50,
                step=1,
                value=12,  # KSampler steps
            )
            cfg = gr.Slider(
                label="CFG (Guidance Scale)",
                minimum=1.0,
                maximum=20.0,
                step=0.1,
                value=1.5,  # KSampler cfg in your graph
            )
            num_images = gr.Slider(
                label="Batch Size",
                minimum=1,
                maximum=8,
                step=1,
                value=6,  # EmptySD3LatentImage batch_size
            )
            # precision=0 forces an integer seed; -1 means "randomize per image".
            seed = gr.Number(
                label="Seed (negative for random)",
                value=-1,  # "randomize" in Comfy
                precision=0,
            )
            run_btn = gr.Button("Generate")
        # Right column: output gallery.
        with gr.Column():
            gallery = gr.Gallery(
                label="Output Images",
                show_label=True,
                columns=3,
                height=768,
                object_fit="contain",  # keep full image visible in cell
                preview=False,  # do not start in zoomed preview mode
                allow_preview=True,  # still allow zoom when clicked
            )
    # Wire the button to the generator; input order must match the
    # generate_images signature.
    run_btn.click(
        fn=generate_images,
        inputs=[positive, negative, width, height, steps, cfg, seed, num_images],
        outputs=[gallery],
    )

# Standard script entry point: launch the Gradio server.
if __name__ == "__main__":
    demo.launch()