File size: 4,798 Bytes
55fe803
62594be
55fe803
 
9d9e3d4
 
bd4c365
9d9e3d4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55fe803
 
9d9e3d4
55fe803
78bec1d
e24aa73
55fe803
6c5fa74
3e7b179
 
9d9e3d4
 
 
 
 
 
fa46cad
9d9e3d4
fa46cad
 
9d9e3d4
fa46cad
9d9e3d4
fa46cad
9d9e3d4
fa46cad
9d9e3d4
fa46cad
 
 
 
9d9e3d4
 
 
fa46cad
9d9e3d4
 
 
62594be
58264c9
9d9e3d4
 
 
 
 
 
 
 
 
55fe803
 
9d9e3d4
 
 
58264c9
9d9e3d4
58264c9
9d9e3d4
 
 
 
55fe803
9d9e3d4
 
3ac4904
55fe803
 
 
 
3ac4904
55fe803
5812881
9d9e3d4
 
3ac4904
55fe803
9d9e3d4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55fe803
9d9e3d4
 
 
 
 
55fe803
 
9d9e3d4
 
 
55fe803
 
9d9e3d4
58264c9
55fe803
9d9e3d4
 
 
 
 
 
 
 
 
 
55fe803
9d9e3d4
 
 
 
 
55fe803
9d9e3d4
 
 
 
 
 
55fe803
c96ee5c
9d9e3d4
 
556b962
 
58264c9
9d9e3d4
 
58264c9
55fe803
9d9e3d4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
import torch
import spaces
import gradio as gr
from diffusers import DiffusionPipeline
import diffusers
import io
import sys
import logging

# ------------------------
# GLOBAL LOG BUFFER
# ------------------------
# In-memory buffer holding the log text that is shown in the UI.
log_buffer = io.StringIO()


def log(msg):
    """Echo *msg* to stdout and append it, newline-terminated, to ``log_buffer``.

    Args:
        msg: The message to record. Non-string values are converted with
            ``str()`` so the buffer accepts everything ``print`` accepts
            (previously a non-string was printed and then raised
            ``TypeError`` on the buffer write).
    """
    text = str(msg)
    print(text)
    log_buffer.write(text + "\n")

# Raise the diffusers library's log verbosity to INFO
# (note: this call selects INFO, not DEBUG).
diffusers.utils.logging.set_verbosity_info()

log("Loading Z-Image-Turbo pipeline...")

# Load the pipeline once at import time; `pipe` is the module-level object
# that generate_image() calls for every request.
pipe = DiffusionPipeline.from_pretrained(
    "Tongyi-MAI/Z-Image-Turbo",
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=False,
    # NOTE(review): presumably selects a flash-attention-3 kernel; confirm
    # that DiffusionPipeline.from_pretrained honours `attn_implementation`
    # in the installed diffusers version.
    attn_implementation="kernels-community/vllm-flash-attn3",
)

# Move the pipeline to the CUDA device.
pipe.to("cuda")


# Disabled (kept for reference): AoTI block loading with the "fa3" variant.
# pipe.transformer.layers._repeated_blocks = ["ZImageTransformerBlock"]
# spaces.aoti_blocks_load(pipe.transformer.layers, "zerogpu-aoti/Z-Image", variant="fa3")

# ------------------------
# ATTENTION + PIPE INFO
# ------------------------
def pipeline_debug_info(pipe):
    """Return a best-effort, newline-joined summary of the attention setup.

    Nothing here is required for generation, so every lookup failure is
    turned into a line of text instead of being raised.

    Args:
        pipe: Pipeline object. Only ``pipe.transformer`` is inspected.

    Returns:
        str: Two lines. The first reports
        ``pipe.transformer.config.attn_implementation`` or that it is
        missing; the second reports the type of the first block's attention
        processor, or the error from the first lookup attempt.
    """
    info = []

    # Transformer config
    try:
        backend = pipe.transformer.config.attn_implementation
    except Exception:
        # `Exception` rather than a bare `except:` so KeyboardInterrupt and
        # SystemExit are not swallowed.
        info.append("No transformer.attn_implementation")
    else:
        info.append(f"Transformer attention backend: {backend}")

    # Processor class. The original lookup (`blocks[0].attn`) is tried first;
    # the remaining name pairs are fallbacks.
    # NOTE(review): the `layers` / `attention` names are assumed for the
    # Z-Image transformer -- confirm against the installed diffusers version.
    candidates = (
        ("blocks", "attn"),
        ("layers", "attention"),
        ("blocks", "attention"),
        ("layers", "attn"),
    )
    first_error = None
    for blocks_name, attn_name in candidates:
        try:
            block = getattr(pipe.transformer, blocks_name)[0]
            proc = getattr(block, attn_name).processor
        except Exception as e:
            # Keep the error from the original lookup path so the failure
            # message is unchanged when no candidate matches.
            if first_error is None:
                first_error = e
            continue
        info.append(f"Processor type: {type(proc)}")
        break
    else:
        info.append(f"Processor error: {first_error}")

    return "\n".join(info)


# ------------------------
# IMAGE GENERATOR
# ------------------------
@spaces.GPU
def generate_image(prompt, height, width, num_inference_steps, seed, randomize_seed, num_images):
    """Run the module-level pipeline for *prompt* and return images, seed and log.

    Args:
        prompt: Text prompt passed to the pipeline.
        height: Output height; converted with ``int()``.
        width: Output width; converted with ``int()``.
        num_inference_steps: Step count; converted with ``int()``.
        seed: Seed used when *randomize_seed* is false; converted with ``int()``.
        randomize_seed: When true, *seed* is ignored and a random one is drawn.
        num_images: Requested image count; clamped to the range 1..3.

    Returns:
        tuple: ``(images, seed, log_text)`` -- ``result.images`` from the
        pipeline call, the integer seed that was used, and the text
        accumulated in ``log_buffer`` during this request.
    """
    # Start every request with an empty buffer so the returned log covers
    # only this run.
    log_buffer.truncate(0)
    log_buffer.seek(0)

    # Clamp first so the log reports the number of images actually generated.
    num_images = min(max(1, int(num_images)), 3)

    log("=== NEW GENERATION REQUEST ===")
    log(f"Prompt: {prompt}")
    log(f"Height: {height}, Width: {width}")
    log(f"Inference Steps: {num_inference_steps}")
    log(f"Num Images: {num_images}")

    if randomize_seed:
        seed = torch.randint(0, 2**32 - 1, (1,)).item()
        log(f"Randomized Seed → {seed}")
    else:
        # Normalise once so the logged, used and returned seed all agree.
        seed = int(seed)
        log(f"Seed: {seed}")

    # Debug pipe info. The helper requires the pipeline as its argument;
    # calling it with no argument raised TypeError on every request.
    log(pipeline_debug_info(pipe))

    generator = torch.Generator("cuda").manual_seed(seed)

    log("Running pipeline forward()...")
    result = pipe(
        prompt=prompt,
        height=int(height),
        width=int(width),
        num_inference_steps=int(num_inference_steps),
        guidance_scale=0.0,
        generator=generator,
        max_sequence_length=1024,
        num_images_per_prompt=num_images,
        output_type="pil",
    )

    # Best-effort diagnostic: the pipeline may not have a `unet` attribute.
    # A failure is logged rather than silently dropped, and never aborts a
    # generation that has already finished.
    try:
        latent_shape = pipe.unet.config.sample_size
    except Exception as exc:
        log(f"UNet latent resolution unavailable: {exc}")
    else:
        log(f"UNet latent resolution (approx): {latent_shape}")

    log("Pipeline finished.")
    log("Returning images...")

    return result.images, seed, log_buffer.getvalue()

# ------------------------
# GRADIO UI
# ------------------------
# Prompt presets for the UI. Each row is a one-element list because the
# examples widget is bound to a single input (the prompt textbox).
examples = [
    [prompt_text]
    for prompt_text in (
        "Young Chinese woman in red Hanfu, intricate embroidery...",
        "A majestic dragon soaring through clouds at sunset...",
        "Cozy coffee shop interior, warm lighting, rain on windows...",
        "Astronaut riding a horse on Mars, cinematic lighting...",
        "Portrait of a wise old wizard...",
    )
]

# Build the UI: one row with two equally scaled columns -- the first holds
# the generation inputs, the second the outputs.
with gr.Blocks(title="Z-Image-Turbo Debug Demo") as demo:
    gr.Markdown("# 🎨 Z-Image-Turbo — Multi Image + Full Debug Logs")

    with gr.Row():
        # Input column.
        with gr.Column(scale=1):
            prompt = gr.Textbox(label="Prompt", lines=4)

            # Sliders take (minimum, maximum, initial value) positionally.
            with gr.Row():
                height = gr.Slider(512, 2048, 1024, step=64, label="Height")
                width = gr.Slider(512, 2048, 1024, step=64, label="Width")

            # The 1..3 range matches the clamp applied in generate_image().
            num_images = gr.Slider(1, 3, 2, step=1, label="Number of Images")

            num_inference_steps = gr.Slider(
                1, 20, 9, step=1, label="Inference Steps",
                info="9 steps = 8 DiT forward passes",
            )

            with gr.Row():
                seed = gr.Number(label="Seed", value=42, precision=0)
                randomize_seed = gr.Checkbox(label="Randomize Seed", value=False)

            generate_btn = gr.Button("🚀 Generate", variant="primary")

        # Output column: images, the seed that was used, and the debug log.
        with gr.Column(scale=1):
            output_images = gr.Gallery(label="Generated Images")
            used_seed = gr.Number(label="Seed Used", interactive=False)
            debug_log = gr.Textbox(
                label="Debug Log Output",
                lines=25,
                interactive=False
            )

    # Example prompts fill only the prompt textbox; results are not cached.
    gr.Examples(examples=examples, inputs=[prompt], cache_examples=False)

    # The input order must match generate_image()'s parameter order, and the
    # output order must match its (images, seed, log) return tuple.
    generate_btn.click(
        fn=generate_image,
        inputs=[prompt, height, width, num_inference_steps, seed, randomize_seed, num_images],
        outputs=[output_images, used_seed, debug_log],
    )

# Launch the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()