"""Gradio demo for the Tongyi-MAI Z-Image-Turbo diffusion pipeline.

Loads the pipeline onto CUDA at import time, exposes a multi-image
text-to-image UI, mirrors all diagnostics into an in-memory log buffer
(shown in the UI after each run), and uploads the first generated image
plus its prompt to a Hugging Face model repo.
"""

import gc
import io
import logging
import os
import random
import sys

import diffusers
import gradio as gr
import spaces
import torch
from diffusers import DiffusionPipeline

# ------------------------
# GLOBAL LOG BUFFER
# ------------------------
# Shared buffer so every log() call is also surfaced in the Gradio
# "Debug Log Output" textbox once a generation finishes.
log_buffer = io.StringIO()


def log(msg):
    """Print *msg* to stdout and mirror it into the global log buffer."""
    print(msg)
    log_buffer.write(msg + "\n")


# Enable diffusers debug logs
diffusers.utils.logging.set_verbosity_info()

log("Loading Z-Image-Turbo pipeline...")
pipe = DiffusionPipeline.from_pretrained(
    "Tongyi-MAI/Z-Image-Turbo",
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=False,
    attn_implementation="kernels-community/vllm-flash-attn3",
)
pipe.to("cuda")

# Kept for reference: optional AoT-compiled transformer blocks for ZeroGPU.
# pipe.transformer.layers._repeated_blocks = ["ZImageTransformerBlock"]
# spaces.aoti_blocks_load(pipe.transformer.layers, "zerogpu-aoti/Z-Image", variant="fa3")


# ------------------------
# ATTENTION + PIPE INFO
# ------------------------
def pipeline_debug_info(pipe):
    """Return a human-readable summary of the transformer and VAE configs.

    Each section is wrapped in try/except so a missing config key never
    breaks generation — this is diagnostics only.
    """
    info = []
    info.append("=== PIPELINE DEBUG INFO ===")
    try:
        tr = pipe.transformer.config
        info.append(f"Transformer Class: {pipe.transformer.__class__.__name__}")
        # Z-Image-Turbo correct keys
        info.append(f"Hidden dim: {tr.get('hidden_dim')}")
        info.append(f"Attention heads: {tr.get('num_heads')}")
        info.append(f"Depth (layers): {tr.get('depth')}")
        info.append(f"Patch size: {tr.get('patch_size')}")
        info.append(f"MLP ratio: {tr.get('mlp_ratio')}")
        info.append(f"Attention backend: {tr.get('attn_implementation')}")
    except Exception as e:
        info.append(f"Transformer diagnostics failed: {e}")

    # VAE info
    try:
        vae = pipe.vae.config
        info.append(f"VAE latent channels: {vae.latent_channels}")
        info.append(f"VAE scaling factor: {vae.scaling_factor}")
    except Exception as e:
        info.append(f"VAE diagnostics failed: {e}")

    return "\n".join(info)


def latent_shape_info(h, w, pipe):
    """Best-effort description of the latent tensor shape for an (h, w) image.

    NOTE(review): this multiplies pixel dimensions by the VAE
    ``scaling_factor``, which in diffusers is the *magnitude* scaling of
    latents, not the spatial compression ratio — the reported spatial dims
    are therefore suspect. Kept as-is because the output is purely
    informational; confirm the VAE's spatial downsample factor before
    relying on it.
    """
    try:
        c = pipe.vae.config.latent_channels
        s = pipe.vae.config.scaling_factor
        h_lat = int(h * s)
        w_lat = int(w * s)
        return f"Latent shape → ({c}, {h_lat}, {w_lat})"
    except Exception as e:
        return f"Latent shape calc failed: {e}"


# Target repo for uploads; override with the HF_UPLOAD_REPO env var.
HF_MODEL = os.environ.get("HF_UPLOAD_REPO", "rahul7star/Zimg-Feb2026")


# --- CPU-only upload function ---
def upload_image_and_prompt_cpu(input_image, prompt_text) -> str:
    """Upload a generated image and its prompt to the HF_MODEL repo.

    Files land under ``YYYY-MM-DD/Upload-Image-<8 hex>/`` as
    ``input_image.png`` and ``summary.txt``.

    Args:
        input_image: a PIL image (anything with ``.save``) or a filesystem
            path to an existing image file.
        prompt_text: the prompt string to store alongside the image.

    Returns:
        The repo-relative folder the files were uploaded into.
    """
    from datetime import datetime
    import tempfile, os, uuid, shutil
    from huggingface_hub import HfApi

    # Instantiate the HfApi class
    api = HfApi()

    today_str = datetime.now().strftime("%Y-%m-%d")
    unique_subfolder = f"Upload-Image-{uuid.uuid4().hex[:8]}"
    hf_folder = f"{today_str}/{unique_subfolder}"

    # Fix: the original read HUGGINGFACE_HUB_TOKEN for the image upload but
    # HF_TOKEN for the prompt upload, so one call silently failed unless both
    # vars were set. Accept either for both uploads.
    token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_HUB_TOKEN")

    # Save image temporarily (copy if given a path, else PIL-save).
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp_img:
        if isinstance(input_image, str):
            shutil.copy(input_image, tmp_img.name)
        else:
            input_image.save(tmp_img.name, format="PNG")
        tmp_img_path = tmp_img.name

    # Save prompt as summary.txt
    summary_file = tempfile.NamedTemporaryFile(delete=False, suffix=".txt").name
    with open(summary_file, "w", encoding="utf-8") as f:
        f.write(prompt_text)

    try:
        api.upload_file(
            path_or_fileobj=tmp_img_path,
            path_in_repo=f"{hf_folder}/input_image.png",
            repo_id=HF_MODEL,
            repo_type="model",
            token=token,
        )
        api.upload_file(
            path_or_fileobj=summary_file,
            path_in_repo=f"{hf_folder}/summary.txt",
            repo_id=HF_MODEL,
            repo_type="model",
            token=token,
        )
    finally:
        # Fix: cleanup now runs even when an upload raises (temp files leaked
        # on failure in the original).
        os.remove(tmp_img_path)
        os.remove(summary_file)

    return hf_folder


# ------------------------
# IMAGE GENERATOR
# ------------------------
@spaces.GPU
def generate_image(prompt, height, width, num_inference_steps, seed, randomize_seed, num_images):
    """Run the pipeline and return (images, seed used, debug log text)."""
    # Reset the shared log buffer for this request.
    log_buffer.seek(0)
    log_buffer.truncate(0)

    log("=== NEW GENERATION REQUEST ===")
    # Fix: the prompt was only print()ed, so it never appeared in the UI log.
    log(f"Prompt: {prompt}")
    log(f"Height: {height}, Width: {width}")
    log(f"Inference Steps: {num_inference_steps}")
    log(f"Num Images: {num_images}")

    if randomize_seed:
        seed = torch.randint(0, 2**32 - 1, (1,)).item()
        log(f"Randomized Seed → {seed}")
    else:
        log(f"Seed: {seed}")

    # Clamp images to the range the UI supports (1-3).
    num_images = min(max(1, int(num_images)), 3)

    # Debug pipe info
    log(pipeline_debug_info(pipe))

    generator = torch.Generator("cuda").manual_seed(int(seed))

    log("Running pipeline forward()...")
    result = pipe(
        prompt=prompt,
        height=int(height),
        width=int(width),
        num_inference_steps=int(num_inference_steps),
        guidance_scale=0.0,  # Turbo model: sampled without classifier-free guidance
        generator=generator,
        max_sequence_length=1024,
        num_images_per_prompt=num_images,
        output_type="pil",
    )

    # Correct latent diagnostics (Z-Image uses VAE + Transformer)
    try:
        log(f"VAE latent channels: {pipe.vae.config.latent_channels}")
        log(f"VAE scaling factor: {pipe.vae.config.scaling_factor}")
        log(f"Transformer latent size: {pipe.transformer.config.sample_size}")
        log(latent_shape_info(height, width, pipe))
    except Exception as e:
        log(f"Latent diagnostics error: {e}")

    log("Pipeline finished.")
    log("Returning images...")

    # Fix: an upload failure no longer discards the already-generated images;
    # the error is logged and the results are still returned.
    try:
        upload_image_and_prompt_cpu(result.images[0], prompt)
    except Exception as e:
        log(f"Upload failed: {e}")

    return result.images, seed, log_buffer.getvalue()


# ------------------------
# GRADIO UI
# ------------------------
examples = [
    ["Young Chinese woman in red Hanfu, intricate embroidery..."],
    ["A majestic dragon soaring through clouds at sunset..."],
    ["Cozy coffee shop interior, warm lighting, rain on windows..."],
    ["Astronaut riding a horse on Mars, cinematic lighting..."],
    ["Portrait of a wise old wizard..."],
]

with gr.Blocks(title="Z-Image-Turbo Multi Image Demo") as demo:
    gr.Markdown("# 🎨 Z-Image-Turbo — Multi Image ")

    with gr.Row():
        with gr.Column(scale=1):
            prompt = gr.Textbox(label="Prompt", lines=4)

            with gr.Row():
                height = gr.Slider(512, 2048, 1024, step=64, label="Height")
                width = gr.Slider(512, 2048, 1024, step=64, label="Width")

            num_images = gr.Slider(1, 3, 2, step=1, label="Number of Images")

            num_inference_steps = gr.Slider(
                1, 20, 9, step=1,
                label="Inference Steps",
                info="9 steps = 8 DiT forward passes",
            )

            with gr.Row():
                seed = gr.Number(label="Seed", value=42, precision=0)
                randomize_seed = gr.Checkbox(label="Randomize Seed", value=False)

            generate_btn = gr.Button("🚀 Generate", variant="primary")

        with gr.Column(scale=1):
            output_images = gr.Gallery(label="Generated Images")
            used_seed = gr.Number(label="Seed Used", interactive=False)
            debug_log = gr.Textbox(
                label="Debug Log Output",
                lines=25,
                interactive=False,
            )

    gr.Examples(examples=examples, inputs=[prompt], cache_examples=False)

    generate_btn.click(
        fn=generate_image,
        inputs=[prompt, height, width, num_inference_steps, seed, randomize_seed, num_images],
        outputs=[output_images, used_seed, debug_log],
    )

if __name__ == "__main__":
    demo.launch()