Spaces:
Runtime error
Runtime error
| import spaces | |
| from dataclasses import dataclass | |
| import json | |
| import logging | |
| import os | |
| import random | |
| import re | |
| import sys | |
| import warnings | |
| from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler | |
| import gradio as gr | |
| import torch | |
| from transformers import AutoModel, AutoTokenizer | |
| sys.path.append(os.path.dirname(os.path.abspath(__file__))) | |
| from diffusers import ZImagePipeline | |
| from diffusers.models.transformers.transformer_z_image import ZImageTransformer2DModel | |
| from pe import prompt_template | |
# ==================== Environment Variables ==================================
# Hub repo id or local directory holding the Z-Image weights.
MODEL_PATH = os.environ.get("MODEL_PATH", "Tongyi-MAI/Z-Image-Turbo")
# Toggle torch.compile / inductor autotuning in load_models().
ENABLE_COMPILE = os.environ.get("ENABLE_COMPILE", "true").lower() == "true"
# Pre-generate throwaway images at startup (see warmup_model).
ENABLE_WARMUP = os.environ.get("ENABLE_WARMUP", "true").lower() == "true"
# Attention implementation passed to the transformer (e.g. "flash_3", "native").
ATTENTION_BACKEND = os.environ.get("ATTENTION_BACKEND", "flash_3")
# Key for the DashScope OpenAI-compatible endpoint (prompt enhancement).
DASHSCOPE_API_KEY = os.environ.get("DASHSCOPE_API_KEY")
# Hugging Face token for gated/private model downloads.
HF_TOKEN = os.environ.get("HF_TOKEN")
# =============================================================================
# Silence tokenizer fork-parallelism warnings and transformers log noise.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
warnings.filterwarnings("ignore")
logging.getLogger("transformers").setLevel(logging.ERROR)
# Supported output resolutions, formatted "WIDTHxHEIGHT ( RATIO )".
# get_resolution() parses the leading WIDTHxHEIGHT pair; the ratio part
# is display-only.
RESOLUTIONS = [
    # Square
    "1024x1024 ( 1:1 )",
    # Landscape (wide to narrow)
    "1344x576 ( 21:9 )",
    "1280x720 ( 16:9 )",
    "1248x832 ( 3:2 )",
    "1152x864 ( 4:3 )",
    # Portrait (tall to short)
    "576x1344 ( 9:21 )",
    "720x1280 ( 9:16 )",
    "832x1248 ( 2:3 )",
    "864x1152 ( 3:4 )",
]
# Clickable example prompts for gr.Examples. Each entry is a one-element list
# matching the single prompt Textbox input. These are runtime strings shown to
# users verbatim — do not edit or reformat their content.
EXAMPLE_PROMPTS = [
    ["ไธไฝ็ทๅฃซๅไป็่ดตๅฎพ็ฌ็ฉฟ็้ ๅฅ็ๆ่ฃ ๅๅ ็็็ง๏ผๅฎคๅ ็ฏๅ ๏ผ่ๆฏไธญๆ่งไผใ"],
    [
        "ๆๅ ทๆฐๅดๆ็ๆ่ฐไบบๅ๏ผไธไฝไผ้ ็ไธญๅฝ็พๅฅณๅจ้ปๆ็ๆฟ้ด้ใไธๆๅผบๅ ้่ฟ้ฎๅ ๆฟ๏ผๅจๅฅน็่ธไธๆๅฐๅบไธไธชๆธ ๆฐ็้ช็ตๅฝข็ถ็ๅ ๅฝฑ๏ผๆญฃๅฅฝ็ งไบฎไธๅช็ผ็ใ้ซๅฏนๆฏๅบฆ๏ผๆๆไบค็ๆธ ๆฐ๏ผ็ฅ็งๆ๏ผ่ฑๅก็ธๆบ่ฒ่ฐใ"
    ],
    [
        "ไธๅผ ไธญๆฏๆๆบ่ชๆ็ ง็ๆๆไบไธไฝ็็้ฟ้ปๅ็ๅนด่ฝปไธไบๅฅณๅญๅจ็ฏๅ ๆไบฎ็็ตๆขฏๅ ๅฏน็้ๅญ่ชๆใๅฅน็ฉฟ็ไธไปถๅธฆๆ็ฝ่ฒ่ฑๆตๅพๆก็้ป่ฒ้ฒ่ฉ็ญไธ่กฃๅๆทฑ่ฒ็ไป่ฃคใๅฅน็ๅคดๅพฎๅพฎๅพๆ๏ผๅดๅๅ่ตทๅไบฒๅป็ถ๏ผ้ๅธธๅฏ็ฑไฟ็ฎใๅฅนๅณๆๆฟ็ไธ้จๆทฑ็ฐ่ฒๆบ่ฝๆๆบ๏ผ้ฎไฝไบ้จๅ่ธ๏ผๅ็ฝฎๆๅๅคด้ๅคดๅฏน็้ๅญ"
    ],
    [
        "Young Chinese woman in red Hanfu, intricate embroidery. Impeccable makeup, red floral forehead pattern. Elaborate high bun, golden phoenix headdress, red flowers, beads. Holds round folding fan with lady, trees, bird. Neon lightning-bolt lamp (โก๏ธ), bright yellow glow, above extended left palm. Soft-lit outdoor night background, silhouetted tiered pagoda (่ฅฟๅฎๅคง้ๅก), blurred colorful distant lights."
    ],
    [
        '''A vertical digital illustration depicting a serene and majestic Chinese landscape, rendered in a style reminiscent of traditional Shanshui painting but with a modern, clean aesthetic. The scene is dominated by towering, steep cliffs in various shades of blue and teal, which frame a central valley. In the distance, layers of mountains fade into a light blue and white mist, creating a strong sense of atmospheric perspective and depth. A calm, turquoise river flows through the center of the composition, with a small, traditional Chinese boat, possibly a sampan, navigating its waters. The boat has a bright yellow canopy and a red hull, and it leaves a gentle wake behind it. It carries several indistinct figures of people. Sparse vegetation, including green trees and some bare-branched trees, clings to the rocky ledges and peaks. The overall lighting is soft and diffused, casting a tranquil glow over the entire scene. Centered in the image is overlaid text. At the top of the text block is a small, red, circular seal-like logo containing stylized characters. Below it, in a smaller, black, sans-serif font, are the words 'Zao-Xiang * East Beauty & West Fashion * Z-Image'. Directly beneath this, in a larger, elegant black serif font, is the word 'SHOW & SHARE CREATIVITY WITH THE WORLD'. Among them, there are "SHOW & SHARE", "CREATIVITY", and "WITH THE WORLD"'''
    ],
    [
        """ไธๅผ ่ๆ็่ฑ่ฏญ็ตๅฝฑใๅๅฟไนๅณใ๏ผThe Taste of Memory๏ผ็็ตๅฝฑๆตทๆฅใๅบๆฏ่ฎพ็ฝฎๅจไธไธช่ดจๆด็19ไธ็บช้ฃๆ ผๅจๆฟ้ใ็ป้ขไธญๅคฎ๏ผไธไฝ็บขๆฃ่ฒๅคดๅใ็็ๅฐ่กๅญ็ไธญๅนด็ทๅญ๏ผๆผๅ้ฟ็ยทๅฝญๅๅฉๆ น้ฅฐ๏ผ็ซๅจไธๅผ ๆจๆกๅ๏ผไป่บซ็ฉฟ็ฝ่ฒ่กฌ่กซใ้ป่ฒ้ฉฌ็ฒๅ็ฑณ่ฒๅด่ฃ๏ผๆญฃ็็ไธไฝๅฅณๅฃซ๏ผๆไธญๆฟ็ไธๅคงๅ็็บข่๏ผไธๆนๆฏไธไธชๆจๅถๅ่ๆฟใๅจไป็ๅณ่พน๏ผไธไฝๆขณ็้ซ้ซป็้ปๅๅฅณๅญ๏ผๆผๅๅ่่ฏบยทไธๆฏ้ฅฐ๏ผๅ้ ๅจๆกๅญไธ๏ผๆธฉๆๅฐๅฏนไปๅพฎ็ฌใๅฅน็ฉฟ็ๆต ่ฒ่กฌ่กซๅไธๆกไธ็ฝไธ่็้ฟ่ฃใๆกไธ้คไบๆพๆๅ็ข็่ฑๅๅทๅฟ่ไธ็ๅ่ๆฟๅค๏ผ่ฟๆไธไธช็ฝ่ฒ้ถ็ท็ใๆฐ้ฒ้ฆ่๏ผๅทฆไพงไธไธชๆจ็ฎฑไธๆพ็ไธไธฒๆทฑ่ฒ่ก่ใ่ๆฏๆฏไธ้ข็ฒ็ณ็็ฐ็ฝ่ฒๆน็ฐๅข๏ผๅขไธๆ็ไธๅน ้ฃๆฏ็ปใๆๅณ่พน็ไธไธชๅฐ้ขไธๆพ็ไธ็ๅคๅคๆฒน็ฏใๆตทๆฅไธๆๅคง้็ๆๅญไฟกๆฏใๅทฆไธ่งๆฏ็ฝ่ฒ็ๆ ่กฌ็บฟๅญไฝ"ARTISAN FILMS PRESENTS"๏ผๅ ถไธๆนๆฏ"ELEANOR VANCE"ๅ"ACADEMY AWARDยฎ WINNER"ใๅณไธ่งๅ็"ARTHUR PENHALIGON"ๅ"GOLDEN GLOBEยฎ AWARD WINNER"ใ้กถ้จไธญๅคฎๆฏๅฃไธนๆฏ็ตๅฝฑ่็ๆกๅ ๆ ๅฟ๏ผไธๆนๅ็"SUNDANCE FILM FESTIVAL GRAND JURY PRIZE 2024"ใไธปๆ ้ข"THE TASTE OF MEMORY"ไปฅ็ฝ่ฒ็ๅคงๅท่กฌ็บฟๅญไฝ้็ฎๅฐๆพ็คบๅจไธๅ้จๅใๆ ้ขไธๆนๆณจๆไบ"A FILM BY Tongyi Interaction Lab"ใๅบ้จๅบๅ็จ็ฝ่ฒๅฐๅญๅๅบไบๅฎๆด็ๆผ่ๅๅๅ๏ผๅ ๆฌ"SCREENPLAY BY ANNA REID"ใ"CULINARY DIRECTION BY JAMES CARTER"ไปฅๅArtisan FilmsใRiverstone PicturesๅHeritage Media็ญไผๅคๅบๅๅ ฌๅธๆ ๅฟใๆดไฝ้ฃๆ ผๆฏๅๅฎไธปไน๏ผ้็จๆธฉๆๆๅ็็ฏๅ ๆนๆก๏ผ่ฅ้ ๅบไธ็งไบฒๅฏ็ๆฐๅดใ่ฒ่ฐไปฅๆฃ่ฒใ็ฑณ่ฒๅๆๅ็็ปฟ่ฒ็ญๅคงๅฐ่ฒ็ณปไธบไธปใไธคไฝๆผๅ็่บซไฝ้ฝๅจ่ ฐ้จ่ขซๆชๆญใ"""
    ],
    [
        """ไธๅผ ๆนๅฝขๆๅพ็็นๅ็ ง็๏ผไธปไฝๆฏไธ็ๅทจๅคง็ใ้ฒ็ปฟ่ฒ็ๆค็ฉๅถ็๏ผๅนถๅ ๅ ไบๆๅญ๏ผไฝฟๅ ถๅ ทๆๆตทๆฅๆๆๅฟๅฐ้ข็ๅค่งใไธป่ฆๆๆๅฏน่ฑกๆฏไธ็ๅๅฎใๆ่ก่ดจๆ็ๅถๅญ๏ผไปๅทฆไธ่งๅฐๅณไธ่งๅๅฏน่ง็บฟๅผฏๆฒ็ฉฟ่ฟ็ป้ขใๅ ถ่กจ้ขๅๅ ๆงๅพๅผบ๏ผๆๆๅฐไธไธชๆไบฎ็็ดๅฐๅ ๆบ๏ผๅฝขๆไบไธ้็ชๅบ็้ซๅ ๏ผไบฎ้ขไธๆพ้ฒๅบๅนณ่ก็็ฒพ็ปๅถ่ใ่ๆฏ็ฑๅ ถไปๆทฑ็ปฟ่ฒ็ๅถๅญ็ปๆ๏ผ่ฟไบๅถๅญ่ฝปๅพฎๅคฑ็ฆ๏ผ่ฅ้ ๅบๆต ๆฏๆทฑๆๆ๏ผ็ชๅบไบๅๆฏ็ไธปๅถ็ใๆดไฝ้ฃๆ ผๆฏๅๅฎๆๅฝฑ๏ผๆไบฎ็ๅถ็ไธ้ปๆ็้ดๅฝฑ่ๆฏไน้ดๅฝขๆ้ซๅฏนๆฏๅบฆใๅพๅไธๆๅคๅคๆธฒๆๆๅญใๅทฆไธ่งๆฏ็ฝ่ฒ็่กฌ็บฟๅญไฝๆๅญ"PIXEL-PEEPERS GUILD Presents"ใๅณไธ่งๅๆ ทๆฏ็ฝ่ฒ่กฌ็บฟๅญไฝ็ๆๅญ"[Instant Noodle] ๆณก้ข่ฐๆๅ "ใๅทฆไพงๅ็ดๆๅ็ๆ ้ข"Render Distance: Max"๏ผไธบ็ฝ่ฒ่กฌ็บฟๅญไฝใๅทฆไธ่งๆฏไบไธช็กๅคง็็ฝ่ฒๅฎไฝๆฑๅญ"ๆพๅกๅจ...็็ง"ใๅณไธ่งๆฏ่พๅฐ็็ฝ่ฒ่กฌ็บฟๅญไฝๆๅญ"Leica Glowโข Unobtanium X-1"๏ผๅ ถๆญฃไธๆนๆฏ็จ็ฝ่ฒๅฎไฝๅญไนฆๅ็ๅๅญ"่กๅ "ใ่ฏๅซๅบ็ๆ ธๅฟๅฎไฝๅ ๆฌๅ็ๅ็ด ๅท็ชฅ่ ๅไผใๅ ถไบงๅ็บฟๆณก้ข่ฐๆๅ ใ็ธๆบๅๅทไนฐไธๅฐโข X-1ไปฅๅๆๅฝฑๅธๅๅญ้ ็ธใ"""
    ],
]
def get_resolution(resolution):
    """Parse a "WIDTHxHEIGHT ( RATIO )" string into a (width, height) tuple.

    Falls back to (1024, 1024) when no "<digits> x <digits>" pair is found.
    """
    parsed = re.search(r"(\d+)\s*[รx]\s*(\d+)", resolution)
    if parsed is None:
        return 1024, 1024
    return int(parsed.group(1)), int(parsed.group(2))
def load_models(model_path, enable_compile=False, attention_backend="native"):
    """Build and return a CUDA/bfloat16 ZImagePipeline.

    Args:
        model_path: Hub repo id or local directory. When the path does not
            exist on disk, each component is fetched from the Hub via
            ``subfolder=...``; otherwise it is read from a local subdirectory.
        enable_compile: When True, set torch inductor tuning flags and wrap
            the transformer in ``torch.compile``.
        attention_backend: Attention implementation name forwarded to the
            transformer (e.g. "flash_3", "native").

    Returns:
        A ZImagePipeline with VAE, text encoder, tokenizer and transformer
        loaded, moved to CUDA in bfloat16.
    """
    print(f"Loading models from {model_path}...")
    # NOTE(review): use_auth_token is deprecated in recent huggingface_hub
    # releases in favor of `token` — confirm against the pinned versions.
    use_auth_token = HF_TOKEN if HF_TOKEN else True
    if not os.path.exists(model_path):
        # Remote repo: load each component from its subfolder on the Hub.
        vae = AutoencoderKL.from_pretrained(
            f"{model_path}",
            subfolder="vae",
            torch_dtype=torch.bfloat16,
            device_map="cuda",
            use_auth_token=use_auth_token,
        )
        text_encoder = AutoModel.from_pretrained(
            f"{model_path}",
            subfolder="text_encoder",
            torch_dtype=torch.bfloat16,
            device_map="cuda",
            use_auth_token=use_auth_token,
        ).eval()
        tokenizer = AutoTokenizer.from_pretrained(
            f"{model_path}", subfolder="tokenizer", use_auth_token=use_auth_token
        )
    else:
        # Local checkout: components live in subdirectories of model_path.
        vae = AutoencoderKL.from_pretrained(
            os.path.join(model_path, "vae"),
            torch_dtype=torch.bfloat16,
            device_map="cuda",
        )
        text_encoder = AutoModel.from_pretrained(
            os.path.join(model_path, "text_encoder"),
            torch_dtype=torch.bfloat16,
            device_map="cuda",
        ).eval()
        tokenizer = AutoTokenizer.from_pretrained(os.path.join(model_path, "tokenizer"))
    # Pad prompts on the left.
    tokenizer.padding_side = "left"
    if enable_compile:
        print("Enabling torch.compile optimizations...")
        # Inductor tuning knobs for faster matmul/conv kernel selection.
        torch._inductor.config.conv_1x1_as_mm = True
        torch._inductor.config.coordinate_descent_tuning = True
        torch._inductor.config.epilogue_fusion = False
        torch._inductor.config.coordinate_descent_check_all_directions = True
        torch._inductor.config.max_autotune_gemm = True
        torch._inductor.config.max_autotune_gemm_backends = "TRITON,ATEN"
        torch._inductor.config.triton.cudagraphs = False
    # The transformer is attached after construction so it can be loaded and
    # (optionally) compiled separately from the lighter components.
    pipe = ZImagePipeline(
        scheduler=None,
        vae=vae,
        text_encoder=text_encoder,
        tokenizer=tokenizer,
        transformer=None,
    )
    if enable_compile:
        # NOTE(review): tiling is disabled only on the compile path —
        # presumably to keep decode shapes stable for compilation; confirm.
        pipe.vae.disable_tiling()
    if not os.path.exists(model_path):
        transformer = ZImageTransformer2DModel.from_pretrained(
            f"{model_path}", subfolder="transformer", use_auth_token=use_auth_token
        ).to("cuda", torch.bfloat16)
    else:
        transformer = ZImageTransformer2DModel.from_pretrained(
            os.path.join(model_path, "transformer")
        ).to("cuda", torch.bfloat16)
    pipe.transformer = transformer
    pipe.transformer.set_attention_backend(attention_backend)
    if enable_compile:
        print("Compiling transformer...")
        pipe.transformer = torch.compile(
            pipe.transformer, mode="max-autotune-no-cudagraphs", fullgraph=False
        )
    pipe.to("cuda", torch.bfloat16)
    return pipe
def generate_image(
    pipe,
    prompt,
    resolution="1024x1024",
    seed=42,
    guidance_scale=5.0,
    num_inference_steps=50,
    shift=3.0,
    max_sequence_length=512,
    progress=gr.Progress(track_tqdm=True),
):
    """Run one text-to-image pass through the pipeline and return the image.

    A fresh FlowMatchEulerDiscreteScheduler is installed on every call so the
    requested time ``shift`` takes effect; the seed is applied via a CUDA
    generator for reproducibility.
    """
    width, height = get_resolution(resolution)
    pipe.scheduler = FlowMatchEulerDiscreteScheduler(
        num_train_timesteps=1000, shift=shift
    )
    rng = torch.Generator("cuda").manual_seed(seed)
    result = pipe(
        prompt=prompt,
        height=height,
        width=width,
        guidance_scale=guidance_scale,
        num_inference_steps=num_inference_steps,
        generator=rng,
        max_sequence_length=max_sequence_length,
    )
    return result.images[0]
def warmup_model(pipe, resolutions):
    """Generate throwaway images at every resolution to warm caches/kernels.

    Failures are logged per resolution and never abort the warmup loop.
    """
    print("Starting warmup phase...")
    for res in resolutions:
        print(f"Warming up for resolution: {res}")
        try:
            # Three passes per resolution, varying the seed each time.
            for attempt in range(3):
                generate_image(
                    pipe,
                    prompt="warmup",
                    resolution=res,
                    num_inference_steps=9,
                    guidance_scale=0.0,
                    seed=42 + attempt,
                )
        except Exception as exc:
            print(f"Warmup failed for {res}: {exc}")
    print("Warmup completed.")
| # ==================== Prompt Expander ==================== | |
@dataclass
class PromptOutput:
    """Result of a prompt-expansion attempt.

    Bug fix: this class is instantiated with positional and keyword arguments
    (see APIPromptExpander.extend), but it previously had only class-level
    annotations and no generated __init__, so every construction raised
    TypeError. ``dataclass`` was already imported for this purpose.

    Attributes:
        status: True when expansion succeeded.
        prompt: The expanded prompt ("" on failure).
        seed: Seed forwarded from the request.
        system_prompt: System prompt used for the LLM call (may be None).
        message: Raw model output, or an error description on failure.
    """

    status: bool
    prompt: str
    seed: int
    system_prompt: str
    message: str
class PromptExpander:
    """Base class for prompt-rewriting backends."""

    def __init__(self, backend="api", **kwargs):
        # Name of the concrete backend; "api" is the only one implemented.
        self.backend = backend

    def decide_system_prompt(self, template_name=None):
        # template_name is currently ignored; the shared template imported
        # from pe.py is always used.
        return prompt_template
class APIPromptExpander(PromptExpander):
    """Prompt expander backed by an OpenAI-compatible chat-completions API
    (DashScope's compatible-mode endpoint by default)."""

    def __init__(self, api_config=None, **kwargs):
        super().__init__(backend="api", **kwargs)
        self.api_config = api_config or {}
        self.client = self._init_api_client()

    def _init_api_client(self):
        """Build the OpenAI-compatible client.

        Returns None (expansion disabled, not fatal) when the openai SDK is
        missing, no API key is configured, or construction fails.
        """
        try:
            from openai import OpenAI

            api_key = self.api_config.get("api_key") or DASHSCOPE_API_KEY
            base_url = self.api_config.get(
                "base_url", "https://dashscope.aliyuncs.com/compatible-mode/v1"
            )
            if not api_key:
                print("Warning: DASHSCOPE_API_KEY not found.")
                return None
            return OpenAI(api_key=api_key, base_url=base_url)
        except ImportError:
            print("Please install openai: pip install openai")
            return None
        except Exception as e:
            print(f"Failed to initialize API client: {e}")
            return None

    def __call__(self, prompt, system_prompt=None, seed=-1, **kwargs):
        return self.extend(prompt, system_prompt, seed, **kwargs)

    @staticmethod
    def _parse_revised_prompt(content):
        """Extract "revised_prompt" from a ```json fenced block in *content*.

        Falls back to the raw content when there is no fenced block, the
        fence is unterminated, the JSON is malformed, or the key is absent.
        (Previously a bare ``except:`` swallowed all errors here, and an
        unterminated fence was mis-sliced with an index of -1.)
        """
        json_start = content.find("```json")
        if json_start == -1:
            return content
        json_end = content.find("```", json_start + 7)
        if json_end == -1:
            # Unterminated fence: don't guess at the payload boundary.
            return content
        try:
            data = json.loads(content[json_start + 7 : json_end].strip())
            return data.get("revised_prompt", content)
        except (json.JSONDecodeError, AttributeError):
            # AttributeError: valid JSON that is not an object (no .get).
            return content

    def extend(self, prompt, system_prompt=None, seed=-1, **kwargs):
        """Ask the LLM to rewrite *prompt*; always returns a PromptOutput.

        Network/API failures are captured in the returned PromptOutput
        (status=False, message=error text) rather than raised.
        """
        if self.client is None:
            return PromptOutput(
                False, "", seed, system_prompt, "API client not initialized"
            )
        if system_prompt is None:
            system_prompt = self.decide_system_prompt()
        if "{prompt}" in system_prompt:
            # The template embeds the user prompt itself; send a blank
            # (single-space) user turn so it is not duplicated.
            system_prompt = system_prompt.format(prompt=prompt)
            prompt = " "
        try:
            model = self.api_config.get("model", "qwen3-max-preview")
            response = self.client.chat.completions.create(
                model=model,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": prompt},
                ],
                temperature=0.7,
                top_p=0.8,
            )
            content = response.choices[0].message.content
            return PromptOutput(
                status=True,
                prompt=self._parse_revised_prompt(content),
                seed=seed,
                system_prompt=system_prompt,
                message=content,
            )
        except Exception as e:
            return PromptOutput(False, "", seed, system_prompt, str(e))
def create_prompt_expander(backend="api", **kwargs):
    """Factory for prompt expanders; only the "api" backend exists today."""
    if backend != "api":
        raise ValueError("Only 'api' backend is supported.")
    return APIPromptExpander(**kwargs)
# Module-level singletons populated by init_app(); each stays None when its
# initialization fails, and callers must check before use.
pipe = None
prompt_expander = None
def init_app():
    """Load the diffusion pipeline and prompt expander into module globals.

    Runs once at import time. Failures are logged and leave the
    corresponding global as None instead of raising, so the UI can still
    come up in a degraded state.
    """
    global pipe, prompt_expander
    try:
        pipe = load_models(
            MODEL_PATH,
            enable_compile=ENABLE_COMPILE,
            attention_backend=ATTENTION_BACKEND,
        )
        print(f"Model loaded. Compile: {ENABLE_COMPILE}, Backend: {ATTENTION_BACKEND}")
        if ENABLE_WARMUP:
            # Pre-generate at every supported resolution to warm kernels.
            warmup_model(pipe, RESOLUTIONS)
    except Exception as e:
        print(f"Error loading model: {e}")
        pipe = None
    try:
        prompt_expander = create_prompt_expander(
            backend="api", api_config={"model": "qwen3-max-preview"}
        )
        print("Prompt expander initialized.")
    except Exception as e:
        print(f"Error initializing prompt expander: {e}")
        prompt_expander = None
def prompt_enhance(prompt, enable_enhance):
    """Optionally expand *prompt* via the global prompt expander.

    Returns (prompt_to_use, status_message). On any failure the original
    prompt is passed through unchanged with an explanatory message.
    """
    # Short-circuit: the expander global is only consulted when enhancement
    # is actually requested.
    if not enable_enhance or not prompt_expander:
        return prompt, "Enhancement disabled or not available."
    if not prompt.strip():
        return "", "Please enter a prompt."
    try:
        result = prompt_expander(prompt)
        if result.status:
            return result.prompt, result.message
        return prompt, f"Enhancement failed: {result.message}"
    except Exception as e:
        return prompt, f"Error: {str(e)}"
def generate(
    prompt: str,
    resolution: str = "1024x1024 ( 1:1 )",
    seed: int = 42,
    steps: int = 9,
    shift: float = 3.0,
    enhance: bool = False,
    random_seed: bool = True,
    progress: gr.Progress = gr.Progress(track_tqdm=True),
):
    """
    Generate a single image using the Z-Image model based on the provided prompt and settings.

    This function is exposed as a Gradio/MCP tool via the main 'Generate' button.
    It optionally enhances the prompt, configures generation parameters, and
    returns exactly one image plus the seed used.

    Args:
        prompt: Text prompt describing the desired image content.
        resolution: Output resolution in format "WIDTHxHEIGHT ( RATIO )".
        seed: Seed for reproducible generation. Ignored if random_seed is True.
        steps: Number of inference steps for the diffusion process.
        shift: Time shift parameter for the flow matching scheduler.
        enhance: (Currently disabled in the UI) Whether to enhance the prompt.
        random_seed: If True, a new random seed will be sampled.
        progress: Gradio progress tracker (automatically provided by Gradio).

    Returns:
        tuple[object, str, int]: (image, seed_str, seed_int)

    Raises:
        gr.Error: When the model failed to load at startup (pipe is None).
    """
    if pipe is None:
        raise gr.Error("Model not loaded.")
    final_prompt = prompt
    if enhance:
        # Enhancement is normally off: the UI wires a gr.State fixed to False
        # into this parameter.
        final_prompt, _ = prompt_enhance(prompt, True)
        print(f"Enhanced prompt: {final_prompt}")
    if random_seed:
        new_seed = random.randint(1, 1000000)
    else:
        # A sentinel seed of -1 also triggers random sampling.
        new_seed = seed if seed != -1 else random.randint(1, 1000000)
    try:
        # "1024x1024 ( 1:1 )" -> "1024x1024"
        resolution_str = resolution.split(" ")[0]
    except Exception:
        resolution_str = "1024x1024"
    image = generate_image(
        pipe=pipe,
        prompt=final_prompt,
        resolution=resolution_str,
        seed=new_seed,
        guidance_scale=0.0,
        # NOTE(review): one step is added on top of the UI value — presumably
        # intentional for the scheduler's step accounting; confirm.
        num_inference_steps=int(steps + 1),
        shift=shift,
    )
    # The seed is returned twice: once as display text for the "Seed Used"
    # box and once to write back into the numeric Seed component.
    return image, str(new_seed), int(new_seed)
init_app()
# ==================== AoTI (Ahead of Time Inductor compilation) ====================
# Bug fix: init_app() leaves `pipe` as None when model loading fails, and the
# unconditional attribute access below then raised AttributeError at import
# time, crashing the whole Space instead of starting in the degraded state
# that generate() already handles via gr.Error.
if pipe is not None:
    pipe.transformer.layers._repeated_blocks = ["ZImageTransformerBlock"]
    spaces.aoti_blocks_load(
        pipe.transformer.layers, "zerogpu-aoti/Z-Image", variant="fa3"
    )
# Build the Gradio UI. The single Generate button is also exposed as an MCP
# tool named "generate_image" (see the click handler at the bottom).
with gr.Blocks(title="Z-Image Generation MCP") as demo:
    gr.Markdown(
        """<div align="center">
# Z-Image Generation MCP
<a href="https://huggingface.co/settings/mcp?add=victor/Z-Image-Turbo-MCP" target="_blank"
style="display: inline-block; padding: 8px 20px; background: #22c55e;
color: white; text-decoration: none; border-radius: 9999px; font-weight: 600;">
Use via MCP
</a>
*An Efficient Image Generation Foundation Model with Single-Stream Diffusion Transformer*
</div>"""
    )
    with gr.Row():
        # Left column: generation inputs.
        with gr.Column(scale=1):
            prompt_input = gr.Textbox(
                label="Prompt", lines=3, placeholder="Enter your prompt here..."
            )
            # PE components (Temporarily disabled)
            # with gr.Row():
            #     enable_enhance = gr.Checkbox(label="Enhance Prompt (DashScope)", value=False)
            #     enhance_btn = gr.Button("Enhance Only")
            resolution = gr.Dropdown(
                value=RESOLUTIONS[0],
                choices=RESOLUTIONS,
                label="Resolution",
            )
            with gr.Row():
                seed = gr.Number(label="Seed", value=42, precision=0)
                random_seed = gr.Checkbox(label="Random Seed", value=True)
            with gr.Row():
                # Step count is displayed but locked (interactive=False);
                # generate() receives this fixed value.
                steps = gr.Slider(
                    label="Steps",
                    minimum=1,
                    maximum=100,
                    value=8,
                    step=1,
                    interactive=False,
                )
                shift = gr.Slider(
                    label="Time Shift", minimum=1.0, maximum=10.0, value=3.0, step=0.1
                )
            generate_btn = gr.Button("Generate", variant="primary")
            # Example prompts
            gr.Markdown("### ๐ Example Prompts")
            gr.Examples(examples=EXAMPLE_PROMPTS, inputs=prompt_input, label=None)
        # Right column: output.
        with gr.Column(scale=1):
            # Switched from Gallery -> single Image for MCP-friendly output
            output_image = gr.Image(
                label="Generated Image",
                format="png",
                height=600,
                interactive=False,
            )
            used_seed = gr.Textbox(label="Seed Used", interactive=False)
    # Dummy enable_enhance variable set to False
    enable_enhance = gr.State(value=False)
    # Note: `seed` appears in both inputs and outputs so the sampled seed is
    # written back into the Seed component after each run.
    generate_btn.click(
        generate,
        inputs=[
            prompt_input,
            resolution,
            seed,
            steps,
            shift,
            enable_enhance,
            random_seed,
        ],
        outputs=[output_image, used_seed, seed],
        api_visibility="public",  # exposed as MCP tool
        api_name="generate_image",  # nice, stable name for tool clients
    )
# Extra CSS: widen Gradio's default content container.
css = """
.fillable{max-width: 1230px !important}
"""
if __name__ == "__main__":
    # mcp_server=True exposes the click handler above as an MCP tool.
    # NOTE(review): css is passed to launch() here; recent Gradio versions
    # accept css on gr.Blocks(...) instead — confirm the pinned version
    # supports this argument.
    demo.launch(css=css, mcp_server=True)