Z-Image-Turbo-controlnet

Running on Zero

App Files Community

akhaliq (HF Staff) committed 8 days ago

Commit

663212e

verified ·

1 Parent(s): bca471a

Update app.py

Browse files

Files changed (1) hide show

app.py +191 -331

app.py CHANGED Viewed

@@ -3,105 +3,107 @@ import gradio as gr
 import torch
 import numpy as np
 import random
 from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler
 from transformers import AutoTokenizer, Qwen3ForCausalLM
 from controlnet_aux.processor import Processor
 from PIL import Image
-# Try to import ControlNet components, fall back to basic pipeline if unavailable
 try:
-    from videox_fun.pipeline import ZImageControlPipeline
-    from videox_fun.models import ZImageControlTransformer2DModel
-    CONTROLNET_AVAILABLE = True
 except ImportError:
-    from diffusers import ZImagePipeline
-    CONTROLNET_AVAILABLE = False
-    print("ControlNet components not available. Running in basic mode.")
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 1280
-# Configuration
-MODEL_REPO = "Tongyi-MAI/Z-Image-Turbo"
-CONTROLNET_WEIGHTS = "Z-Image-Turbo-Fun-Controlnet-Union.safetensors"  # Optional local path
 print("Loading Z-Image Turbo model...")
-print("This may take a few minutes on first run...")
 device = "cuda" if torch.cuda.is_available() else "cpu"
 weight_dtype = torch.bfloat16
-# Load models
-if CONTROLNET_AVAILABLE:
-    print("Loading with ControlNet support...")
-    # Load transformer with control layers
-    transformer = ZImageControlTransformer2DModel.from_pretrained(
-        MODEL_REPO,
-        subfolder="transformer",
-        transformer_additional_kwargs={
-            "control_layers_places": [0, 5, 10, 15, 20, 25],
-            "control_in_dim": 16
-        },
-    ).to(device, weight_dtype)
-    # Optionally load ControlNet weights if available
     try:
-        from safetensors.torch import load_file
-        import os
-        if os.path.exists(CONTROLNET_WEIGHTS):
-            print(f"Loading ControlNet weights from {CONTROLNET_WEIGHTS}")
-            state_dict = load_file(CONTROLNET_WEIGHTS)
-            state_dict = state_dict.get("state_dict", state_dict)
-            m, u = transformer.load_state_dict(state_dict, strict=False)
-            print(f"Loaded ControlNet: {len(m)} missing keys, {len(u)} unexpected keys")
     except Exception as e:
-        print(f"Could not load ControlNet weights: {e}")
-    # Load other components
-    vae = AutoencoderKL.from_pretrained(
-        MODEL_REPO,
-        subfolder="vae",
-    ).to(device, weight_dtype)
-    tokenizer = AutoTokenizer.from_pretrained(
-        MODEL_REPO,
-        subfolder="tokenizer"
-    )
-    text_encoder = Qwen3ForCausalLM.from_pretrained(
-        MODEL_REPO,
-        subfolder="text_encoder",
-        torch_dtype=weight_dtype,
-    ).to(device)
-    scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(
-        MODEL_REPO,
-        subfolder="scheduler"
-    )
-    pipe = ZImageControlPipeline(
-        vae=vae,
-        tokenizer=tokenizer,
-        text_encoder=text_encoder,
-        transformer=transformer,
-        scheduler=scheduler,
-    )
-    pipe.to(device, weight_dtype)
 else:
-    print("Loading basic Z-Image Turbo (no ControlNet)...")
-    pipe = ZImagePipeline.from_pretrained(
-        MODEL_REPO,
-        torch_dtype=weight_dtype,
-        low_cpu_mem_usage=False,
-    )
-    pipe.to(device)
 print(f"Model loaded successfully on {device}!")
 def rescale_image(image, scale, divisible_by=16):
     """Rescale image and ensure dimensions are divisible by specified value."""
     width, height = image.size
     new_width = int(width * scale)
     new_height = int(height * scale)
@@ -150,43 +152,36 @@ def generate_image(
     guidance_scale=1.0,
     seed=42,
     randomize_seed=True,
     progress=gr.Progress(track_tqdm=True)
 ):
-    """Generate image with optional ControlNet guidance."""
     if not prompt.strip():
         raise gr.Error("Please enter a prompt to generate an image.")
-    # Set seed
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
     generator = torch.Generator(device).manual_seed(seed)
-    # Basic generation (no control image)
-    if input_image is None or not CONTROLNET_AVAILABLE:
-        if input_image is not None and not CONTROLNET_AVAILABLE:
-            gr.Warning("ControlNet not available. Generating without control image.")
-        progress(0.1, desc="Generating image...")
-        result = pipe(
-            prompt=prompt,
-            negative_prompt=negative_prompt if negative_prompt else None,
-            height=1024,
-            width=1024,
-            num_inference_steps=num_inference_steps,
-            guidance_scale=0.0 if not CONTROLNET_AVAILABLE else guidance_scale,
-            generator=generator,
-        )
-        image = result.images[0]
-        progress(1.0, desc="Complete!")
-        return image, seed, None
-    # ControlNet generation
-    progress(0.1, desc="Processing control image...")
-    # Map control mode to processor
     processor_map = {
         'Canny': 'canny',
         'HED': 'softedge_hed',
@@ -194,49 +189,56 @@ def generate_image(
         'MLSD': 'mlsd',
         'Pose': 'openpose_full'
     }
     processor_id = processor_map.get(control_mode, 'canny')
-    processor = Processor(processor_id)
-    # Process control image
-    control_image, width, height = rescale_image(input_image, image_scale, 16)
-    control_image_1024 = control_image.resize((1024, 1024))
-    progress(0.3, desc=f"Applying {control_mode} detection...")
-    control_image_processed = processor(control_image_1024, to_pil=True)
-    control_image_processed = control_image_processed.resize((width, height))
-    # Convert to latent
-    progress(0.5, desc="Converting to latent space...")
-    control_image_torch = get_image_latent(
-        control_image_processed,
         sample_size=[height, width]
     )[:, :, 0]
-    # Generate with control
-    progress(0.6, desc="Generating controlled image...")
     try:
         result = pipe(
-            prompt=prompt,
-            negative_prompt=negative_prompt if negative_prompt else None,
             height=height,
             width=width,
             generator=generator,
             guidance_scale=guidance_scale,
-            control_image=control_image_torch,
             num_inference_steps=num_inference_steps,
             control_context_scale=control_context_scale,
         )
         image = result.images[0]
         progress(1.0, desc="Complete!")
-        return image, seed, control_image_processed
     except Exception as e:
         raise gr.Error(f"Generation failed: {str(e)}")
-# Apple-style CSS
 apple_css = """
 .gradio-container {
     max-width: 1200px !important;
@@ -244,269 +246,127 @@ apple_css = """
     padding: 48px 20px !important;
     font-family: -apple-system, BlinkMacSystemFont, 'Inter', 'Segoe UI', sans-serif !important;
 }
-.header-container {
-    text-align: center;
-    margin-bottom: 48px;
-}
 .main-title {
-    font-size: 56px !important;
-    font-weight: 600 !important;
-    letter-spacing: -0.02em !important;
-    color: #1d1d1f !important;
     margin: 0 0 12px 0 !important;
 }
 .subtitle {
-    font-size: 21px !important;
-    color: #6e6e73 !important;
     margin: 0 0 24px 0 !important;
 }
 .info-badge {
-    display: inline-block;
-    background: #0071e3;
-    color: white;
-    padding: 6px 16px;
-    border-radius: 20px;
-    font-size: 14px;
-    font-weight: 500;
-    margin-bottom: 16px;
 }
 textarea {
-    font-size: 17px !important;
-    border-radius: 12px !important;
-    border: 1px solid #d2d2d7 !important;
-    padding: 12px 16px !important;
 }
 textarea:focus {
-    border-color: #0071e3 !important;
-    box-shadow: 0 0 0 4px rgba(0, 113, 227, 0.15) !important;
     outline: none !important;
 }
 button.primary {
-    font-size: 17px !important;
-    padding: 12px 32px !important;
-    border-radius: 980px !important;
-    background: #0071e3 !important;
-    border: none !important;
-    color: #ffffff !important;
     transition: all 0.2s ease !important;
 }
 button.primary:hover {
-    background: #0077ed !important;
-    transform: scale(1.02) !important;
 }
 .footer-text {
-    text-align: center;
-    margin-top: 48px;
-    font-size: 14px !important;
     color: #86868b !important;
 }
-@media (max-width: 768px) {
-    .main-title { font-size: 40px !important; }
-    .subtitle { font-size: 19px !important; }
-}
 """
-# Create interface
-with gr.Blocks(title="Z-Image Turbo with ControlNet") as demo:
-    # Header
-    gr.HTML(f"""
         <div class="header-container">
-            <div class="info-badge">{'✓ ControlNet Enabled' if CONTROLNET_AVAILABLE else '⚠ Basic Mode'}</div>
             <h1 class="main-title">Z-Image Turbo</h1>
-            <p class="subtitle">Transform your ideas into stunning visuals with AI-powered control</p>
         </div>
     """)
     with gr.Row():
-        # Left column - Inputs
         with gr.Column(scale=1):
             prompt = gr.Textbox(
                 label="Prompt",
                 placeholder="Describe the image you want to create...",
-                lines=3,
-                max_lines=6,
             )
             negative_prompt = gr.Textbox(
                 label="Negative Prompt",
-                placeholder="What to avoid in the image...",
                 value="blurry, ugly, bad quality",
-                lines=2,
             )
-            if CONTROLNET_AVAILABLE:
-                input_image = gr.Image(
-                    label="Control Image (Optional)",
-                    type="pil",
-                    sources=['upload', 'clipboard'],
-                    height=290,
-                )
-                control_mode = gr.Radio(
-                    choices=["Canny", "Depth", "HED", "MLSD", "Pose"],
-                    value="Canny",
-                    label="Control Mode",
-                    info="Choose edge/depth/pose detection method"
-                )
             with gr.Accordion("Advanced Settings", open=False):
-                num_inference_steps = gr.Slider(
-                    label="Inference Steps",
-                    minimum=1,
-                    maximum=30,
-                    step=1,
-                    value=9,
-                    info="More steps = higher quality but slower"
-                )
-                guidance_scale = gr.Slider(
-                    label="Guidance Scale",
-                    minimum=0.0,
-                    maximum=10.0,
-                    step=0.1,
-                    value=1.0,
-                    info="How closely to follow the prompt"
-                )
-                if CONTROLNET_AVAILABLE:
-                    control_context_scale = gr.Slider(
-                        label="Control Strength",
-                        minimum=0.0,
-                        maximum=1.0,
-                        step=0.01,
-                        value=0.75,
-                        info="0.65-0.80 recommended for best results"
-                    )
-                    image_scale = gr.Slider(
-                        label="Image Scale",
-                        minimum=0.5,
-                        maximum=2.0,
-                        step=0.1,
-                        value=1.0,
-                        info="Resize control image"
-                    )
-                seed = gr.Slider(
-                    label="Seed",
-                    minimum=0,
-                    maximum=MAX_SEED,
-                    step=1,
-                    value=42,
-                )
-                randomize_seed = gr.Checkbox(
-                    label="Randomize Seed",
-                    value=True
-                )
-            generate_btn = gr.Button(
-                "Generate Image",
-                variant="primary",
-                size="lg",
-                elem_classes="primary"
-            )
-        # Right column - Outputs
         with gr.Column(scale=1):
-            output_image = gr.Image(
-                label="Generated Image",
-                type="pil",
-                show_label=True,
-            )
-            seed_output = gr.Number(
-                label="Used Seed",
-                precision=0,
-            )
-            if CONTROLNET_AVAILABLE:
-                with gr.Accordion("Preprocessor Output", open=False):
-                    control_output = gr.Image(
-                        label="Processed Control Image",
-                        type="pil",
-                    )
     # Footer
     gr.HTML("""
         <div class="footer-text">
-            <p style="margin-bottom: 8px;">Powered by Z-Image Turbo from Tongyi-MAI</p>
-            <p style="font-size: 13px;">
-                <a href="https://huggingface.co/Tongyi-MAI/Z-Image-Turbo" style="color: #0071e3; text-decoration: none; margin: 0 8px;">
-                    Model Card
-                </a> •
-                <a href="https://huggingface.co/alibaba-pai/Z-Image-Turbo-Fun-Controlnet-Union" style="color: #0071e3; text-decoration: none; margin: 0 8px;">
-                    ControlNet
-                </a> •
-                <a href="https://github.com/aigc-apps/VideoX-Fun" style="color: #0071e3; text-decoration: none; margin: 0 8px;">
-                    GitHub
-                </a>
-            </p>
         </div>
     """)
-    # Event handlers
-    generate_inputs = [
-        prompt,
-        negative_prompt,
-    ]
-    if CONTROLNET_AVAILABLE:
-        generate_inputs.extend([
-            input_image,
-            control_mode,
-            control_context_scale,
-            image_scale,
-        ])
-        generate_inputs.extend([
-            num_inference_steps,
-            guidance_scale,
-            seed,
-            randomize_seed,
-        ])
-        generate_outputs = [output_image, seed_output, control_output]
-    else:
-        # Add None placeholders for missing ControlNet params
-        generate_inputs.extend([
-            gr.State(None),  # input_image
-            gr.State("Canny"),  # control_mode
-            gr.State(0.75),  # control_context_scale
-            gr.State(1.0),  # image_scale
-        ])
-        generate_inputs.extend([
-            num_inference_steps,
-            guidance_scale,
-            seed,
-            randomize_seed,
-        ])
-        generate_outputs = [output_image, seed_output, gr.State(None)]
     generate_btn.click(
         fn=generate_image,
-        inputs=generate_inputs,
-        outputs=generate_outputs,
-    )
-    prompt.submit(
-        fn=generate_image,
-        inputs=generate_inputs,
-        outputs=generate_outputs,
     )
 if __name__ == "__main__":
-    demo.launch(
-        share=False,
-        show_error=True,
-        css=apple_css,
-    )

 import torch
 import numpy as np
 import random
+import time
+import os
 from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler
 from transformers import AutoTokenizer, Qwen3ForCausalLM
 from controlnet_aux.processor import Processor
 from PIL import Image
+from safetensors.torch import load_file
+# Import pipeline and model
+# Ensure videox_fun is in your python path
+from videox_fun.pipeline import ZImageControlPipeline
+from videox_fun.models import ZImageControlTransformer2DModel
+# Try to import prompt utility, define fallback if missing
 try:
+    from utils.prompt_utils import polish_prompt
 except ImportError:
+    print("utils.prompt_utils not found. Using passthrough for prompt polishing.")
+    def polish_prompt(prompt):
+        return prompt
+# Configuration
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 1280
+# Paths
+MODEL_LOCAL = "models/Z-Image-Turbo/" # Local path or HuggingFace ID
+# We prioritize the local safetensors file for ControlNet weights
+CONTROLNET_WEIGHTS = "models/Z-Image-Turbo-Fun-Controlnet-Union.safetensors"
 print("Loading Z-Image Turbo model...")
 device = "cuda" if torch.cuda.is_available() else "cpu"
 weight_dtype = torch.bfloat16
+# 1. Load Transformer with Control Config
+print("Initializing Transformer...")
+transformer = ZImageControlTransformer2DModel.from_pretrained(
+    MODEL_LOCAL,
+    subfolder="transformer",
+    transformer_additional_kwargs={
+        "control_layers_places": [0, 5, 10, 15, 20, 25],
+        "control_in_dim": 16
+    },
+).to(device, weight_dtype)
+# 2. Load ControlNet Weights manually
+if os.path.exists(CONTROLNET_WEIGHTS):
+    print(f"Loading ControlNet weights from {CONTROLNET_WEIGHTS}")
     try:
+        state_dict = load_file(CONTROLNET_WEIGHTS)
+        # Handle potential nesting of state_dict
+        state_dict = state_dict.get("state_dict", state_dict)
+        m, u = transformer.load_state_dict(state_dict, strict=False)
+        print(f"ControlNet Weights Loaded - Missing keys: {len(m)}, Unexpected keys: {len(u)}")
     except Exception as e:
+        print(f"Error loading ControlNet weights: {e}")
 else:
+    print(f"Warning: ControlNet weights not found at {CONTROLNET_WEIGHTS}. Trying to run without them or using base weights.")
+# 3. Load VAE, Tokenizer, Encoder, Scheduler
+print("Loading core components...")
+vae = AutoencoderKL.from_pretrained(
+    MODEL_LOCAL,
+    subfolder="vae",
+).to(device, weight_dtype)
+tokenizer = AutoTokenizer.from_pretrained(
+    MODEL_LOCAL,
+    subfolder="tokenizer"
+)
+text_encoder = Qwen3ForCausalLM.from_pretrained(
+    MODEL_LOCAL,
+    subfolder="text_encoder",
+    torch_dtype=weight_dtype,
+).to(device)
+scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(
+    MODEL_LOCAL,
+    subfolder="scheduler"
+)
+# 4. Assemble Pipeline
+pipe = ZImageControlPipeline(
+    vae=vae,
+    tokenizer=tokenizer,
+    text_encoder=text_encoder,
+    transformer=transformer,
+    scheduler=scheduler,
+)
+pipe.to(device, weight_dtype)
 print(f"Model loaded successfully on {device}!")
+# --- Helper Functions ---
 def rescale_image(image, scale, divisible_by=16):
     """Rescale image and ensure dimensions are divisible by specified value."""
+    if image is None:
+        return None, 1024, 1024
     width, height = image.size
     new_width = int(width * scale)
     new_height = int(height * scale)
     guidance_scale=1.0,
     seed=42,
     randomize_seed=True,
+    is_polish_prompt=True,
     progress=gr.Progress(track_tqdm=True)
 ):
+    timestamp = time.time()
     if not prompt.strip():
         raise gr.Error("Please enter a prompt to generate an image.")
+    # 1. Polish Prompt
+    final_prompt = prompt
+    if is_polish_prompt:
+        progress(0.1, desc="Polishing prompt...")
+        try:
+            final_prompt = polish_prompt(prompt)
+        except Exception as e:
+            print(f"Prompt polish failed: {e}")
+            final_prompt = prompt
+    # 2. Set Seed
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
     generator = torch.Generator(device).manual_seed(seed)
+    # 3. Process Control Image
+    if input_image is None:
+        raise gr.Error("Please upload a control image.")
+    progress(0.2, desc=f"Processing {control_mode}...")
+    # Map control mode to processor ID
     processor_map = {
         'Canny': 'canny',
         'HED': 'softedge_hed',
         'MLSD': 'mlsd',
         'Pose': 'openpose_full'
     }
     processor_id = processor_map.get(control_mode, 'canny')
+    # Initialize processor
+    try:
+        processor = Processor(processor_id)
+    except Exception as e:
+        print(f"Failed to load processor {processor_id}, falling back to Canny. Error: {e}")
+        processor = Processor('canny')
+    # Resize input for processing
+    control_image_rescaled, width, height = rescale_image(input_image, image_scale, 16)
+    # Run Processor (requires resizing to 1024x1024 typically for best results with these models, then back)
+    temp_image = control_image_rescaled.resize((1024, 1024))
+    processed_image_pil = processor(temp_image, to_pil=True)
+    processed_image_pil = processed_image_pil.resize((width, height))
+    # Convert to Latent
+    progress(0.4, desc="Encoding control image...")
+    control_image_latent = get_image_latent(
+        processed_image_pil,
         sample_size=[height, width]
     )[:, :, 0]
+    # 4. Generate
+    progress(0.5, desc="Generating...")
     try:
         result = pipe(
+            prompt=final_prompt,
+            negative_prompt=negative_prompt,
             height=height,
             width=width,
             generator=generator,
             guidance_scale=guidance_scale,
+            control_image=control_image_latent,
             num_inference_steps=num_inference_steps,
             control_context_scale=control_context_scale,
         )
         image = result.images[0]
         progress(1.0, desc="Complete!")
+        return image, seed, processed_image_pil, final_prompt
     except Exception as e:
         raise gr.Error(f"Generation failed: {str(e)}")
+# --- UI Configuration (Apple Style) ---
 apple_css = """
 .gradio-container {
     max-width: 1200px !important;
     padding: 48px 20px !important;
     font-family: -apple-system, BlinkMacSystemFont, 'Inter', 'Segoe UI', sans-serif !important;
 }
+.header-container { text-align: center; margin-bottom: 48px; }
 .main-title {
+    font-size: 56px !important; font-weight: 600 !important;
+    letter-spacing: -0.02em !important; color: #1d1d1f !important;
     margin: 0 0 12px 0 !important;
 }
 .subtitle {
+    font-size: 21px !important; color: #6e6e73 !important;
     margin: 0 0 24px 0 !important;
 }
 .info-badge {
+    display: inline-block; background: #0071e3; color: white;
+    padding: 6px 16px; border-radius: 20px; font-size: 14px;
+    font-weight: 500; margin-bottom: 16px;
 }
 textarea {
+    font-size: 17px !important; border-radius: 12px !important;
+    border: 1px solid #d2d2d7 !important; padding: 12px 16px !important;
 }
 textarea:focus {
+    border-color: #0071e3 !important; box-shadow: 0 0 0 4px rgba(0, 113, 227, 0.15) !important;
     outline: none !important;
 }
 button.primary {
+    font-size: 17px !important; padding: 12px 32px !important;
+    border-radius: 980px !important; background: #0071e3 !important;
+    border: none !important; color: #ffffff !important;
     transition: all 0.2s ease !important;
 }
 button.primary:hover {
+    background: #0077ed !important; transform: scale(1.02) !important;
 }
 .footer-text {
+    text-align: center; margin-top: 48px; font-size: 14px !important;
     color: #86868b !important;
 }
 """
+with gr.Blocks(title="Z-Image Turbo ControlNet", css=apple_css) as demo:
+    gr.HTML("""
         <div class="header-container">
+            <div class="info-badge">✓ ControlNet Union</div>
             <h1 class="main-title">Z-Image Turbo</h1>
+            <p class="subtitle">Multi-Control Generation with LLM Prompt Polishing</p>
         </div>
     """)
     with gr.Row():
+        # Left Input Column
         with gr.Column(scale=1):
             prompt = gr.Textbox(
                 label="Prompt",
                 placeholder="Describe the image you want to create...",
+                lines=3
             )
+            with gr.Row():
+                is_polish_prompt = gr.Checkbox(label="Polish Prompt with LLM", value=True)
+                randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
             negative_prompt = gr.Textbox(
                 label="Negative Prompt",
                 value="blurry, ugly, bad quality",
+                lines=1
             )
+            input_image = gr.Image(
+                label="Control Image (Required)",
+                type="pil",
+                sources=['upload', 'clipboard'],
+                height=300
+            )
+            control_mode = gr.Radio(
+                choices=["Canny", "Depth", "HED", "MLSD", "Pose"],
+                value="Canny",
+                label="Control Mode",
+                info="Select the type of structure to extract"
+            )
             with gr.Accordion("Advanced Settings", open=False):
+                with gr.Row():
+                    num_inference_steps = gr.Slider(label="Steps", minimum=1, maximum=30, step=1, value=9)
+                    guidance_scale = gr.Slider(label="Guidance", minimum=0.0, maximum=10.0, step=0.1, value=1.0)
+                with gr.Row():
+                    control_context_scale = gr.Slider(label="Control Strength", minimum=0.0, maximum=1.0, step=0.01, value=0.75)
+                    image_scale = gr.Slider(label="Image Scale", minimum=0.5, maximum=2.0, step=0.1, value=1.0)
+                seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42)
+            generate_btn = gr.Button("Generate Image", variant="primary", elem_classes="primary")
+        # Right Output Column
         with gr.Column(scale=1):
+            output_image = gr.Image(label="Generated Image", type="pil")
+            with gr.Accordion("Details & Debug", open=True):
+                polished_prompt_output = gr.Textbox(label="Actual Polished Prompt", interactive=False, lines=2)
+                with gr.Row():
+                    seed_output = gr.Number(label="Seed Used", precision=0)
+                control_output = gr.Image(label="Preprocessor Output", type="pil")
     # Footer
     gr.HTML("""
         <div class="footer-text">
+            Powered by Z-Image Turbo • VideoX-Fun • Tongyi-MAI
         </div>
     """)
+    # Event Wiring
     generate_btn.click(
         fn=generate_image,
+        inputs=[
+            prompt, negative_prompt, input_image, control_mode,
+            control_context_scale, image_scale, num_inference_steps,
+            guidance_scale, seed, randomize_seed, is_polish_prompt
+        ],
+        outputs=[output_image, seed_output, control_output, polished_prompt_output]
     )
 if __name__ == "__main__":
+    demo.launch(share=False)