akhaliq HF Staff committed on
Commit
1fcab49
·
verified ·
1 Parent(s): 939bf35

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +381 -230
app.py CHANGED
@@ -1,90 +1,250 @@
1
  import spaces
2
  import gradio as gr
3
  import torch
4
- from diffusers import ZImagePipeline
5
- import os
6
- from pathlib import Path
 
 
 
7
 
8
- # Load the model directly at startup
9
- print("Loading Z-Image Turbo model...")
10
- print("This may take a few minutes on first run while the model downloads...")
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
 
 
12
 
13
- # Load the pipeline with optimal settings
14
- pipe = ZImagePipeline.from_pretrained(
15
- "Tongyi-MAI/Z-Image-Turbo",
16
- torch_dtype=torch.bfloat16,
17
- low_cpu_mem_usage=False,
18
- )
19
-
20
- # Move to GPU if available
21
  device = "cuda" if torch.cuda.is_available() else "cpu"
22
- pipe.to(device)
23
- print(f"Model loaded on {device}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
- print("Model loaded successfully!")
 
 
 
 
26
 
27
  @spaces.GPU()
28
  def generate_image(
29
  prompt,
 
 
 
 
 
 
 
 
 
30
  progress=gr.Progress(track_tqdm=True)
31
  ):
32
- """
33
- Generate an image using Z-Image Turbo model.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
- Args:
36
- prompt: Text description of the desired image
37
 
38
- Returns:
39
- Generated PIL Image
40
- """
41
- global pipe
 
 
 
 
42
 
43
- if pipe is None:
44
- raise gr.Error("Model failed to load on startup. Please restart the application.")
45
 
46
- if not prompt.strip():
47
- raise gr.Error("Please enter a prompt to generate an image.")
 
48
 
49
- # Determine device
50
- device = "cuda" if torch.cuda.is_available() else "cpu"
 
51
 
52
- # Set random seed for reproducibility
53
- generator = torch.Generator(device).manual_seed(42)
 
 
 
 
54
 
55
- # Generate the image with optimal settings
56
- progress(0.1, desc="Generating image...")
57
 
58
  try:
59
  result = pipe(
60
  prompt=prompt,
61
- negative_prompt=None,
62
- height=1024,
63
- width=1024,
64
- num_inference_steps=9,
65
- guidance_scale=0.0,
66
  generator=generator,
 
 
 
 
67
  )
68
 
69
  image = result.images[0]
70
  progress(1.0, desc="Complete!")
 
71
 
72
- return image
73
-
74
  except Exception as e:
75
  raise gr.Error(f"Generation failed: {str(e)}")
76
 
77
  # Apple-style CSS
78
  apple_css = """
79
- /* Global Styles */
80
  .gradio-container {
81
- max-width: 980px !important;
82
  margin: 0 auto !important;
83
  padding: 48px 20px !important;
84
- font-family: -apple-system, BlinkMacSystemFont, 'Inter', 'Segoe UI', 'Roboto', sans-serif !important;
85
  }
86
 
87
- /* Header */
88
  .header-container {
89
  text-align: center;
90
  margin-bottom: 48px;
@@ -94,52 +254,32 @@ apple_css = """
94
  font-size: 56px !important;
95
  font-weight: 600 !important;
96
  letter-spacing: -0.02em !important;
97
- line-height: 1.07 !important;
98
  color: #1d1d1f !important;
99
  margin: 0 0 12px 0 !important;
100
  }
101
 
102
  .subtitle {
103
  font-size: 21px !important;
104
- font-weight: 400 !important;
105
- line-height: 1.38 !important;
106
  color: #6e6e73 !important;
107
  margin: 0 0 24px 0 !important;
108
  }
109
 
110
- .attribution-link {
111
  display: inline-block;
112
- font-size: 14px !important;
113
- color: #0071e3 !important;
114
- text-decoration: none !important;
115
- font-weight: 400 !important;
116
- transition: color 0.2s ease !important;
117
- }
118
-
119
- .attribution-link:hover {
120
- color: #0077ed !important;
121
- text-decoration: underline !important;
122
- }
123
-
124
- /* Input Section */
125
- .input-section {
126
- background: #ffffff;
127
- border-radius: 18px;
128
- padding: 32px;
129
- margin-bottom: 24px;
130
- box-shadow: 0 2px 12px rgba(0, 0, 0, 0.08);
131
  }
132
 
133
- /* Textbox */
134
  textarea {
135
  font-size: 17px !important;
136
- line-height: 1.47 !important;
137
  border-radius: 12px !important;
138
  border: 1px solid #d2d2d7 !important;
139
  padding: 12px 16px !important;
140
- transition: all 0.2s ease !important;
141
- background: #ffffff !important;
142
- font-family: -apple-system, BlinkMacSystemFont, 'Inter', sans-serif !important;
143
  }
144
 
145
  textarea:focus {
@@ -148,23 +288,14 @@ textarea:focus {
148
  outline: none !important;
149
  }
150
 
151
- textarea::placeholder {
152
- color: #86868b !important;
153
- }
154
-
155
- /* Button */
156
  button.primary {
157
  font-size: 17px !important;
158
- font-weight: 400 !important;
159
  padding: 12px 32px !important;
160
  border-radius: 980px !important;
161
  background: #0071e3 !important;
162
  border: none !important;
163
  color: #ffffff !important;
164
- min-height: 44px !important;
165
  transition: all 0.2s ease !important;
166
- letter-spacing: -0.01em !important;
167
- cursor: pointer !important;
168
  }
169
 
170
  button.primary:hover {
@@ -172,189 +303,209 @@ button.primary:hover {
172
  transform: scale(1.02) !important;
173
  }
174
 
175
- button.primary:active {
176
- transform: scale(0.98) !important;
177
- }
178
-
179
- /* Output Section */
180
- .output-section {
181
- background: #ffffff;
182
- border-radius: 18px;
183
- padding: 32px;
184
- box-shadow: 0 2px 12px rgba(0, 0, 0, 0.08);
185
- overflow: hidden;
186
- }
187
-
188
- .output-section img {
189
- border-radius: 12px !important;
190
- width: 100% !important;
191
- height: auto !important;
192
- }
193
-
194
- /* Footer */
195
  .footer-text {
196
  text-align: center;
197
  margin-top: 48px;
198
  font-size: 14px !important;
199
  color: #86868b !important;
200
- line-height: 1.43 !important;
201
- }
202
-
203
- /* Progress */
204
- .progress-bar {
205
- background: #0071e3 !important;
206
- border-radius: 4px !important;
207
- }
208
-
209
- /* Dark Mode */
210
- .dark .main-title {
211
- color: #f5f5f7 !important;
212
- }
213
-
214
- .dark .subtitle {
215
- color: #a1a1a6 !important;
216
- }
217
-
218
- .dark .input-section,
219
- .dark .output-section {
220
- background: #1d1d1f;
221
- box-shadow: 0 2px 12px rgba(0, 0, 0, 0.4);
222
- }
223
-
224
- .dark textarea {
225
- background: #1d1d1f !important;
226
- border-color: #424245 !important;
227
- color: #f5f5f7 !important;
228
- }
229
-
230
- .dark textarea::placeholder {
231
- color: #86868b !important;
232
- }
233
-
234
- /* Responsive */
235
- @media (max-width: 734px) {
236
- .main-title {
237
- font-size: 40px !important;
238
- }
239
-
240
- .subtitle {
241
- font-size: 19px !important;
242
- }
243
-
244
- .gradio-container {
245
- padding: 32px 16px !important;
246
- }
247
-
248
- .input-section,
249
- .output-section {
250
- padding: 24px !important;
251
- }
252
  }
253
 
254
- /* Remove default Gradio styling */
255
- .contain {
256
- padding: 0 !important;
257
  }
258
  """
259
 
260
- # Create the interface
261
- with gr.Blocks(
262
- title="Z-Image Turbo",
263
- fill_height=False,
264
- ) as demo:
265
 
266
  # Header
267
- gr.HTML("""
268
  <div class="header-container">
 
269
  <h1 class="main-title">Z-Image Turbo</h1>
270
- <p class="subtitle">Transform your ideas into stunning visuals with AI</p>
271
- <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" class="attribution-link">
272
- Built with anycoder
273
- </a>
274
  </div>
275
  """)
276
 
277
- # Input Section
278
- with gr.Column(elem_classes="input-section"):
279
- prompt = gr.Textbox(
280
- placeholder="Describe the image you want to create...",
281
- lines=3,
282
- max_lines=6,
283
- label="",
284
- show_label=False,
285
- container=False,
286
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
287
 
288
- generate_btn = gr.Button(
289
- "Generate",
290
- variant="primary",
291
- size="lg",
292
- elem_classes="primary"
293
- )
294
-
295
- # Output Section
296
- with gr.Column(elem_classes="output-section"):
297
- output_image = gr.Image(
298
- type="pil",
299
- label="",
300
- show_label=False,
301
- container=False,
302
- buttons=["download"],
303
- )
 
 
 
304
 
305
  # Footer
306
  gr.HTML("""
307
  <div class="footer-text">
308
- <p>Powered by Z-Image Turbo from Tongyi-MAI</p>
 
 
 
 
 
 
 
 
 
 
 
309
  </div>
310
  """)
311
 
312
  # Event handlers
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
313
  generate_btn.click(
314
  fn=generate_image,
315
- inputs=prompt,
316
- outputs=output_image,
317
- api_visibility="public"
318
  )
319
 
320
  prompt.submit(
321
  fn=generate_image,
322
- inputs=prompt,
323
- outputs=output_image,
324
- api_visibility="public"
325
  )
326
 
327
  if __name__ == "__main__":
328
  demo.launch(
329
  share=False,
330
  show_error=True,
331
- theme=gr.themes.Soft(
332
- primary_hue=gr.themes.colors.blue,
333
- secondary_hue=gr.themes.colors.slate,
334
- neutral_hue=gr.themes.colors.gray,
335
- spacing_size=gr.themes.sizes.spacing_lg,
336
- radius_size=gr.themes.sizes.radius_lg,
337
- text_size=gr.themes.sizes.text_md,
338
- font=[gr.themes.GoogleFont("Inter"), "SF Pro Display", "-apple-system", "BlinkMacSystemFont", "system-ui", "sans-serif"],
339
- font_mono=[gr.themes.GoogleFont("JetBrains Mono"), "SF Mono", "ui-monospace", "monospace"],
340
- ).set(
341
- body_background_fill='#f5f5f7',
342
- body_background_fill_dark='#000000',
343
- button_primary_background_fill='#0071e3',
344
- button_primary_background_fill_hover='#0077ed',
345
- button_primary_text_color='#ffffff',
346
- block_background_fill='#ffffff',
347
- block_background_fill_dark='#1d1d1f',
348
- block_border_width='0px',
349
- block_shadow='0 2px 12px rgba(0, 0, 0, 0.08)',
350
- block_shadow_dark='0 2px 12px rgba(0, 0, 0, 0.4)',
351
- input_background_fill='#ffffff',
352
- input_background_fill_dark='#1d1d1f',
353
- input_border_width='1px',
354
- input_border_color='#d2d2d7',
355
- input_border_color_dark='#424245',
356
- input_shadow='none',
357
- input_shadow_focus='0 0 0 4px rgba(0, 113, 227, 0.15)',
358
- ),
359
- css=apple_css,
360
  )
 
1
  import spaces
2
  import gradio as gr
3
  import torch
4
+ import numpy as np
5
+ import random
6
+ from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler
7
+ from transformers import AutoTokenizer, Qwen3ForCausalLM
8
+ from controlnet_aux.processor import Processor
9
+ from PIL import Image
10
 
11
+ # Try to import ControlNet components, fall back to basic pipeline if unavailable
12
+ try:
13
+ from videox_fun.pipeline import ZImageControlPipeline
14
+ from videox_fun.models import ZImageControlTransformer2DModel
15
+ CONTROLNET_AVAILABLE = True
16
+ except ImportError:
17
+ from diffusers import ZImagePipeline
18
+ CONTROLNET_AVAILABLE = False
19
+ print("ControlNet components not available. Running in basic mode.")
20
+
21
+ MAX_SEED = np.iinfo(np.int32).max
22
+ MAX_IMAGE_SIZE = 1280
23
+
24
+ # Configuration
25
+ MODEL_REPO = "Tongyi-MAI/Z-Image-Turbo"
26
+ CONTROLNET_WEIGHTS = "Z-Image-Turbo-Fun-Controlnet-Union.safetensors" # Optional local path
27
 
28
+ print("Loading Z-Image Turbo model...")
29
+ print("This may take a few minutes on first run...")
30
 
 
 
 
 
 
 
 
 
31
  device = "cuda" if torch.cuda.is_available() else "cpu"
32
+ weight_dtype = torch.bfloat16
33
+
34
+ # Load models
35
+ if CONTROLNET_AVAILABLE:
36
+ print("Loading with ControlNet support...")
37
+
38
+ # Load transformer with control layers
39
+ transformer = ZImageControlTransformer2DModel.from_pretrained(
40
+ MODEL_REPO,
41
+ subfolder="transformer",
42
+ transformer_additional_kwargs={
43
+ "control_layers_places": [0, 5, 10, 15, 20, 25],
44
+ "control_in_dim": 16
45
+ },
46
+ ).to(device, weight_dtype)
47
+
48
+ # Optionally load ControlNet weights if available
49
+ try:
50
+ from safetensors.torch import load_file
51
+ import os
52
+ if os.path.exists(CONTROLNET_WEIGHTS):
53
+ print(f"Loading ControlNet weights from {CONTROLNET_WEIGHTS}")
54
+ state_dict = load_file(CONTROLNET_WEIGHTS)
55
+ state_dict = state_dict.get("state_dict", state_dict)
56
+ m, u = transformer.load_state_dict(state_dict, strict=False)
57
+ print(f"Loaded ControlNet: {len(m)} missing keys, {len(u)} unexpected keys")
58
+ except Exception as e:
59
+ print(f"Could not load ControlNet weights: {e}")
60
+
61
+ # Load other components
62
+ vae = AutoencoderKL.from_pretrained(
63
+ MODEL_REPO,
64
+ subfolder="vae",
65
+ ).to(device, weight_dtype)
66
+
67
+ tokenizer = AutoTokenizer.from_pretrained(
68
+ MODEL_REPO,
69
+ subfolder="tokenizer"
70
+ )
71
+
72
+ text_encoder = Qwen3ForCausalLM.from_pretrained(
73
+ MODEL_REPO,
74
+ subfolder="text_encoder",
75
+ torch_dtype=weight_dtype,
76
+ ).to(device)
77
+
78
+ scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(
79
+ MODEL_REPO,
80
+ subfolder="scheduler"
81
+ )
82
+
83
+ pipe = ZImageControlPipeline(
84
+ vae=vae,
85
+ tokenizer=tokenizer,
86
+ text_encoder=text_encoder,
87
+ transformer=transformer,
88
+ scheduler=scheduler,
89
+ )
90
+ pipe.to(device, weight_dtype)
91
+
92
+ else:
93
+ print("Loading basic Z-Image Turbo (no ControlNet)...")
94
+ pipe = ZImagePipeline.from_pretrained(
95
+ MODEL_REPO,
96
+ torch_dtype=weight_dtype,
97
+ low_cpu_mem_usage=False,
98
+ )
99
+ pipe.to(device)
100
+
101
+ print(f"Model loaded successfully on {device}!")
102
+
103
def rescale_image(image, scale, divisible_by=16):
    """Rescale a PIL image so both sides are multiples of ``divisible_by``.

    Each side is scaled by ``scale``, capped at ``MAX_IMAGE_SIZE``, floored to
    a multiple of ``divisible_by`` and never allowed to drop below one
    multiple, so the result always has a valid, non-zero size.

    Args:
        image: Source image; must expose ``size`` and ``resize`` like a PIL
            image.
        scale: Multiplicative factor applied to width and height.
        divisible_by: Granularity the output dimensions are snapped to.

    Returns:
        A ``(resized_image, new_width, new_height)`` tuple.
    """
    width, height = image.size

    def _fit(length):
        # Cap first, then snap down: the snapped value can never exceed the
        # cap, and it stays a multiple of ``divisible_by`` even when the cap
        # itself is not one.
        capped = min(int(length * scale), MAX_IMAGE_SIZE)
        snapped = (capped // divisible_by) * divisible_by
        # Tiny inputs (or tiny scales) would otherwise floor to 0, which
        # makes ``resize`` fail.
        return max(snapped, divisible_by)

    # NOTE: each side is capped independently, so an image that exceeds the
    # cap on only one side is not resized proportionally.
    new_width = _fit(width)
    new_height = _fit(height)

    resized = image.resize((new_width, new_height), Image.Resampling.LANCZOS)
    return resized, new_width, new_height
121
+
122
def get_image_latent(image, sample_size):
    """Encode a PIL image with the pipeline VAE and return its latent.

    Args:
        image: PIL image to encode. It is converted to RGB first so that
            grayscale or RGBA inputs yield the three channels the transform
            and VAE are fed with.
        sample_size: Accepted for interface compatibility; not used here, the
            image is encoded at its current size.

    Returns:
        The sampled latent multiplied by ``vae.config.scaling_factor``, with a
        singleton axis inserted at dim 2 so callers can keep indexing the
        result with ``[:, :, 0]``.
    """
    import torchvision.transforms as transforms

    # Map pixel values from [0, 1] to [-1, 1].
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.5], [0.5])
    ])

    # The VAE loaded by this file is a 2-D AutoencoderKL, so it must be fed a
    # 4-D [B, C, H, W] batch; the frame axis is added after encoding instead.
    img_tensor = transform(image.convert("RGB")).unsqueeze(0)
    img_tensor = img_tensor.to(device, weight_dtype)

    with torch.no_grad():
        latent = pipe.vae.encode(img_tensor).latent_dist.sample()
        latent = latent * pipe.vae.config.scaling_factor

    # NOTE(review): confirm whether ZImageControlPipeline expects a VAE latent
    # here or a pixel-space control image that it encodes itself.
    return latent.unsqueeze(2)
140
 
141
  @spaces.GPU()
142
  def generate_image(
143
  prompt,
144
+ negative_prompt="blurry, ugly, bad quality",
145
+ input_image=None,
146
+ control_mode="Canny",
147
+ control_context_scale=0.75,
148
+ image_scale=1.0,
149
+ num_inference_steps=9,
150
+ guidance_scale=1.0,
151
+ seed=42,
152
+ randomize_seed=True,
153
  progress=gr.Progress(track_tqdm=True)
154
  ):
155
+ """Generate image with optional ControlNet guidance."""
156
+
157
+ if not prompt.strip():
158
+ raise gr.Error("Please enter a prompt to generate an image.")
159
+
160
+ # Set seed
161
+ if randomize_seed:
162
+ seed = random.randint(0, MAX_SEED)
163
+ generator = torch.Generator(device).manual_seed(seed)
164
+
165
+ # Basic generation (no control image)
166
+ if input_image is None or not CONTROLNET_AVAILABLE:
167
+ if input_image is not None and not CONTROLNET_AVAILABLE:
168
+ gr.Warning("ControlNet not available. Generating without control image.")
169
+
170
+ progress(0.1, desc="Generating image...")
171
+
172
+ result = pipe(
173
+ prompt=prompt,
174
+ negative_prompt=negative_prompt if negative_prompt else None,
175
+ height=1024,
176
+ width=1024,
177
+ num_inference_steps=num_inference_steps,
178
+ guidance_scale=0.0 if not CONTROLNET_AVAILABLE else guidance_scale,
179
+ generator=generator,
180
+ )
181
+
182
+ image = result.images[0]
183
+ progress(1.0, desc="Complete!")
184
+ return image, seed, None
185
 
186
+ # ControlNet generation
187
+ progress(0.1, desc="Processing control image...")
188
 
189
+ # Map control mode to processor
190
+ processor_map = {
191
+ 'Canny': 'canny',
192
+ 'HED': 'softedge_hed',
193
+ 'Depth': 'depth_midas',
194
+ 'MLSD': 'mlsd',
195
+ 'Pose': 'openpose_full'
196
+ }
197
 
198
+ processor_id = processor_map.get(control_mode, 'canny')
199
+ processor = Processor(processor_id)
200
 
201
+ # Process control image
202
+ control_image, width, height = rescale_image(input_image, image_scale, 16)
203
+ control_image_1024 = control_image.resize((1024, 1024))
204
 
205
+ progress(0.3, desc=f"Applying {control_mode} detection...")
206
+ control_image_processed = processor(control_image_1024, to_pil=True)
207
+ control_image_processed = control_image_processed.resize((width, height))
208
 
209
+ # Convert to latent
210
+ progress(0.5, desc="Converting to latent space...")
211
+ control_image_torch = get_image_latent(
212
+ control_image_processed,
213
+ sample_size=[height, width]
214
+ )[:, :, 0]
215
 
216
+ # Generate with control
217
+ progress(0.6, desc="Generating controlled image...")
218
 
219
  try:
220
  result = pipe(
221
  prompt=prompt,
222
+ negative_prompt=negative_prompt if negative_prompt else None,
223
+ height=height,
224
+ width=width,
 
 
225
  generator=generator,
226
+ guidance_scale=guidance_scale,
227
+ control_image=control_image_torch,
228
+ num_inference_steps=num_inference_steps,
229
+ control_context_scale=control_context_scale,
230
  )
231
 
232
  image = result.images[0]
233
  progress(1.0, desc="Complete!")
234
+ return image, seed, control_image_processed
235
 
 
 
236
  except Exception as e:
237
  raise gr.Error(f"Generation failed: {str(e)}")
238
 
239
  # Apple-style CSS
240
  apple_css = """
 
241
  .gradio-container {
242
+ max-width: 1200px !important;
243
  margin: 0 auto !important;
244
  padding: 48px 20px !important;
245
+ font-family: -apple-system, BlinkMacSystemFont, 'Inter', 'Segoe UI', sans-serif !important;
246
  }
247
 
 
248
  .header-container {
249
  text-align: center;
250
  margin-bottom: 48px;
 
254
  font-size: 56px !important;
255
  font-weight: 600 !important;
256
  letter-spacing: -0.02em !important;
 
257
  color: #1d1d1f !important;
258
  margin: 0 0 12px 0 !important;
259
  }
260
 
261
  .subtitle {
262
  font-size: 21px !important;
 
 
263
  color: #6e6e73 !important;
264
  margin: 0 0 24px 0 !important;
265
  }
266
 
267
+ .info-badge {
268
  display: inline-block;
269
+ background: #0071e3;
270
+ color: white;
271
+ padding: 6px 16px;
272
+ border-radius: 20px;
273
+ font-size: 14px;
274
+ font-weight: 500;
275
+ margin-bottom: 16px;
 
 
 
 
 
 
 
 
 
 
 
 
276
  }
277
 
 
278
  textarea {
279
  font-size: 17px !important;
 
280
  border-radius: 12px !important;
281
  border: 1px solid #d2d2d7 !important;
282
  padding: 12px 16px !important;
 
 
 
283
  }
284
 
285
  textarea:focus {
 
288
  outline: none !important;
289
  }
290
 
 
 
 
 
 
291
  button.primary {
292
  font-size: 17px !important;
 
293
  padding: 12px 32px !important;
294
  border-radius: 980px !important;
295
  background: #0071e3 !important;
296
  border: none !important;
297
  color: #ffffff !important;
 
298
  transition: all 0.2s ease !important;
 
 
299
  }
300
 
301
  button.primary:hover {
 
303
  transform: scale(1.02) !important;
304
  }
305
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
306
  .footer-text {
307
  text-align: center;
308
  margin-top: 48px;
309
  font-size: 14px !important;
310
  color: #86868b !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
311
  }
312
 
313
+ @media (max-width: 768px) {
314
+ .main-title { font-size: 40px !important; }
315
+ .subtitle { font-size: 19px !important; }
316
  }
317
  """
318
 
319
+ # Create interface
320
+ with gr.Blocks(css=apple_css, title="Z-Image Turbo with ControlNet") as demo:
 
 
 
321
 
322
  # Header
323
+ gr.HTML(f"""
324
  <div class="header-container">
325
+ <div class="info-badge">{'✓ ControlNet Enabled' if CONTROLNET_AVAILABLE else '⚠ Basic Mode'}</div>
326
  <h1 class="main-title">Z-Image Turbo</h1>
327
+ <p class="subtitle">Transform your ideas into stunning visuals with AI-powered control</p>
 
 
 
328
  </div>
329
  """)
330
 
331
+ with gr.Row():
332
+ # Left column - Inputs
333
+ with gr.Column(scale=1):
334
+ prompt = gr.Textbox(
335
+ label="Prompt",
336
+ placeholder="Describe the image you want to create...",
337
+ lines=3,
338
+ max_lines=6,
339
+ )
340
+
341
+ negative_prompt = gr.Textbox(
342
+ label="Negative Prompt",
343
+ placeholder="What to avoid in the image...",
344
+ value="blurry, ugly, bad quality",
345
+ lines=2,
346
+ )
347
+
348
+ if CONTROLNET_AVAILABLE:
349
+ input_image = gr.Image(
350
+ label="Control Image (Optional)",
351
+ type="pil",
352
+ sources=['upload', 'clipboard'],
353
+ height=290,
354
+ )
355
+
356
+ control_mode = gr.Radio(
357
+ choices=["Canny", "Depth", "HED", "MLSD", "Pose"],
358
+ value="Canny",
359
+ label="Control Mode",
360
+ info="Choose edge/depth/pose detection method"
361
+ )
362
+
363
+ with gr.Accordion("Advanced Settings", open=False):
364
+ num_inference_steps = gr.Slider(
365
+ label="Inference Steps",
366
+ minimum=1,
367
+ maximum=30,
368
+ step=1,
369
+ value=9,
370
+ info="More steps = higher quality but slower"
371
+ )
372
+
373
+ guidance_scale = gr.Slider(
374
+ label="Guidance Scale",
375
+ minimum=0.0,
376
+ maximum=10.0,
377
+ step=0.1,
378
+ value=1.0,
379
+ info="How closely to follow the prompt"
380
+ )
381
+
382
+ if CONTROLNET_AVAILABLE:
383
+ control_context_scale = gr.Slider(
384
+ label="Control Strength",
385
+ minimum=0.0,
386
+ maximum=1.0,
387
+ step=0.01,
388
+ value=0.75,
389
+ info="0.65-0.80 recommended for best results"
390
+ )
391
+
392
+ image_scale = gr.Slider(
393
+ label="Image Scale",
394
+ minimum=0.5,
395
+ maximum=2.0,
396
+ step=0.1,
397
+ value=1.0,
398
+ info="Resize control image"
399
+ )
400
+
401
+ seed = gr.Slider(
402
+ label="Seed",
403
+ minimum=0,
404
+ maximum=MAX_SEED,
405
+ step=1,
406
+ value=42,
407
+ )
408
+
409
+ randomize_seed = gr.Checkbox(
410
+ label="Randomize Seed",
411
+ value=True
412
+ )
413
+
414
+ generate_btn = gr.Button(
415
+ "Generate Image",
416
+ variant="primary",
417
+ size="lg",
418
+ elem_classes="primary"
419
+ )
420
 
421
+ # Right column - Outputs
422
+ with gr.Column(scale=1):
423
+ output_image = gr.Image(
424
+ label="Generated Image",
425
+ type="pil",
426
+ show_label=True,
427
+ )
428
+
429
+ seed_output = gr.Number(
430
+ label="Used Seed",
431
+ precision=0,
432
+ )
433
+
434
+ if CONTROLNET_AVAILABLE:
435
+ with gr.Accordion("Preprocessor Output", open=False):
436
+ control_output = gr.Image(
437
+ label="Processed Control Image",
438
+ type="pil",
439
+ )
440
 
441
  # Footer
442
  gr.HTML("""
443
  <div class="footer-text">
444
+ <p style="margin-bottom: 8px;">Powered by Z-Image Turbo from Tongyi-MAI</p>
445
+ <p style="font-size: 13px;">
446
+ <a href="https://huggingface.co/Tongyi-MAI/Z-Image-Turbo" style="color: #0071e3; text-decoration: none; margin: 0 8px;">
447
+ Model Card
448
+ </a> •
449
+ <a href="https://huggingface.co/alibaba-pai/Z-Image-Turbo-Fun-Controlnet-Union" style="color: #0071e3; text-decoration: none; margin: 0 8px;">
450
+ ControlNet
451
+ </a> •
452
+ <a href="https://github.com/aigc-apps/VideoX-Fun" style="color: #0071e3; text-decoration: none; margin: 0 8px;">
453
+ GitHub
454
+ </a>
455
+ </p>
456
  </div>
457
  """)
458
 
459
  # Event handlers
460
+ generate_inputs = [
461
+ prompt,
462
+ negative_prompt,
463
+ ]
464
+
465
+ if CONTROLNET_AVAILABLE:
466
+ generate_inputs.extend([
467
+ input_image,
468
+ control_mode,
469
+ control_context_scale,
470
+ image_scale,
471
+ ])
472
+ generate_inputs.extend([
473
+ num_inference_steps,
474
+ guidance_scale,
475
+ seed,
476
+ randomize_seed,
477
+ ])
478
+ generate_outputs = [output_image, seed_output, control_output]
479
+ else:
480
+ # Add None placeholders for missing ControlNet params
481
+ generate_inputs.extend([
482
+ gr.State(None), # input_image
483
+ gr.State("Canny"), # control_mode
484
+ gr.State(0.75), # control_context_scale
485
+ gr.State(1.0), # image_scale
486
+ ])
487
+ generate_inputs.extend([
488
+ num_inference_steps,
489
+ guidance_scale,
490
+ seed,
491
+ randomize_seed,
492
+ ])
493
+ generate_outputs = [output_image, seed_output, gr.State(None)]
494
+
495
  generate_btn.click(
496
  fn=generate_image,
497
+ inputs=generate_inputs,
498
+ outputs=generate_outputs,
 
499
  )
500
 
501
  prompt.submit(
502
  fn=generate_image,
503
+ inputs=generate_inputs,
504
+ outputs=generate_outputs,
 
505
  )
506
 
507
  if __name__ == "__main__":
508
  demo.launch(
509
  share=False,
510
  show_error=True,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
511
  )