multimodalart HF Staff commited on
Commit
7c11b42
Β·
verified Β·
1 Parent(s): 3527e65

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +424 -0
app.py ADDED
@@ -0,0 +1,424 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Z-Image-i2L Gradio Demo
3
+ ========================
4
+ A web interface for the Z-Image-i2L model that converts images to LoRA models.
5
+
6
+ Setup Instructions:
7
+ 1. Install dependencies:
8
+ pip install -r requirements.txt
9
+
10
+ 2. Run this demo (DiffSynth-Studio will be auto-installed):
11
+ python app.py
12
+
13
+ Note: This requires a GPU with sufficient VRAM (recommended 24GB+)
14
+ """
15
+
16
+ import spaces
17
+ import gradio as gr
18
+ import torch
19
+ from PIL import Image
20
+ import os
21
+ import sys
22
+ import subprocess
23
+ import tempfile
24
+ from pathlib import Path
25
+
26
# Default negative prompts used to steer generation away from common artifacts
# (color casts, blur, distorted limbs, watermarks, garbled text, ...).
# CN and EN variants carry the same meaning; the UI offers buttons to switch.
NEGATIVE_PROMPT_CN = "ζ³›ι»„οΌŒε‘η»ΏοΌŒζ¨‘η³ŠοΌŒδ½Žεˆ†θΎ¨ηŽ‡οΌŒδ½Žθ΄¨ι‡ε›ΎεƒοΌŒζ‰­ζ›²ηš„θ‚’δ½“οΌŒθ―‘εΌ‚ηš„ε€–θ§‚οΌŒδΈ‘ι™‹οΌŒAIζ„ŸοΌŒε™ͺη‚ΉοΌŒη½‘ζ Όζ„ŸοΌŒJPEGεŽ‹ηΌ©ζ‘ηΊΉοΌŒεΌ‚εΈΈηš„θ‚’δ½“οΌŒζ°΄ε°οΌŒδΉ±η οΌŒζ„δΉ‰δΈζ˜Žηš„ε­—η¬¦"
NEGATIVE_PROMPT_EN = "Yellowed, green-tinted, blurry, low-resolution, low-quality image, distorted limbs, eerie appearance, ugly, AI-looking, noise, grid-like artifacts, JPEG compression artifacts, abnormal limbs, watermark, garbled text, meaningless characters"
29
+
30
+
31
def install_diffsynth_studio():
    """Clone and pip-install DiffSynth-Studio if it is not already importable.

    Returns:
        tuple[bool, str]: (success flag, human-readable status message).
    """
    # Fast path: the package is already importable, nothing to do.
    try:
        from diffsynth.pipelines.z_image import ZImagePipeline  # noqa: F401
        return True, "βœ… DiffSynth-Studio is already installed."
    except ImportError:
        pass

    # Clone next to this file so repeated runs reuse the same checkout.
    repo_dir = Path(__file__).parent / "DiffSynth-Studio"

    try:
        if not repo_dir.exists():
            print("πŸ“₯ Cloning DiffSynth-Studio repository...")
            # check=True: a failed clone is fatal and handled below.
            subprocess.run(
                ["git", "clone", "https://github.com/modelscope/DiffSynth-Studio.git", str(repo_dir)],
                capture_output=True,
                text=True,
                check=True,
            )
            print("βœ… Repository cloned successfully.")
        else:
            # Best-effort update: a failed pull must not block installation,
            # but it should at least be visible in the logs.
            print("πŸ“ DiffSynth-Studio directory already exists, pulling latest...")
            pull = subprocess.run(
                ["git", "-C", str(repo_dir), "pull"],
                capture_output=True,
                text=True,
            )
            if pull.returncode != 0:
                print(f"⚠️ git pull failed (continuing with existing checkout): {pull.stderr}")

        # Editable install so local changes to the checkout take effect.
        print("πŸ“¦ Installing DiffSynth-Studio...")
        subprocess.run(
            [sys.executable, "-m", "pip", "install", "-e", str(repo_dir)],
            capture_output=True,
            text=True,
            check=True,
        )
        print("βœ… DiffSynth-Studio installed successfully.")

        # Also put the checkout on sys.path so the import below works in this
        # process even before site-packages metadata is refreshed.
        sys.path.insert(0, str(repo_dir))

        from diffsynth.pipelines.z_image import ZImagePipeline  # noqa: F401
        return True, "βœ… DiffSynth-Studio installed successfully!"

    except subprocess.CalledProcessError as e:
        error_msg = f"❌ Installation failed: {e.stderr}"
        print(error_msg)
        return False, error_msg
    except Exception as e:
        error_msg = f"❌ Error during installation: {str(e)}"
        print(error_msg)
        return False, error_msg
86
+
87
+
88
# =============================================================================
# Pipeline Initialization (runs at module load time)
# =============================================================================

print("=" * 50)
print(" Z-Image-i2L Gradio Demo - Initializing")
print("=" * 50)
print()

# Ensure DiffSynth-Studio is installed before importing from it below.
print("πŸ” Checking DiffSynth-Studio installation...")
success, message = install_diffsynth_studio()
print(message)

if not success:
    raise RuntimeError("Failed to install DiffSynth-Studio. Cannot continue.")

# Import required modules; deferred until after installation so they resolve.
from diffsynth.pipelines.z_image import (
    ZImagePipeline, ModelConfig,
    ZImageUnit_Image2LoRAEncode, ZImageUnit_Image2LoRADecode
)
from safetensors.torch import save_file, load_file

# Configure VRAM settings: every stage (offload/onload/preparing/computation)
# runs in bfloat16 directly on CUDA, i.e. no CPU offload is used — this
# assumes a GPU with ample VRAM (see module docstring: 24GB+ recommended).
print("βš™οΈ Configuring VRAM settings...")
vram_config = {
    "offload_dtype": torch.bfloat16,
    "offload_device": "cuda",
    "onload_dtype": torch.bfloat16,
    "onload_device": "cuda",
    "preparing_dtype": torch.bfloat16,
    "preparing_device": "cuda",
    "computation_dtype": torch.bfloat16,
    "computation_device": "cuda",
}

# Load the pipeline. Weights are assembled from several hub repos:
# transformer from Z-Image, text encoder / VAE / tokenizer from Z-Image-Turbo,
# SigLIP2 + DINOv3 image encoders, and the Image2LoRA head itself.
print("πŸš€ Loading Z-Image pipeline...")
print("   This may take a few minutes on first run (downloading models)...")

pipe = ZImagePipeline.from_pretrained(
    torch_dtype=torch.bfloat16,
    device="cuda",
    model_configs=[
        ModelConfig(model_id="Tongyi-MAI/Z-Image", origin_file_pattern="transformer/*.safetensors", **vram_config),
        ModelConfig(model_id="Tongyi-MAI/Z-Image-Turbo", origin_file_pattern="text_encoder/*.safetensors"),
        ModelConfig(model_id="Tongyi-MAI/Z-Image-Turbo", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
        ModelConfig(model_id="DiffSynth-Studio/General-Image-Encoders", origin_file_pattern="SigLIP2-G384/model.safetensors"),
        ModelConfig(model_id="DiffSynth-Studio/General-Image-Encoders", origin_file_pattern="DINOv3-7B/model.safetensors"),
        ModelConfig(model_id="DiffSynth-Studio/Z-Image-i2L", origin_file_pattern="model.safetensors"),
    ],
    tokenizer_config=ModelConfig(model_id="Tongyi-MAI/Z-Image-Turbo", origin_file_pattern="tokenizer/"),
)

print("βœ… Pipeline loaded successfully!")
print()
145
+
146
+ # =============================================================================
147
+ # Gradio Functions
148
+ # =============================================================================
149
+
150
@spaces.GPU(duration=120)
def image_to_lora(images, progress=gr.Progress()):
    """Convert input images to a LoRA model."""
    if images is None or len(images) == 0:
        return None, "❌ Please upload at least one image!"

    def _to_pil(item):
        # Gallery entries may be file paths, (filepath, caption) tuples
        # returned by gr.Gallery, or raw arrays.
        if isinstance(item, str):
            return Image.open(item).convert("RGB")
        if isinstance(item, tuple):
            return Image.open(item[0]).convert("RGB")
        return Image.fromarray(item).convert("RGB")

    try:
        progress(0.1, desc="Processing images...")
        loaded = [_to_pil(item) for item in images]

        progress(0.3, desc="Encoding images to LoRA...")
        with torch.no_grad():
            embeddings = ZImageUnit_Image2LoRAEncode().process(pipe, image2lora_images=loaded)
            progress(0.7, desc="Decoding LoRA weights...")
            lora_state = ZImageUnit_Image2LoRADecode().process(pipe, **embeddings)["lora"]

        progress(0.9, desc="Saving LoRA file...")
        # Write into a fresh temp directory so downloads never collide.
        lora_path = os.path.join(tempfile.mkdtemp(), "generated_lora.safetensors")
        save_file(lora_state, lora_path)

        progress(1.0, desc="Done!")
        return lora_path, f"βœ… LoRA generated successfully from {len(loaded)} images!"

    except Exception as e:
        # Surface the failure to the UI instead of crashing the app.
        return None, f"❌ Error generating LoRA: {str(e)}"
190
+
191
+
192
@spaces.GPU(duration=60)
def generate_image(
    lora_file,
    prompt,
    negative_prompt,
    seed,
    cfg_scale,
    sigma_shift,
    num_steps,
    progress=gr.Progress()
):
    """Generate an image using the created LoRA."""
    if lora_file is None:
        return None, "❌ Please generate or upload a LoRA file first!"

    try:
        progress(0.1, desc="Loading LoRA...")
        lora_weights = load_file(lora_file)

        progress(0.3, desc="Generating image...")
        # seed/num_steps arrive from gr.Number / gr.Slider and may be floats.
        result = pipe(
            prompt=prompt,
            negative_prompt=negative_prompt,
            seed=int(seed),
            cfg_scale=cfg_scale,
            num_inference_steps=int(num_steps),
            positive_only_lora=lora_weights,
            sigma_shift=sigma_shift,
        )

        progress(1.0, desc="Done!")
        return result, "βœ… Image generated successfully!"

    except Exception as e:
        # Surface the failure to the UI instead of crashing the app.
        return None, f"❌ Error generating image: {str(e)}"
230
+
231
+
232
def create_demo():
    """Build and return the Gradio Blocks interface.

    Layout: two tabs — Step 1 turns uploaded images into a LoRA file,
    Step 2 uses that LoRA to generate new images. The generated LoRA is
    auto-forwarded from the Step 1 output to the Step 2 input.
    """

    with gr.Blocks(
        title="Z-Image-i2L Demo",
        theme=gr.themes.Soft(),
        css="""
        .gradio-container { max-width: 1200px !important; }
        .status-box { padding: 10px; border-radius: 5px; margin: 10px 0; }
        """
    ) as demo:
        gr.Markdown("""
        # 🎨 Z-Image-i2L: Image to LoRA Demo

        Convert your images into style LoRA models and generate new images with that style!

        **How it works:**
        1. **Upload Images**: Add 1-6 images with a consistent style
        2. **Generate LoRA**: Convert your images into a LoRA model
        3. **Generate Images**: Use the LoRA to create new images with your style

        > πŸ’‘ **Tip**: For best results, use 4-6 images with a consistent artistic style.

        βœ… **Pipeline is pre-loaded and ready to use!**
        """)

        with gr.Tabs():
            # Tab 1: Image to LoRA
            with gr.TabItem("πŸ“Έ Step 1: Image to LoRA"):
                with gr.Row():
                    with gr.Column(scale=1):
                        input_gallery = gr.Gallery(
                            label="Upload Style Images (1-6 images)",
                            file_types=["image"],
                            columns=3,
                            height=300,
                            interactive=True
                        )

                        gr.Markdown("""
                        **Guidelines for input images:**
                        - Upload 1-6 images with a consistent style
                        - Higher quality images produce better results
                        - Mix of subjects (people, objects, scenes) helps generalization
                        """)

                        generate_lora_btn = gr.Button("🎯 Generate LoRA", variant="primary")

                    with gr.Column(scale=1):
                        # Download target for the generated .safetensors file.
                        lora_output = gr.File(
                            label="Generated LoRA File",
                            file_types=[".safetensors"],
                            interactive=False
                        )
                        lora_status = gr.Textbox(
                            label="LoRA Generation Status",
                            interactive=False,
                            lines=2
                        )

            # Tab 2: Generate Images
            with gr.TabItem("πŸ–ΌοΈ Step 2: Generate Images"):
                with gr.Row():
                    with gr.Column(scale=1):
                        # Accepts either the auto-forwarded LoRA from Step 1
                        # or a user-uploaded file.
                        lora_input = gr.File(
                            label="LoRA File (from Step 1 or upload)",
                            file_types=[".safetensors"]
                        )

                        prompt = gr.Textbox(
                            label="Prompt",
                            placeholder="Describe what you want to generate...",
                            value="a cat",
                            lines=2
                        )

                        with gr.Accordion("Negative Prompt", open=False):
                            negative_prompt = gr.Textbox(
                                label="Negative Prompt",
                                value=NEGATIVE_PROMPT_CN,
                                lines=3
                            )
                            with gr.Row():
                                use_cn_neg = gr.Button("Use Chinese", size="sm")
                                use_en_neg = gr.Button("Use English", size="sm")

                        # Defaults mirror the recommended settings listed in
                        # the resources section below (CFG 4, shift 8, 50 steps).
                        with gr.Accordion("Advanced Settings", open=False):
                            seed = gr.Number(
                                label="Seed",
                                value=0,
                                precision=0
                            )
                            cfg_scale = gr.Slider(
                                label="CFG Scale",
                                minimum=1,
                                maximum=10,
                                value=4,
                                step=0.5
                            )
                            sigma_shift = gr.Slider(
                                label="Sigma Shift",
                                minimum=1,
                                maximum=15,
                                value=8,
                                step=1
                            )
                            num_steps = gr.Slider(
                                label="Number of Steps",
                                minimum=20,
                                maximum=100,
                                value=50,
                                step=5
                            )

                        generate_btn = gr.Button("✨ Generate Image", variant="primary")

                    with gr.Column(scale=1):
                        output_image = gr.Image(
                            label="Generated Image",
                            type="pil",
                            height=512
                        )
                        gen_status = gr.Textbox(
                            label="Generation Status",
                            interactive=False,
                            lines=2
                        )

        gr.Markdown("""
        ---
        ### πŸ“š Resources
        - [Z-Image-i2L Model](https://modelscope.cn/models/DiffSynth-Studio/Z-Image-i2L)
        - [DiffSynth-Studio GitHub](https://github.com/modelscope/DiffSynth-Studio)
        - [Online Demo](https://modelscope.cn/studios/DiffSynth-Studio/Z-Image-i2L)

        ### βš™οΈ Recommended Settings
        - **CFG Scale**: 4
        - **Sigma Shift**: 8
        - **Steps**: 50
        - Use negative prompts for better quality
        """)

        # Event handlers
        generate_lora_btn.click(
            fn=image_to_lora,
            inputs=[input_gallery],
            outputs=[lora_output, lora_status]
        )

        # Auto-update lora_input when lora is generated
        lora_output.change(
            fn=lambda x: x,
            inputs=[lora_output],
            outputs=[lora_input]
        )

        generate_btn.click(
            fn=generate_image,
            inputs=[
                lora_input,
                prompt,
                negative_prompt,
                seed,
                cfg_scale,
                sigma_shift,
                num_steps
            ],
            outputs=[output_image, gen_status]
        )

        # Negative prompt presets
        use_cn_neg.click(
            fn=lambda: NEGATIVE_PROMPT_CN,
            outputs=[negative_prompt]
        )
        use_en_neg.click(
            fn=lambda: NEGATIVE_PROMPT_EN,
            outputs=[negative_prompt]
        )

    return demo
413
+
414
+
415
if __name__ == "__main__":
    print("Starting Gradio server...")
    print()

    # Bind on all interfaces so the demo is reachable from containers/Spaces.
    demo = create_demo()
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
    )