multimodalart HF Staff commited on
Commit
7c11b42
Β·
verified Β·
1 Parent(s): 3527e65

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +424 -0
app.py ADDED
@@ -0,0 +1,424 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Z-Image-i2L Gradio Demo
3
+ ========================
4
+ A web interface for the Z-Image-i2L model that converts images to LoRA models.
5
+
6
+ Setup Instructions:
7
+ 1. Install dependencies:
8
+ pip install -r requirements.txt
9
+
10
+ 2. Run this demo (DiffSynth-Studio will be auto-installed):
11
+ python app.py
12
+
13
+ Note: This requires a GPU with sufficient VRAM (recommended 24GB+)
14
+ """
15
+
16
+ import spaces
17
+ import gradio as gr
18
+ import torch
19
+ from PIL import Image
20
+ import os
21
+ import sys
22
+ import subprocess
23
+ import tempfile
24
+ from pathlib import Path
25
+
26
# Default negative prompts used to steer generation away from common artifacts
# (color casts, blur, distorted limbs, watermarks, garbled text, ...).
# CN and EN variants carry the same meaning; the UI offers buttons to switch.
NEGATIVE_PROMPT_CN = "ζ³›ι»„οΌŒε‘η»ΏοΌŒζ¨‘η³ŠοΌŒδ½Žεˆ†θΎ¨ηŽ‡οΌŒδ½Žθ΄¨ι‡ε›ΎεƒοΌŒζ‰­ζ›²ηš„θ‚’δ½“οΌŒθ―‘εΌ‚ηš„ε€–θ§‚οΌŒδΈ‘ι™‹οΌŒAIζ„ŸοΌŒε™ͺη‚ΉοΌŒη½‘ζ Όζ„ŸοΌŒJPEGεŽ‹ηΌ©ζ‘ηΊΉοΌŒεΌ‚εΈΈηš„θ‚’δ½“οΌŒζ°΄ε°οΌŒδΉ±η οΌŒζ„δΉ‰δΈζ˜Žηš„ε­—η¬¦"
NEGATIVE_PROMPT_EN = "Yellowed, green-tinted, blurry, low-resolution, low-quality image, distorted limbs, eerie appearance, ugly, AI-looking, noise, grid-like artifacts, JPEG compression artifacts, abnormal limbs, watermark, garbled text, meaningless characters"
29
+
30
+
31
def install_diffsynth_studio():
    """Clone and pip-install DiffSynth-Studio if it is not already importable.

    Returns:
        tuple[bool, str]: (success flag, human-readable status message).
    """
    # Fast path: the package is already importable, nothing to do.
    try:
        from diffsynth.pipelines.z_image import ZImagePipeline  # noqa: F401
        return True, "βœ… DiffSynth-Studio is already installed."
    except ImportError:
        pass

    # Clone next to this file so repeated runs reuse the same checkout.
    repo_dir = Path(__file__).parent / "DiffSynth-Studio"

    try:
        if not repo_dir.exists():
            print("πŸ“₯ Cloning DiffSynth-Studio repository...")
            # check=True: a failed clone is fatal and handled below.
            subprocess.run(
                ["git", "clone", "https://github.com/modelscope/DiffSynth-Studio.git", str(repo_dir)],
                capture_output=True,
                text=True,
                check=True,
            )
            print("βœ… Repository cloned successfully.")
        else:
            # Best-effort update: a failed pull must not block installation,
            # but it should at least be visible in the logs.
            print("πŸ“ DiffSynth-Studio directory already exists, pulling latest...")
            pull = subprocess.run(
                ["git", "-C", str(repo_dir), "pull"],
                capture_output=True,
                text=True,
            )
            if pull.returncode != 0:
                print(f"⚠️ git pull failed (continuing with existing checkout): {pull.stderr}")

        # Editable install so local changes to the checkout take effect.
        print("πŸ“¦ Installing DiffSynth-Studio...")
        subprocess.run(
            [sys.executable, "-m", "pip", "install", "-e", str(repo_dir)],
            capture_output=True,
            text=True,
            check=True,
        )
        print("βœ… DiffSynth-Studio installed successfully.")

        # Also put the checkout on sys.path so the import below works in this
        # process even before site-packages metadata is refreshed.
        sys.path.insert(0, str(repo_dir))

        from diffsynth.pipelines.z_image import ZImagePipeline  # noqa: F401
        return True, "βœ… DiffSynth-Studio installed successfully!"

    except subprocess.CalledProcessError as e:
        error_msg = f"❌ Installation failed: {e.stderr}"
        print(error_msg)
        return False, error_msg
    except Exception as e:
        error_msg = f"❌ Error during installation: {str(e)}"
        print(error_msg)
        return False, error_msg
86
+
87
+
88
# =============================================================================
# Pipeline Initialization (runs at module load time)
# =============================================================================

print("=" * 50)
print(" Z-Image-i2L Gradio Demo - Initializing")
print("=" * 50)
print()

# Ensure DiffSynth-Studio is installed before importing from it below.
print("πŸ” Checking DiffSynth-Studio installation...")
success, message = install_diffsynth_studio()
print(message)

if not success:
    raise RuntimeError("Failed to install DiffSynth-Studio. Cannot continue.")

# Import required modules; deferred until after installation so they resolve.
from diffsynth.pipelines.z_image import (
    ZImagePipeline, ModelConfig,
    ZImageUnit_Image2LoRAEncode, ZImageUnit_Image2LoRADecode
)
from safetensors.torch import save_file, load_file

# Configure VRAM settings: every stage (offload/onload/preparing/computation)
# runs in bfloat16 directly on CUDA, i.e. no CPU offload is used — this
# assumes a GPU with ample VRAM (see module docstring: 24GB+ recommended).
print("βš™οΈ Configuring VRAM settings...")
vram_config = {
    "offload_dtype": torch.bfloat16,
    "offload_device": "cuda",
    "onload_dtype": torch.bfloat16,
    "onload_device": "cuda",
    "preparing_dtype": torch.bfloat16,
    "preparing_device": "cuda",
    "computation_dtype": torch.bfloat16,
    "computation_device": "cuda",
}

# Load the pipeline. Weights are assembled from several hub repos:
# transformer from Z-Image, text encoder / VAE / tokenizer from Z-Image-Turbo,
# SigLIP2 + DINOv3 image encoders, and the Image2LoRA head itself.
print("πŸš€ Loading Z-Image pipeline...")
print("   This may take a few minutes on first run (downloading models)...")

pipe = ZImagePipeline.from_pretrained(
    torch_dtype=torch.bfloat16,
    device="cuda",
    model_configs=[
        ModelConfig(model_id="Tongyi-MAI/Z-Image", origin_file_pattern="transformer/*.safetensors", **vram_config),
        ModelConfig(model_id="Tongyi-MAI/Z-Image-Turbo", origin_file_pattern="text_encoder/*.safetensors"),
        ModelConfig(model_id="Tongyi-MAI/Z-Image-Turbo", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
        ModelConfig(model_id="DiffSynth-Studio/General-Image-Encoders", origin_file_pattern="SigLIP2-G384/model.safetensors"),
        ModelConfig(model_id="DiffSynth-Studio/General-Image-Encoders", origin_file_pattern="DINOv3-7B/model.safetensors"),
        ModelConfig(model_id="DiffSynth-Studio/Z-Image-i2L", origin_file_pattern="model.safetensors"),
    ],
    tokenizer_config=ModelConfig(model_id="Tongyi-MAI/Z-Image-Turbo", origin_file_pattern="tokenizer/"),
)

print("βœ… Pipeline loaded successfully!")
print()
145
+
146
+ # =============================================================================
147
+ # Gradio Functions
148
+ # =============================================================================
149
+
150
@spaces.GPU(duration=120)
def image_to_lora(images, progress=gr.Progress()):
    """Convert input images to a LoRA model."""
    if images is None or len(images) == 0:
        return None, "❌ Please upload at least one image!"

    def _to_pil(item):
        # Gallery entries may be file paths, (filepath, caption) tuples
        # returned by gr.Gallery, or raw arrays.
        if isinstance(item, str):
            return Image.open(item).convert("RGB")
        if isinstance(item, tuple):
            return Image.open(item[0]).convert("RGB")
        return Image.fromarray(item).convert("RGB")

    try:
        progress(0.1, desc="Processing images...")
        loaded = [_to_pil(item) for item in images]

        progress(0.3, desc="Encoding images to LoRA...")
        with torch.no_grad():
            embeddings = ZImageUnit_Image2LoRAEncode().process(pipe, image2lora_images=loaded)
            progress(0.7, desc="Decoding LoRA weights...")
            lora_state = ZImageUnit_Image2LoRADecode().process(pipe, **embeddings)["lora"]

        progress(0.9, desc="Saving LoRA file...")
        # Write into a fresh temp directory so downloads never collide.
        lora_path = os.path.join(tempfile.mkdtemp(), "generated_lora.safetensors")
        save_file(lora_state, lora_path)

        progress(1.0, desc="Done!")
        return lora_path, f"βœ… LoRA generated successfully from {len(loaded)} images!"

    except Exception as e:
        # Surface the failure to the UI instead of crashing the app.
        return None, f"❌ Error generating LoRA: {str(e)}"
190
+
191
+
192
@spaces.GPU(duration=60)
def generate_image(
    lora_file,
    prompt,
    negative_prompt,
    seed,
    cfg_scale,
    sigma_shift,
    num_steps,
    progress=gr.Progress()
):
    """Generate an image using the created LoRA."""
    if lora_file is None:
        return None, "❌ Please generate or upload a LoRA file first!"

    try:
        progress(0.1, desc="Loading LoRA...")
        lora_weights = load_file(lora_file)

        progress(0.3, desc="Generating image...")
        # seed/num_steps arrive from gr.Number / gr.Slider and may be floats.
        result = pipe(
            prompt=prompt,
            negative_prompt=negative_prompt,
            seed=int(seed),
            cfg_scale=cfg_scale,
            num_inference_steps=int(num_steps),
            positive_only_lora=lora_weights,
            sigma_shift=sigma_shift,
        )

        progress(1.0, desc="Done!")
        return result, "βœ… Image generated successfully!"

    except Exception as e:
        # Surface the failure to the UI instead of crashing the app.
        return None, f"❌ Error generating image: {str(e)}"
230
+
231
+
232
def create_demo():
    """Build and return the Gradio Blocks interface.

    Layout: two tabs — Step 1 turns uploaded images into a LoRA file,
    Step 2 uses that LoRA to generate new images. The generated LoRA is
    auto-forwarded from the Step 1 output to the Step 2 input.
    """

    with gr.Blocks(
        title="Z-Image-i2L Demo",
        theme=gr.themes.Soft(),
        css="""
        .gradio-container { max-width: 1200px !important; }
        .status-box { padding: 10px; border-radius: 5px; margin: 10px 0; }
        """
    ) as demo:
        gr.Markdown("""
        # 🎨 Z-Image-i2L: Image to LoRA Demo

        Convert your images into style LoRA models and generate new images with that style!

        **How it works:**
        1. **Upload Images**: Add 1-6 images with a consistent style
        2. **Generate LoRA**: Convert your images into a LoRA model
        3. **Generate Images**: Use the LoRA to create new images with your style

        > πŸ’‘ **Tip**: For best results, use 4-6 images with a consistent artistic style.

        βœ… **Pipeline is pre-loaded and ready to use!**
        """)

        with gr.Tabs():
            # Tab 1: Image to LoRA
            with gr.TabItem("πŸ“Έ Step 1: Image to LoRA"):
                with gr.Row():
                    with gr.Column(scale=1):
                        input_gallery = gr.Gallery(
                            label="Upload Style Images (1-6 images)",
                            file_types=["image"],
                            columns=3,
                            height=300,
                            interactive=True
                        )

                        gr.Markdown("""
                        **Guidelines for input images:**
                        - Upload 1-6 images with a consistent style
                        - Higher quality images produce better results
                        - Mix of subjects (people, objects, scenes) helps generalization
                        """)

                        generate_lora_btn = gr.Button("🎯 Generate LoRA", variant="primary")

                    with gr.Column(scale=1):
                        # Download target for the generated .safetensors file.
                        lora_output = gr.File(
                            label="Generated LoRA File",
                            file_types=[".safetensors"],
                            interactive=False
                        )
                        lora_status = gr.Textbox(
                            label="LoRA Generation Status",
                            interactive=False,
                            lines=2
                        )

            # Tab 2: Generate Images
            with gr.TabItem("πŸ–ΌοΈ Step 2: Generate Images"):
                with gr.Row():
                    with gr.Column(scale=1):
                        # Accepts either the auto-forwarded LoRA from Step 1
                        # or a user-uploaded file.
                        lora_input = gr.File(
                            label="LoRA File (from Step 1 or upload)",
                            file_types=[".safetensors"]
                        )

                        prompt = gr.Textbox(
                            label="Prompt",
                            placeholder="Describe what you want to generate...",
                            value="a cat",
                            lines=2
                        )

                        with gr.Accordion("Negative Prompt", open=False):
                            negative_prompt = gr.Textbox(
                                label="Negative Prompt",
                                value=NEGATIVE_PROMPT_CN,
                                lines=3
                            )
                            with gr.Row():
                                use_cn_neg = gr.Button("Use Chinese", size="sm")
                                use_en_neg = gr.Button("Use English", size="sm")

                        # Defaults mirror the recommended settings listed in
                        # the resources section below (CFG 4, shift 8, 50 steps).
                        with gr.Accordion("Advanced Settings", open=False):
                            seed = gr.Number(
                                label="Seed",
                                value=0,
                                precision=0
                            )
                            cfg_scale = gr.Slider(
                                label="CFG Scale",
                                minimum=1,
                                maximum=10,
                                value=4,
                                step=0.5
                            )
                            sigma_shift = gr.Slider(
                                label="Sigma Shift",
                                minimum=1,
                                maximum=15,
                                value=8,
                                step=1
                            )
                            num_steps = gr.Slider(
                                label="Number of Steps",
                                minimum=20,
                                maximum=100,
                                value=50,
                                step=5
                            )

                        generate_btn = gr.Button("✨ Generate Image", variant="primary")

                    with gr.Column(scale=1):
                        output_image = gr.Image(
                            label="Generated Image",
                            type="pil",
                            height=512
                        )
                        gen_status = gr.Textbox(
                            label="Generation Status",
                            interactive=False,
                            lines=2
                        )

        gr.Markdown("""
        ---
        ### πŸ“š Resources
        - [Z-Image-i2L Model](https://modelscope.cn/models/DiffSynth-Studio/Z-Image-i2L)
        - [DiffSynth-Studio GitHub](https://github.com/modelscope/DiffSynth-Studio)
        - [Online Demo](https://modelscope.cn/studios/DiffSynth-Studio/Z-Image-i2L)

        ### βš™οΈ Recommended Settings
        - **CFG Scale**: 4
        - **Sigma Shift**: 8
        - **Steps**: 50
        - Use negative prompts for better quality
        """)

        # Event handlers
        generate_lora_btn.click(
            fn=image_to_lora,
            inputs=[input_gallery],
            outputs=[lora_output, lora_status]
        )

        # Auto-update lora_input when lora is generated
        lora_output.change(
            fn=lambda x: x,
            inputs=[lora_output],
            outputs=[lora_input]
        )

        generate_btn.click(
            fn=generate_image,
            inputs=[
                lora_input,
                prompt,
                negative_prompt,
                seed,
                cfg_scale,
                sigma_shift,
                num_steps
            ],
            outputs=[output_image, gen_status]
        )

        # Negative prompt presets
        use_cn_neg.click(
            fn=lambda: NEGATIVE_PROMPT_CN,
            outputs=[negative_prompt]
        )
        use_en_neg.click(
            fn=lambda: NEGATIVE_PROMPT_EN,
            outputs=[negative_prompt]
        )

    return demo
413
+
414
+
415
if __name__ == "__main__":
    print("Starting Gradio server...")
    print()

    # Bind on all interfaces so the demo is reachable from containers/Spaces.
    demo = create_demo()
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
    )