Spaces:

DiffSynth-Studio
/

Z-Image-i2L

Runtime error

App Files Files Community

kelseye commited on Jan 29

Commit

4a60f19

verified ·

1 Parent(s): 331282e

Update app.py

Browse files

Files changed (1) hide show

app.py +441 -158

app.py CHANGED Viewed

@@ -1,20 +1,193 @@
 import spaces
-import os
-os.system("pip install diffsynth==2.0.3")
-from modelscope.hub.api import HubApi
-api = HubApi()
-api.login(os.environ["MODELSCOPE_TOKEN"])
-os.environ["DIFFSYNTH_MODEL_BASE_PATH"] = "/mnt/workspace/models"
-os.environ["DIFFSYNTH_DOWNLOAD_SOURCE"] = "huggingface"
 import gradio as gr
 import torch
 from diffsynth.pipelines.z_image import (
     ZImagePipeline, ModelConfig,
     ZImageUnit_Image2LoRAEncode, ZImageUnit_Image2LoRADecode
 )
-# Use `vram_config` to enable LoRA hot-loading
 vram_config = {
     "offload_dtype": torch.bfloat16,
     "offload_device": "cuda",
@@ -26,166 +199,276 @@ vram_config = {
     "computation_device": "cuda",
 }
-# Load models
 pipe = ZImagePipeline.from_pretrained(
     torch_dtype=torch.bfloat16,
     device="cuda",
-    model_configs=[
-        ModelConfig(model_id="Tongyi-MAI/Z-Image", origin_file_pattern="transformer/*.safetensors", **vram_config),
-        ModelConfig(model_id="Tongyi-MAI/Z-Image-Turbo", origin_file_pattern="text_encoder/*.safetensors"),
-        ModelConfig(model_id="Tongyi-MAI/Z-Image-Turbo", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
-        ModelConfig(model_id="DiffSynth-Studio/General-Image-Encoders", origin_file_pattern="SigLIP2-G384/model.safetensors"),
-        ModelConfig(model_id="DiffSynth-Studio/General-Image-Encoders", origin_file_pattern="DINOv3-7B/model.safetensors"),
-        ModelConfig(model_id="DiffSynth-Studio/Z-Image-i2L", origin_file_pattern="model.safetensors"),
-    ],
-    tokenizer_config=ModelConfig(model_id="Tongyi-MAI/Z-Image-Turbo", origin_file_pattern="tokenizer/"),
 )
-@spaces.GPU(duration=60)
-def run_inference(style_image_1, style_image_2, style_image_3, style_image_4, style_image_5, style_image_6, prompt, negative_prompt, cfg_scale, sigma_shift, seed, num_inference_steps, height, width, progress=gr.Progress()):
-    # Filter out None values and collect valid images
-    style_images = [img for img in [style_image_1, style_image_2, style_image_3, style_image_4, style_image_5, style_image_6] if img is not None]
-    # Image to LoRA
-    with torch.no_grad():
-        embs = ZImageUnit_Image2LoRAEncode().process(pipe, image2lora_images=style_images)
-        lora = ZImageUnit_Image2LoRADecode().process(pipe, **embs)["lora"]
-    # Generate images
-    progress(0, desc="Generating image")
-    image = pipe(
-        prompt=prompt,
-        negative_prompt=negative_prompt,
-        seed=None if seed == -1 else seed,
-        cfg_scale=cfg_scale, num_inference_steps=num_inference_steps,
-        positive_only_lora=lora,
-        sigma_shift=sigma_shift,
-        height=height,
-        width=width,
-        progress_bar_cmd=progress.tqdm
-    )
-    return image
-with gr.Blocks(title="Z-Image-Omni-Base") as demo:
-    gr.Markdown("Model: https://modelscope.cn/models/DiffSynth-Studio/Z-Image-i2L")
-    gr.Markdown("GitHub: https://github.com/modelscope/DiffSynth-Studio")
-    gr.Markdown("Upload images and generate new images based on text prompts")
-    with gr.Row():
-        # 第一列：输入的6张图
-        with gr.Column():
-            gr.Markdown("Input Images (upload 1-6 images)")
-            with gr.Row():
-                style_image_1 = gr.Image(label="Image 1", type="pil")
-                style_image_2 = gr.Image(label="Image 2", type="pil")
-            with gr.Row():
-                style_image_3 = gr.Image(label="Image 3", type="pil")
-                style_image_4 = gr.Image(label="Image 4", type="pil")
-            with gr.Row():
-                style_image_5 = gr.Image(label="Image 5", type="pil")
-                style_image_6 = gr.Image(label="Image 6", type="pil")
-        # 第二列：提示词等控件
-        with gr.Column():
-            gr.Markdown("Settings")
-            prompt = gr.Textbox(
-                label="Prompt",
-                placeholder="Enter your prompt here...",
-                value="a cat",
-                lines=3
-            )
-            seed = gr.Number(
-                label="Seed",
-                value=-1,
-                precision=0
-            )
-            height = gr.Slider(
-                label="Height",
-                minimum=512,
-                maximum=1536,
-                step=64,
-                value=1024
-            )
-            width = gr.Slider(
-                label="Width",
-                minimum=512,
-                maximum=1536,
-                step=64,
-                value=1024
-            )
-            num_inference_steps = gr.Slider(
-                label="Inference Steps",
-                minimum=10,
-                maximum=50,
-                step=1,
-                value=30
-            )
-            # Advanced settings (collapsed by default)
-            with gr.Accordion("Advanced Settings", open=True):
-                negative_prompt = gr.Textbox(
-                    label="Negative Prompt",
-                    placeholder="Enter negative prompt here...",
-                    value="泛黄，发绿，模糊，低分辨率，低质量图像，扭曲的肢体，诡异的外观，丑陋，AI感，噪点，网格感，JPEG压缩条纹，异常的肢体，水印，乱码，意义不明的字符"
-                )
-                cfg_scale = gr.Slider(
-                    label="CFG Scale",
-                    minimum=1.0,
-                    maximum=10.0,
-                    step=0.5,
-                    value=4.0
-                )
-                sigma_shift = gr.Slider(
-                    label="Sigma Shift",
-                    minimum=1.0,
-                    maximum=20.0,
-                    step=0.5,
-                    value=8.0
-                )
-            generate_btn = gr.Button("Generate Image")
-        # 第三列：输出图
-        with gr.Column():
-            gr.Markdown("Output")
-            output_image = gr.Image(
-                label="Generated Image",
-                interactive=False
-            )
-    generate_btn.click(
-        fn=run_inference,
-        inputs=[style_image_1, style_image_2, style_image_3, style_image_4, style_image_5, style_image_6, prompt, negative_prompt, cfg_scale, sigma_shift, seed, num_inference_steps, height, width],
-        outputs=[output_image],
-    )
-    gr.Examples(
-        examples=[
-            [
-                "assets/style/1/0.jpg", "assets/style/1/1.jpg", "assets/style/1/2.jpg", "assets/style/1/3.jpg", None, None,
-                "a cat", "泛黄，发绿，模糊，低分辨率，低质量图像，扭曲的肢体，诡异的外观，丑陋，AI感，噪点，网格感，JPEG压缩条纹，异常的肢体，水印，乱码，意义不明的字符",
-                4, 8, 0, 30, 1024, 1024
-            ],
-            [
-                "assets/style/4/0.jpg", "assets/style/4/1.jpg", "assets/style/4/2.jpg", "assets/style/4/3.jpg", "assets/style/4/4.jpg", "assets/style/4/5.jpg",
-                "a dog", "泛黄，发绿，模糊，低分辨率，低质量图像，扭曲的肢体，诡异的外观，丑陋，AI感，噪点，网格感，JPEG压缩条纹，异常的肢体，水印，乱码，意义不明的字符",
-                4, 8, 2, 30, 1024, 1024
-            ],
-            [
-                "assets/style/3/0.jpg", "assets/style/3/1.jpg", "assets/style/3/2.jpg", "assets/style/3/3.jpg", None, None,
-                "a girl", "泛黄，发绿，模糊，低分辨率，低质量图像，扭曲的肢体，诡异的外观，丑陋，AI感，噪点，网格感，JPEG压缩条纹，异常的肢体，水印，乱码，意义不明的字符",
-                4, 8, 1, 30, 1024, 1024
-            ],
-        ],
-        fn=run_inference,
-        inputs=[style_image_1, style_image_2, style_image_3, style_image_4, style_image_5, style_image_6, prompt, negative_prompt, cfg_scale, sigma_shift, seed, num_inference_steps, height, width],
-        outputs=[output_image],
-        cache_examples=True
-    )
 if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0", server_port=7860, share=False)

 import spaces
 import gradio as gr
 import torch
+from PIL import Image
+import os
+import sys
+import subprocess
+import tempfile
+from pathlib import Path
+import glob
+# Default negative prompts
+NEGATIVE_PROMPT_CN = "泛黄，发绿，模糊，低分辨率，低质量图像，扭曲的肢体，诡异的外观，丑陋，AI感，噪点，网格感，JPEG压缩条纹，异常的肢体，水印，乱码，意义不明的字符"
+NEGATIVE_PROMPT_EN = "Yellowed, green-tinted, blurry, low-resolution, low-quality image, distorted limbs, eerie appearance, ugly, AI-looking, noise, grid-like artifacts, JPEG compression artifacts, abnormal limbs, watermark, garbled text, meaningless characters"
+# Model paths - can be overridden via environment variables
+MODELS_DIR = Path(os.environ.get("ZIMAGE_MODELS_DIR", "./models"))
+# =============================================================================
+# Model Download Functions
+# =============================================================================
+def download_hf_models(output_dir: Path) -> dict:
+    """
+    Download required models from Hugging Face using huggingface_hub.
+    Downloads:
+    - DiffSynth-Studio/Z-Image-i2L
+    - Tongyi-MAI/Z-Image
+    - DiffSynth-Studio/General-Image-Encoders
+    - Tongyi-MAI/Z-Image-Turbo
+    Returns dict with paths to downloaded models.
+    """
+    from huggingface_hub import snapshot_download
+    output_dir.mkdir(parents=True, exist_ok=True)
+    models = [
+        {
+            "repo_id": "DiffSynth-Studio/General-Image-Encoders",
+            "description": "General Image Encoders (SigLIP2-G384, DINOv3-7B)",
+            "allow_patterns": None,
+        },
+        {
+            "repo_id": "Tongyi-MAI/Z-Image-Turbo",
+            "description": "Z-Image Turbo (text encoder, VAE, tokenizer)",
+            "allow_patterns": [
+                "text_encoder/*.safetensors",
+                "vae/*.safetensors",
+                "tokenizer/*",
+            ],
+        },
+        {
+            "repo_id": "Tongyi-MAI/Z-Image",
+            "description": "Z-Image base model (transformer)",
+            "allow_patterns": ["transformer/*.safetensors"],
+        },
+        {
+            "repo_id": "DiffSynth-Studio/Z-Image-i2L",
+            "description": "Z-Image-i2L (Image to LoRA model)",
+            "allow_patterns": ["*.safetensors"],
+        },
+    ]
+    downloaded_paths = {}
+    for model in models:
+        repo_id = model["repo_id"]
+        local_dir = output_dir / repo_id
+        # Check if already downloaded
+        if local_dir.exists() and any(local_dir.rglob("*.safetensors")):
+            print(f"   ✓ {repo_id} (already downloaded)")
+            downloaded_paths[repo_id] = local_dir
+            continue
+        print(f"   📥 Downloading {repo_id}...")
+        print(f"      {model['description']}")
+        try:
+            result_path = snapshot_download(
+                repo_id=repo_id,
+                local_dir=str(local_dir),
+                allow_patterns=model["allow_patterns"],
+                local_dir_use_symlinks=False,
+                resume_download=True,
+            )
+            downloaded_paths[repo_id] = Path(result_path)
+            print(f"   ✓ {repo_id}")
+        except Exception as e:
+            print(f"   ❌ Error downloading {repo_id}: {e}")
+            raise
+    return downloaded_paths
+def get_model_files(base_path: Path, pattern: str) -> list:
+    """Get list of files matching a glob pattern."""
+    full_pattern = str(base_path / pattern)
+    files = sorted(glob.glob(full_pattern))
+    return files
+def install_diffsynth_studio():
+    """Clone and install DiffSynth-Studio if not already installed."""
+    try:
+        from diffsynth.pipelines.z_image import ZImagePipeline
+        return True, "✅ DiffSynth-Studio is already installed."
+    except ImportError:
+        pass
+    repo_dir = Path(__file__).parent / "DiffSynth-Studio"
+    try:
+        if not repo_dir.exists():
+            print("📥 Cloning DiffSynth-Studio repository...")
+            subprocess.run(
+                ["git", "clone", "https://github.com/modelscope/DiffSynth-Studio.git", str(repo_dir)],
+                capture_output=True,
+                text=True,
+                check=True
+            )
+            print("✅ Repository cloned successfully.")
+        else:
+            print("📁 DiffSynth-Studio directory already exists, pulling latest...")
+            subprocess.run(
+                ["git", "-C", str(repo_dir), "pull"],
+                capture_output=True,
+                text=True
+            )
+        print("📦 Installing DiffSynth-Studio...")
+        subprocess.run(
+            [sys.executable, "-m", "pip", "install", "-e", str(repo_dir)],
+            capture_output=True,
+            text=True,
+            check=True
+        )
+        print("✅ DiffSynth-Studio installed successfully.")
+        sys.path.insert(0, str(repo_dir))
+        from diffsynth.pipelines.z_image import ZImagePipeline
+        return True, "✅ DiffSynth-Studio installed successfully!"
+    except subprocess.CalledProcessError as e:
+        error_msg = f"❌ Installation failed: {e.stderr}"
+        print(error_msg)
+        return False, error_msg
+    except Exception as e:
+        error_msg = f"❌ Error during installation: {str(e)}"
+        print(error_msg)
+        return False, error_msg
+# =============================================================================
+# Pipeline Initialization
+# =============================================================================
+print("=" * 60)
+print("  Z-Image-i2L Gradio Demo - Initializing")
+print("=" * 60)
+print()
+# Step 1: Install DiffSynth-Studio
+print("🔍 Step 1: Checking DiffSynth-Studio installation...")
+success, message = install_diffsynth_studio()
+print(message)
+if not success:
+    raise RuntimeError("Failed to install DiffSynth-Studio. Cannot continue.")
+# Step 2: Download HuggingFace models
+print()
+print("🔍 Step 2: Downloading models from HuggingFace...")
+print(f"   Models directory: {MODELS_DIR.absolute()}")
+downloaded_paths = download_hf_models(MODELS_DIR)
+# Import required modules
 from diffsynth.pipelines.z_image import (
     ZImagePipeline, ModelConfig,
     ZImageUnit_Image2LoRAEncode, ZImageUnit_Image2LoRADecode
 )
+from safetensors.torch import save_file, load_file
+# Step 3: Configure VRAM settings
+print()
+print("⚙️  Step 3: Configuring VRAM settings...")
 vram_config = {
     "offload_dtype": torch.bfloat16,
     "offload_device": "cuda",
     "computation_device": "cuda",
 }
+# Step 4: Resolve local model paths
+print()
+print("📂 Step 4: Resolving model paths...")
+# Z-Image transformer
+zimage_path = MODELS_DIR / "Tongyi-MAI" / "Z-Image"
+zimage_transformer_files = get_model_files(zimage_path, "transformer/*.safetensors")
+# Z-Image-Turbo
+zimage_turbo_path = MODELS_DIR / "Tongyi-MAI" / "Z-Image-Turbo"
+text_encoder_files = get_model_files(zimage_turbo_path, "text_encoder/*.safetensors")
+vae_file = get_model_files(zimage_turbo_path, "vae/diffusion_pytorch_model.safetensors")
+tokenizer_path = zimage_turbo_path / "tokenizer"
+# General Image Encoders
+encoders_path = MODELS_DIR / "DiffSynth-Studio" / "General-Image-Encoders"
+siglip_file = get_model_files(encoders_path, "SigLIP2-G384/model.safetensors")
+dino_file = get_model_files(encoders_path, "DINOv3-7B/model.safetensors")
+# Z-Image-i2L from HuggingFace
+zimage_i2l_path = MODELS_DIR / "DiffSynth-Studio" / "Z-Image-i2L"
+zimage_i2l_file = get_model_files(zimage_i2l_path, "model.safetensors")
+print(f"   Z-Image transformer: {len(zimage_transformer_files)} file(s)")
+print(f"   Text encoder: {len(text_encoder_files)} file(s)")
+print(f"   VAE: {len(vae_file)} file(s)")
+print(f"   Tokenizer: {tokenizer_path}")
+print(f"   SigLIP2: {len(siglip_file)} file(s)")
+print(f"   DINOv3: {len(dino_file)} file(s)")
+print(f"   Z-Image-i2L: {len(zimage_i2l_file)} file(s)")
+# Validate files
+missing = []
+if not zimage_transformer_files: missing.append("Z-Image transformer")
+if not text_encoder_files: missing.append("Text encoder")
+if not vae_file: missing.append("VAE")
+if not tokenizer_path.exists(): missing.append("Tokenizer")
+if not siglip_file: missing.append("SigLIP2")
+if not dino_file: missing.append("DINOv3")
+if not zimage_i2l_file: missing.append("Z-Image-i2L")
+if missing:
+    raise FileNotFoundError(f"Missing model files: {', '.join(missing)}")
+# Step 5: Load pipeline
+print()
+print("🚀 Step 5: Loading Z-Image pipeline...")
+print("   All models loaded from HuggingFace local paths")
+model_configs = [
+    # All models from HuggingFace - use path= for local files
+    ModelConfig(path=zimage_transformer_files, **vram_config),
+    ModelConfig(path=text_encoder_files),
+    ModelConfig(path=vae_file),
+    ModelConfig(path=siglip_file),
+    ModelConfig(path=dino_file),
+    ModelConfig(path=zimage_i2l_file),
+]
 pipe = ZImagePipeline.from_pretrained(
     torch_dtype=torch.bfloat16,
     device="cuda",
+    model_configs=model_configs,
+    tokenizer_config=ModelConfig(path=str(tokenizer_path)),
 )
+print()
+print("✅ Pipeline loaded successfully!")
+print("=" * 60)
+print()
+# =============================================================================
+# Gradio Functions
+# =============================================================================
+@spaces.GPU(duration=120)
+def image_to_lora(images, progress=gr.Progress()):
+    """Convert input images to a LoRA model."""
+    if images is None or len(images) == 0:
+        return None, "❌ Please upload at least one image!"
+    try:
+        progress(0.1, desc="Processing images...")
+        pil_images = []
+        for img in images:
+            if isinstance(img, str):
+                pil_images.append(Image.open(img).convert("RGB"))
+            elif isinstance(img, tuple):
+                pil_images.append(Image.open(img[0]).convert("RGB"))
+            else:
+                pil_images.append(Image.fromarray(img).convert("RGB"))
+        progress(0.3, desc="Encoding images to LoRA...")
+        with torch.no_grad():
+            embs = ZImageUnit_Image2LoRAEncode().process(pipe, image2lora_images=pil_images)
+            progress(0.7, desc="Decoding LoRA weights...")
+            lora = ZImageUnit_Image2LoRADecode().process(pipe, **embs)["lora"]
+        progress(0.9, desc="Saving LoRA file...")
+        temp_dir = tempfile.mkdtemp()
+        lora_path = os.path.join(temp_dir, "generated_lora.safetensors")
+        save_file(lora, lora_path)
+        progress(1.0, desc="Done!")
+        return lora_path, f"✅ LoRA generated successfully from {len(pil_images)} image(s)!"
+    except Exception as e:
+        return None, f"❌ Error generating LoRA: {str(e)}"
+@spaces.GPU(duration=60)
+def generate_image(
+    lora_file,
+    prompt,
+    negative_prompt,
+    seed,
+    cfg_scale,
+    sigma_shift,
+    num_steps,
+    progress=gr.Progress()
+):
+    """Generate an image using the created LoRA."""
+    if lora_file is None:
+        return None, "❌ Please generate or upload a LoRA file first!"
+    try:
+        progress(0.1, desc="Loading LoRA...")
+        lora = load_file(lora_file)
+        # Move LoRA tensors to CUDA with correct dtype
+        lora = {k: v.to(device="cuda", dtype=torch.bfloat16) for k, v in lora.items()}
+        progress(0.3, desc="Generating image...")
+        image = pipe(
+            prompt=prompt,
+            negative_prompt=negative_prompt,
+            seed=int(seed),
+            cfg_scale=cfg_scale,
+            num_inference_steps=int(num_steps),
+            positive_only_lora=lora,
+            sigma_shift=sigma_shift
+        )
+        progress(1.0, desc="Done!")
+        return image, "✅ Image generated successfully!"
+    except Exception as e:
+        return None, f"❌ Error generating image: {str(e)}"
+def create_demo():
+    """Create the Gradio interface."""
+    with gr.Blocks(
+        title="Z-Image-i2L Demo",
+        theme=gr.themes.Soft(),
+        css=".gradio-container { max-width: 1200px !important; margin: 0 auto}"
+    ) as demo:
+        gr.Markdown("""
+        # 🎨 Z-Image-i2L: Image to LoRA Demo
+        > 💡 **Tip**: For best results, use 4-6 images with a consistent artistic style.
+        """)
+        with gr.Tabs():
+            with gr.TabItem("📸 Step 1: Image to LoRA"):
+                with gr.Row():
+                    with gr.Column(scale=1):
+                        input_gallery = gr.Gallery(
+                            label="Upload Style Images (1-6 images)",
+                            file_types=["image"],
+                            columns=3,
+                            height=300,
+                            interactive=True
+                        )
+                        gr.Markdown("""
+                        **Guidelines:**
+                        - Upload 1-6 images with a consistent style
+                        - Higher quality images produce better results
+                        - Mix of subjects helps generalization
+                        """)
+                        generate_lora_btn = gr.Button("🎯 Generate LoRA", variant="primary")
+                    with gr.Column(scale=1):
+                        lora_output = gr.File(
+                            label="Generated LoRA File",
+                            file_types=[".safetensors"],
+                            interactive=False
+                        )
+                        lora_status = gr.Textbox(
+                            label="Status",
+                            interactive=False,
+                            lines=2
+                        )
+            with gr.TabItem("🖼️ Step 2: Generate Images"):
+                with gr.Row():
+                    with gr.Column(scale=1):
+                        lora_input = gr.File(
+                            label="LoRA File (from Step 1 or upload)",
+                            file_types=[".safetensors"]
+                        )
+                        prompt = gr.Textbox(
+                            label="Prompt",
+                            placeholder="Describe what you want to generate...",
+                            value="a cat",
+                            lines=2
+                        )
+                        with gr.Accordion("Negative Prompt", open=False):
+                            negative_prompt = gr.Textbox(
+                                label="Negative Prompt",
+                                value=NEGATIVE_PROMPT_CN,
+                                lines=3
+                            )
+                            with gr.Row():
+                                use_cn_neg = gr.Button("Use Chinese", size="sm")
+                                use_en_neg = gr.Button("Use English", size="sm")
+                        with gr.Accordion("Advanced Settings", open=False):
+                            seed = gr.Number(label="Seed", value=0, precision=0)
+                            cfg_scale = gr.Slider(label="CFG Scale", minimum=1, maximum=10, value=4, step=0.5)
+                            sigma_shift = gr.Slider(label="Sigma Shift", minimum=1, maximum=15, value=8, step=1)
+                            num_steps = gr.Slider(label="Steps", minimum=20, maximum=100, value=50, step=5)
+                        generate_btn = gr.Button("✨ Generate Image", variant="primary")
+                    with gr.Column(scale=1):
+                        output_image = gr.Image(label="Generated Image", type="pil", height=512)
+                        gen_status = gr.Textbox(label="Status", interactive=False, lines=2)
+        gr.Markdown("""
+        ---
+        **Resources:** [Z-Image-i2L (HuggingFace)](https://huggingface.co/DiffSynth-Studio/Z-Image-i2L) |
+        [DiffSynth-Studio](https://github.com/modelscope/DiffSynth-Studio) |
+        **Settings:** CFG=4, Sigma Shift=8, Steps=50
+        """)
+        # Event handlers
+        generate_lora_btn.click(
+            fn=image_to_lora,
+            inputs=[input_gallery],
+            outputs=[lora_output, lora_status]
+        )
+        lora_output.change(fn=lambda x: x, inputs=[lora_output], outputs=[lora_input])
+        generate_btn.click(
+            fn=generate_image,
+            inputs=[lora_input, prompt, negative_prompt, seed, cfg_scale, sigma_shift, num_steps],
+            outputs=[output_image, gen_status]
+        )
+        use_cn_neg.click(fn=lambda: NEGATIVE_PROMPT_CN, outputs=[negative_prompt])
+        use_en_neg.click(fn=lambda: NEGATIVE_PROMPT_EN, outputs=[negative_prompt])
+    return demo
 if __name__ == "__main__":
+    print("Starting Gradio server...")
+    demo = create_demo()
+    demo.launch(server_name="0.0.0.0", server_port=7860, share=False)