johndoe321 committed (verified)
Commit 5fcfa96 · Parent: 2015b5b

Update app.py

Replace the Qwen-Image-Layered decomposition demo with a Qwen-Image-Edit-2509 editing demo: load the model sharded across all available GPUs via device_map="auto", drop the PPTX export path, and rework the UI around one or two input images plus an edit prompt.

Files changed (1):
  1. app.py +187 -193
app.py CHANGED
@@ -1,228 +1,222 @@
-import os
-import uuid
-import numpy as np
-import random
-import tempfile
-import spaces
-from PIL import Image
-from diffusers import QwenImageLayeredPipeline
-import torch
-from pptx import Presentation
-import gradio as gr
-
-
-LOG_DIR = "/tmp/local"
-MAX_SEED = np.iinfo(np.int32).max
-
-from huggingface_hub import login
-login(token=os.environ.get('hf'))
-
-dtype = torch.bfloat16
-device = "cuda" if torch.cuda.is_available() else "cpu"
-pipeline = QwenImageLayeredPipeline.from_pretrained("Qwen/Qwen-Image-Layered", torch_dtype=dtype).to(device)
-# pipeline.set_progress_bar_config(disable=None)
-
-def ensure_dirname(path: str):
-    if path and not os.path.exists(path):
-        os.makedirs(path, exist_ok=True)
-
-def random_str(length=8):
-    return uuid.uuid4().hex[:length]
-
-def imagelist_to_pptx(img_files):
-    with Image.open(img_files[0]) as img:
-        img_width_px, img_height_px = img.size
-
-    def px_to_emu(px, dpi=96):
-        # 914400 EMU per inch, e.g. 640 px at 96 dpi -> 6.667 in -> 6,096,000 EMU
-        inch = px / dpi
-        emu = inch * 914400
-        return int(emu)
-
-    prs = Presentation()
-    prs.slide_width = px_to_emu(img_width_px)
-    prs.slide_height = px_to_emu(img_height_px)
-
-    slide = prs.slides.add_slide(prs.slide_layouts[6])
-
-    left = top = 0
-    for img_path in img_files:
-        slide.shapes.add_picture(img_path, left, top, width=px_to_emu(img_width_px), height=px_to_emu(img_height_px))
-
-    with tempfile.NamedTemporaryFile(suffix=".pptx", delete=False) as tmp:
-        prs.save(tmp.name)
-        return tmp.name
-
-def export_gallery(images):
-    # images: list of (file path, caption) tuples from the gallery
-    images = [e[0] for e in images]
-    pptx_path = imagelist_to_pptx(images)
-    return pptx_path
-
-@spaces.GPU(duration=300)
-def infer(input_image,
-          seed=777,
-          randomize_seed=False,
-          prompt=None,
-          neg_prompt=" ",
-          true_guidance_scale=4.0,
-          num_inference_steps=50,
-          layer=4,
-          cfg_norm=True,
-          use_en_prompt=True):
-
-    if randomize_seed:
-        seed = random.randint(0, MAX_SEED)
-
-    if isinstance(input_image, list):
-        input_image = input_image[0]
-
-    if isinstance(input_image, str):
-        pil_image = Image.open(input_image).convert("RGB").convert("RGBA")
-    elif isinstance(input_image, Image.Image):
-        pil_image = input_image.convert("RGB").convert("RGBA")
-    elif isinstance(input_image, np.ndarray):
-        pil_image = Image.fromarray(input_image).convert("RGB").convert("RGBA")
-    else:
-        raise ValueError("Unsupported input_image type: %s" % type(input_image))
-
-    inputs = {
-        "image": pil_image,
-        "generator": torch.Generator(device='cuda').manual_seed(seed),
-        "true_cfg_scale": true_guidance_scale,
-        "prompt": prompt,
-        "negative_prompt": neg_prompt,
-        "num_inference_steps": num_inference_steps,
-        "num_images_per_prompt": 1,
-        "layers": layer,
-        "resolution": 640,  # Resolution bucket (640 or 1024); 640 is recommended for this version
-        "cfg_normalize": cfg_norm,  # Whether to enable CFG normalization
-        "use_en_prompt": use_en_prompt,
-    }
-    print(inputs)
-    with torch.inference_mode():
-        output = pipeline(**inputs)
-        output_images = output.images[0]
-
-    output = []
-    for i, image in enumerate(output_images):
-        output.append(image)
-    return output
-
-ensure_dirname(LOG_DIR)
-examples = [
-    "assets/test_images/1.png",
-    "assets/test_images/2.png",
-    "assets/test_images/3.png",
-    "assets/test_images/4.png",
-    "assets/test_images/5.png",
-    "assets/test_images/6.png",
-    "assets/test_images/7.png",
-    "assets/test_images/8.png",
-    "assets/test_images/9.png",
-    "assets/test_images/10.png",
-    "assets/test_images/11.png",
-    "assets/test_images/12.png",
-    "assets/test_images/13.png",
-]
-
-
-with gr.Blocks() as demo:
-    with gr.Column(elem_id="col-container"):
-        gr.HTML('<img src="https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-Image/layered/qwen-image-layered-logo.png" alt="Qwen-Image-Layered Logo" width="600" style="display: block; margin: 0 auto;">')
-
-        with gr.Row():
-            with gr.Column(scale=1):
-                input_image = gr.Image(label="Input Image", image_mode="RGBA")
-
-                prompt = gr.Textbox(
-                    label="Prompt (Optional)",
-                    placeholder="Please enter the prompt to guide the decomposition (Optional)",
-                    value="",
-                    lines=2,
-                )
-
-                with gr.Accordion("Advanced Settings", open=False):
-                    neg_prompt = gr.Textbox(
-                        label="Negative Prompt (Optional)",
-                        placeholder="Please enter the negative prompt",
-                        value=" ",
-                        lines=2,
-                    )
-
-                    seed = gr.Slider(
-                        label="Seed",
-                        minimum=0,
-                        maximum=MAX_SEED,
-                        step=1,
-                        value=0,
-                    )
-                    randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
-
-                    true_guidance_scale = gr.Slider(
-                        label="True guidance scale",
-                        minimum=1.0,
-                        maximum=10.0,
-                        step=0.1,
-                        value=4.0,
-                    )
-
-                    num_inference_steps = gr.Slider(
-                        label="Number of inference steps",
-                        minimum=1,
-                        maximum=50,
-                        step=1,
-                        value=50,
-                    )
-
-                    layer = gr.Slider(
-                        label="Layers",
-                        minimum=2,
-                        maximum=10,
-                        step=1,
-                        value=4,
-                    )
-
-                    cfg_norm = gr.Checkbox(label="Enable CFG normalization", value=True)
-                    use_en_prompt = gr.Checkbox(label="Caption language when no prompt is provided (checked: EN, unchecked: ZH)", value=True)
-
-                run_button = gr.Button("Decompose!", variant="primary")
-
-            with gr.Column(scale=1):
-                gallery = gr.Gallery(label="Layers", columns=4, rows=1, format="png")
-                export_btn = gr.Button("Export as PPTX")
-                export_file = gr.File(label="Download PPTX")
-
-    gr.Examples(examples=examples,
-                inputs=[input_image],
-                outputs=[gallery],
-                fn=infer,
-                examples_per_page=14,
-                cache_examples=False,
-                run_on_click=True,
-                )
-
-    export_btn.click(
-        fn=export_gallery,
-        inputs=gallery,
-        outputs=export_file,
-    )
-
-    run_button.click(
-        fn=infer,
-        inputs=[
-            input_image,
-            seed,
-            randomize_seed,
-            prompt,
-            neg_prompt,
-            true_guidance_scale,
-            num_inference_steps,
-            layer,
-            cfg_norm,
-            use_en_prompt,
-        ],
-        outputs=gallery,
-    )
-
-if __name__ == "__main__":
-    demo.launch()
+import gradio as gr
+import numpy as np
+import torch
+from PIL import Image
+
+# `spaces` is required for Hugging Face Spaces to function correctly
+import spaces
+from diffusers import QwenImageEditPlusPipeline
+
+# --- Configuration ---
+# The model name on the Hugging Face Hub
+MODEL_NAME = "Qwen/Qwen-Image-Edit-2509"
+
+# --- Global Initialization ---
+# This code runs only once when the Space boots up, loading the model into memory.
+
+print(f"PyTorch version: {torch.__version__}")
+
+# We don't need to set the device manually; device_map="auto" handles placement.
+# We only specify the data type for the weights.
+torch_dtype = torch.bfloat16
+
+print(f"Loading model: {MODEL_NAME}...")
+print("The model will be automatically sharded across all available GPUs (8x L40S).")
+
+try:
+    # This is the most important line for multi-GPU memory distribution:
+    # device_map="auto" instructs the accelerate library to split the model
+    # across all available GPUs, balancing the memory load.
+    pipe = QwenImageEditPlusPipeline.from_pretrained(
+        MODEL_NAME,
+        torch_dtype=torch_dtype,
+        device_map="auto",
+        # A safety checker can be memory-intensive; the base pipeline may not need one,
+        # but if loading fails you can try disabling it explicitly.
+        # safety_checker=None,
+    )
+    pipe.set_progress_bar_config(disable=None)
+    print("✅ Model loaded successfully and distributed across GPUs.")
+
+except Exception as e:
+    print(f"❌ Error loading model: {e}")
+    pipe = None  # Ensure pipe is None if loading fails
+
+MAX_SEED = np.iinfo(np.int32).max
+
+# --- Gradio Function ---
+
+# The @spaces.GPU decorator is still needed on HF Spaces to indicate that this
+# function uses the GPU; it helps the platform manage resources.
+@spaces.GPU
+def edit_images(
+    image1: Image.Image,
+    image2: Image.Image,
+    prompt: str,
+    negative_prompt: str,
+    seed: int,
+    num_inference_steps: int,
+    guidance_scale: float,
+    true_cfg_scale: float,
+    progress=gr.Progress(track_tqdm=True),
+):
+    if pipe is None:
+        # Raise (not just instantiate) gr.Error so the message reaches the UI
+        raise gr.Error("The model is not available. Please check the Space logs for errors during startup.")
+
+    # A prompt is required, and at least one image must be provided.
+    if not prompt:
+        gr.Warning("Please provide a text prompt.")
+        return None, seed
+
+    images = []
+    if image1 is not None:
+        images.append(image1)
+    if image2 is not None:
+        images.append(image2)
+
+    if len(images) == 0:
+        gr.Warning("Please upload at least one image.")
+        return None, seed
+
+    num_inference_steps = int(num_inference_steps)
+    negative_prompt_value = negative_prompt if negative_prompt else " "
+
+    # If seed is 0, draw a random one; returning it keeps the run reproducible
+    if seed == 0:
+        seed = np.random.randint(1, MAX_SEED)
+
+    # IMPORTANT: The pipeline is already sharded across GPUs due to device_map="auto".
+    # We create the generator on the primary device ('cuda' or 'cuda:0');
+    # accelerate's dispatch hooks take care of cross-device tensor movement.
+    try:
+        generator = torch.Generator(device="cuda").manual_seed(seed)
+    except RuntimeError:
+        # Fallback if 'cuda' isn't the main device name in some environments
+        generator = torch.Generator(device="cuda:0").manual_seed(seed)
+
+    inputs = {
+        "image": images,
+        "prompt": prompt,
+        "generator": generator,
+        "true_cfg_scale": true_cfg_scale,
+        "negative_prompt": negative_prompt_value,
+        "num_inference_steps": num_inference_steps,
+        "guidance_scale": guidance_scale,
+        "num_images_per_prompt": 1,
+    }
+
+    try:
+        with torch.inference_mode():
+            output = pipe(**inputs)
+            output_image = output.images[0]
+
+        return output_image, seed
+    except Exception as e:
+        print(f"An error occurred during inference: {e}")
+        raise gr.Error(f"Inference failed: {e}")
+
+
+# --- Gradio UI ---
+
+css = """
+#col-container {
+    margin: 0 auto;
+    max-width: 900px;
+}
+"""
+
+with gr.Blocks(css=css) as demo:
+    with gr.Column(elem_id="col-container"):
+        gr.Markdown(
+            """
+            # Qwen Image Edit Plus
+            ### 8x L40S Memory-Sharded Inference
+            This application shards the model across all 8 GPUs to handle its large memory footprint for a single request.
+            """
+        )
+        if pipe is None:
+            gr.Markdown(
+                """
+                <span style="color: red;">**Model failed to load. Check the Space logs.**</span>
+                """
+            )
+
+        with gr.Row():
+            with gr.Column():
+                image1 = gr.Image(
+                    label="Input Image 1 (Required)",
+                    type="pil",
+                )
+                image2 = gr.Image(
+                    label="Input Image 2 (Optional)",
+                    type="pil",
+                )
+
+            with gr.Column():
+                prompt = gr.Textbox(
+                    label="Text Prompt",
+                    lines=4,
+                    placeholder="Describe how the image(s) should be edited...",
+                )
+                negative_prompt = gr.Textbox(
+                    label="Negative Prompt (optional)",
+                    lines=2,
+                    placeholder="Describe what you want to avoid...",
+                )
+                with gr.Accordion("Advanced Settings", open=False):
+                    seed = gr.Slider(
+                        label="Seed (0 for random)",
+                        minimum=0,
+                        maximum=MAX_SEED,
+                        step=1,
+                        value=0,
+                    )
+                    num_inference_steps = gr.Slider(
+                        label="Inference Steps",
+                        minimum=5,
+                        maximum=60,
+                        step=1,
+                        value=40,
+                    )
+                    guidance_scale = gr.Slider(
+                        label="Guidance Scale",
+                        minimum=0.0,
+                        maximum=5.0,
+                        step=0.1,
+                        value=1.0,
+                    )
+                    true_cfg_scale = gr.Slider(
+                        label="True CFG Scale",
+                        minimum=1.0,
+                        maximum=8.0,
+                        step=0.5,
+                        value=4.0,
+                    )
+                run_button = gr.Button("Generate", variant="primary")
+
+        result = gr.Image(label="Edited Image", show_label=True)
+
+    gr.on(
+        triggers=[run_button.click, prompt.submit],
+        fn=edit_images,
+        inputs=[
+            image1,
+            image2,
+            prompt,
+            negative_prompt,
+            seed,
+            num_inference_steps,
+            guidance_scale,
+            true_cfg_scale,
+        ],
+        outputs=[result, seed],
+    )
+
+if __name__ == "__main__":
+    # On HF Spaces, .queue() manages user requests. default_concurrency_limit
+    # (concurrency_count in Gradio 3.x) caps how many run at once. A value of
+    # 2-4 is safe even on 8x L40S, as each inference is heavy; higher values
+    # risk OOM when the sharded model already fills all 8 GPUs.
+    demo.queue(default_concurrency_limit=2).launch()
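
For reference, below is a minimal sketch of driving the same pipeline outside Gradio. It mirrors the inputs dict that the new app.py builds; the file paths, prompt, and seed are placeholders, and device_map="auto" support for this pipeline depends on the installed diffusers/accelerate versions.

import torch
from PIL import Image
from diffusers import QwenImageEditPlusPipeline

# Load the editing pipeline sharded across available GPUs, as app.py does.
pipe = QwenImageEditPlusPipeline.from_pretrained(
    "Qwen/Qwen-Image-Edit-2509",
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# Same arguments the Space passes; numeric values are the UI defaults.
output = pipe(
    image=[Image.open("input.png")],       # placeholder input path
    prompt="Make the sky a vivid sunset",  # placeholder edit instruction
    negative_prompt=" ",
    generator=torch.Generator(device="cuda").manual_seed(777),  # placeholder seed
    true_cfg_scale=4.0,
    guidance_scale=1.0,
    num_inference_steps=40,
    num_images_per_prompt=1,
)
output.images[0].save("edited.png")        # placeholder output path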