# Qwen-Image-Layered demo — Hugging Face Space (runs on ZeroGPU).
| import os | |
| import uuid | |
| import numpy as np | |
| import random | |
| import tempfile | |
| import zipfile | |
| import spaces | |
| import torch | |
| import gradio as gr | |
| from PIL import Image | |
| from diffusers import QwenImageLayeredPipeline | |
| from pptx import Presentation | |
# Directory for request logs; /tmp is writable inside HF Spaces containers.
LOG_DIR = "/tmp/local"
MAX_SEED = np.iinfo(np.int32).max

# Optional HF login (works in Spaces if you set HF token as secret env var "hf").
# Only attempt login when a token is actually present: huggingface_hub's
# login(token=None) falls back to an interactive prompt, which fails in a
# headless Space container.
from huggingface_hub import login

_hf_token = os.environ.get("hf")
if _hf_token:
    login(token=_hf_token)

# Load the layered-decomposition pipeline once at startup and keep it resident.
dtype = torch.bfloat16
device = "cuda" if torch.cuda.is_available() else "cpu"
pipeline = QwenImageLayeredPipeline.from_pretrained(
    "Qwen/Qwen-Image-Layered", torch_dtype=dtype
).to(device)
def ensure_dirname(path: str) -> None:
    """Create *path* (including parents) if it is non-empty.

    ``os.makedirs(..., exist_ok=True)`` already tolerates an existing
    directory, so the separate ``os.path.exists`` check the original did
    first was redundant and race-prone (LBYL). The empty-path guard is
    kept: ``os.makedirs("")`` would raise.
    """
    if path:
        os.makedirs(path, exist_ok=True)
def random_str(length=8):
    """Return a random identifier: the first *length* hex digits of a UUID4."""
    token = uuid.uuid4().hex
    return token[:length]
def imagelist_to_pptx(img_files):
    """Stack the given images as full-slide pictures on a single PPTX slide.

    Slide dimensions are taken from the first image; every image is placed
    at the origin covering the whole slide, so the list order becomes the
    bottom-to-top layer order. Returns the path of a temporary .pptx file.
    """
    with Image.open(img_files[0]) as first:
        width_px, height_px = first.size

    def px_to_emu(px, dpi=96):
        # python-pptx measures in EMU: 914400 EMU per inch.
        inches = px / dpi
        return int(inches * 914400)

    slide_w = px_to_emu(width_px)
    slide_h = px_to_emu(height_px)

    prs = Presentation()
    prs.slide_width = slide_w
    prs.slide_height = slide_h
    # Layout 6 is the blank layout in the default template.
    blank_slide = prs.slides.add_slide(prs.slide_layouts[6])

    for path in img_files:
        blank_slide.shapes.add_picture(path, 0, 0, width=slide_w, height=slide_h)

    with tempfile.NamedTemporaryFile(suffix=".pptx", delete=False) as tmp:
        prs.save(tmp.name)
        return tmp.name
| def _clamp_int(x, default: int, lo: int, hi: int) -> int: | |
| try: | |
| v = int(x) | |
| except Exception: | |
| v = default | |
| return max(lo, min(hi, v)) | |
| # Dynamic duration callable: must accept the same args as infer(). It returns seconds. | |
| def get_duration( | |
| input_image, | |
| seed=777, | |
| randomize_seed=False, | |
| prompt=None, | |
| neg_prompt=" ", | |
| true_guidance_scale=4.0, | |
| num_inference_steps=50, | |
| layer=4, | |
| cfg_norm=True, | |
| use_en_prompt=True, | |
| resolution=640, | |
| gpu_duration=1000, # <-- NEW | |
| ): | |
| # Allow user override via UI (text field), but keep it sane | |
| return _clamp_int(gpu_duration, default=1000, lo=20, hi=1500) | |
def infer(
    input_image,
    seed=777,
    randomize_seed=False,
    prompt=None,
    neg_prompt=" ",
    true_guidance_scale=4.0,
    num_inference_steps=50,
    layer=4,
    cfg_norm=True,
    use_en_prompt=True,
    resolution=640,
    gpu_duration=1000,  # <-- NEW (must match get_duration signature)
):
    """Decompose *input_image* into layers with the Qwen-Image-Layered pipeline.

    Accepts a file path, PIL image, or numpy array (or a list, in which case
    only the first element is used). Returns a 3-tuple matching the Gradio
    outputs: (list of layer images for the gallery, path to a .pptx stacking
    the layers, path to a .zip of the layer PNGs).

    NOTE(review): get_duration() appears intended to drive a
    @spaces.GPU(duration=get_duration) decorator on this function, but no
    decorator is visible in this chunk — confirm it is applied elsewhere.
    """
    # Seed
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    # Normalize resolution input; the pipeline accepts only 640 or 1024.
    resolution = _clamp_int(resolution, default=640, lo=640, hi=1024)
    if resolution not in (640, 1024):
        resolution = 640
    # Normalize image input. RGB->RGBA flattens any alpha, then re-adds a channel.
    if isinstance(input_image, list):
        input_image = input_image[0]
    if isinstance(input_image, str):
        pil_image = Image.open(input_image).convert("RGB").convert("RGBA")
    elif isinstance(input_image, Image.Image):
        pil_image = input_image.convert("RGB").convert("RGBA")
    elif isinstance(input_image, np.ndarray):
        pil_image = Image.fromarray(input_image).convert("RGB").convert("RGBA")
    else:
        raise ValueError(f"Unsupported input_image type: {type(input_image)}")
    # Generator must live on the device actually available at call time.
    gen_device = "cuda" if torch.cuda.is_available() else "cpu"
    inputs = {
        "image": pil_image,
        "generator": torch.Generator(device=gen_device).manual_seed(seed),
        "true_cfg_scale": true_guidance_scale,
        "prompt": prompt,
        "negative_prompt": neg_prompt,
        "num_inference_steps": num_inference_steps,
        "num_images_per_prompt": 1,
        "layers": layer,
        "resolution": resolution,  # 640 or 1024
        "cfg_normalize": cfg_norm,
        "use_en_prompt": use_en_prompt,
    }
    print("INFER INPUTS:", inputs)
    print("REQUESTED GPU DURATION:", gpu_duration)
    with torch.inference_mode():
        out = pipeline(**inputs)
    output_images = out.images[0]  # list of PIL images (layers)
    # Prepare gallery + export files. delete=False keeps the temp PNGs alive
    # so Gradio can serve them for download after this function returns.
    gallery_out = []
    temp_files = []
    for img in output_images:
        gallery_out.append(img)
        tmp = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
        img.save(tmp.name)
        temp_files.append(tmp.name)
    pptx_path = imagelist_to_pptx(temp_files)
    # Bundle all layer PNGs into a single downloadable zip archive.
    with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as tmpzip:
        with zipfile.ZipFile(tmpzip.name, "w", zipfile.ZIP_DEFLATED) as zipf:
            for i, img_path in enumerate(temp_files):
                zipf.write(img_path, f"layer_{i+1}.png")
        zip_path = tmpzip.name
    return gallery_out, pptx_path, zip_path
# Make sure the log directory exists before the UI starts.
ensure_dirname(LOG_DIR)
# Bundled demo inputs (assets/test_images/1.png .. 13.png) for gr.Examples.
examples = [f"assets/test_images/{i}.png" for i in range(1, 14)]
# ---- Gradio UI ----
with gr.Blocks() as demo:
    with gr.Column(elem_id="col-container"):
        # Banner logo.
        gr.HTML(
            '<img src="https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-Image/layered/qwen-image-layered-logo.png" '
            'alt="Qwen-Image-Layered Logo" width="600" style="display: block; margin: 0 auto;">'
        )
        gr.Markdown(
            """
The text prompt is intended to describe the overall content of the input image—including elements that may be partially occluded (e.g., you may specify the text hidden behind a foreground object). It is not designed to control the semantic content of individual layers explicitly.
"""
        )
        with gr.Row():
            with gr.Column(scale=1):
                input_image = gr.Image(label="Input Image", image_mode="RGBA")
                with gr.Accordion("Advanced Settings", open=False):
                    prompt = gr.Textbox(
                        label="Prompt (Optional)",
                        # Typo fix: "descibe" -> "describe" in the user-facing text.
                        placeholder="Please enter the prompt to describe the image. (Optional)",
                        value="",
                        lines=2,
                    )
                    neg_prompt = gr.Textbox(
                        label="Negative Prompt (Optional)",
                        placeholder="Please enter the negative prompt",
                        value=" ",
                        lines=2,
                    )
                    seed = gr.Slider(
                        label="Seed",
                        minimum=0,
                        maximum=MAX_SEED,
                        step=1,
                        value=0,
                    )
                    randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
                    true_guidance_scale = gr.Slider(
                        label="True guidance scale",
                        minimum=1.0,
                        maximum=10.0,
                        step=0.1,
                        value=4.0,
                    )
                    num_inference_steps = gr.Slider(
                        label="Number of inference steps",
                        minimum=1,
                        maximum=50,
                        step=1,
                        value=50,
                    )
                    layer = gr.Slider(
                        label="Layers",
                        minimum=2,
                        maximum=10,
                        step=1,
                        value=4,
                    )
                    resolution = gr.Radio(
                        label="Processing resolution",
                        choices=[640, 1024],
                        value=640,
                    )
                    cfg_norm = gr.Checkbox(
                        label="Whether enable CFG normalization", value=True
                    )
                    use_en_prompt = gr.Checkbox(
                        label="Automatic caption language if no prompt provided, True for EN, False for ZH",
                        value=True,
                    )
                    # NEW: text field for GPU duration override (seconds)
                    gpu_duration = gr.Textbox(
                        label="GPU duration override (seconds, 20..1500)",
                        value="1000",
                        lines=1,
                        placeholder="e.g. 60, 120, 300, 1000, 1500",
                    )
                run_button = gr.Button("Decompose!", variant="primary")
            with gr.Column(scale=2):
                gallery = gr.Gallery(label="Layers", columns=4, rows=1, format="png")
                with gr.Row():
                    export_file = gr.File(label="Download PPTX")
                    export_zip_file = gr.File(label="Download ZIP")
        # Clicking an example runs infer() with only the image; all other
        # infer parameters fall back to their defaults.
        gr.Examples(
            examples=examples,
            inputs=[input_image],
            outputs=[gallery, export_file, export_zip_file],
            fn=infer,
            examples_per_page=14,
            cache_examples=False,
            run_on_click=True,
        )
    # Input order must match the infer()/get_duration() signatures.
    run_button.click(
        fn=infer,
        inputs=[
            input_image,
            seed,
            randomize_seed,
            prompt,
            neg_prompt,
            true_guidance_scale,
            num_inference_steps,
            layer,
            cfg_norm,
            use_en_prompt,
            resolution,
            gpu_duration,  # <-- NEW
        ],
        outputs=[gallery, export_file, export_zip_file],
    )
if __name__ == "__main__":
    # Start the Gradio server (blocking call).
    demo.launch()