zhoudewei.666 committed on
Commit
f291567
·
1 Parent(s): 12c29f6

fix: adapt for ZeroGPU - move model loading to module level, add missing deps

Browse files

- Move pipeline init to module level so ZeroGPU handles CPU/GPU transfer
- Add torchvision, sentencepiece, protobuf to requirements
- Fix ImageSlider import from gradio_imageslider
- Remove model_dir/device/output_path UI controls (not applicable on Spaces)
- Set hardware: zero-gpu in README

Made-with: Cursor

Files changed (3) hide show
  1. README.md +1 -0
  2. app.py +53 -119
  3. requirements.txt +4 -1
README.md CHANGED
@@ -8,6 +8,7 @@ sdk_version: 6.5.1
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
 
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
11
+ hardware: zero-gpu
12
  ---
13
 
14
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -3,21 +3,16 @@ import os
3
  import threading
4
  import time
5
 
 
 
 
 
6
  try:
7
  import spaces
8
  _HAS_SPACES = True
9
  except ImportError:
10
  _HAS_SPACES = False
11
 
12
-
13
- def setup_debug():
14
- import debugpy
15
- rank = int(os.environ.get("RANK", 0))
16
- if rank == 0:
17
- debugpy.listen(5679)
18
- print("wait for debug")
19
- debugpy.wait_for_client()
20
-
21
  def calculate_dimensions(target_area: int, ratio: float):
22
  width = math.sqrt(target_area * ratio)
23
  height = width / ratio
@@ -84,69 +79,60 @@ _HF_LORA_REPO = "limuloo1999/RefineAnything"
84
  _HF_LORA_FILENAME = "Qwen-Image-Edit-2511-RefineAny.safetensors"
85
  _HF_LORA_ADAPTER = "refine_anything"
86
 
87
- _PIPELINE = None
88
- _PIPELINE_KEY = None
89
- _LORA_LOADED = False
90
  _LIGHTNING_LOADED = False
91
  _PIPELINE_LOCK = threading.Lock()
92
 
93
 
94
- def _ensure_hf_lora() -> str:
95
- """Download the LoRA weights from HuggingFace Hub and return the local path."""
96
- from huggingface_hub import hf_hub_download
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
 
98
- return hf_hub_download(repo_id=_HF_LORA_REPO, filename=_HF_LORA_FILENAME)
 
 
 
 
 
 
99
 
 
 
100
 
101
- def _get_pipeline(model_dir: str, device: str, load_lightning_lora: bool):
102
- global _PIPELINE, _PIPELINE_KEY, _LORA_LOADED, _LIGHTNING_LOADED
103
- base_key = (model_dir, device)
104
 
105
- with _PIPELINE_LOCK:
106
- if _PIPELINE is None or _PIPELINE_KEY != base_key:
107
- import torch
108
- from diffusers import FlowMatchEulerDiscreteScheduler, QwenImageEditPlusPipeline
109
-
110
- scheduler_config = {
111
- "base_image_seq_len": 256,
112
- "base_shift": math.log(3),
113
- "invert_sigmas": False,
114
- "max_image_seq_len": 8192,
115
- "max_shift": math.log(3),
116
- "num_train_timesteps": 1000,
117
- "shift": 1.0,
118
- "shift_terminal": None,
119
- "stochastic_sampling": False,
120
- "time_shift_type": "exponential",
121
- "use_beta_sigmas": False,
122
- "use_dynamic_shifting": True,
123
- "use_exponential_sigmas": False,
124
- "use_karras_sigmas": False,
125
- }
126
- scheduler = FlowMatchEulerDiscreteScheduler.from_config(scheduler_config)
127
- pipe = QwenImageEditPlusPipeline.from_pretrained(
128
- model_dir,
129
- torch_dtype=torch.bfloat16,
130
- scheduler=scheduler,
131
- )
132
- pipe.to(device)
133
- pipe.set_progress_bar_config(disable=None)
134
 
135
- _PIPELINE = pipe
136
- _PIPELINE_KEY = base_key
137
- _LORA_LOADED = False
138
- _LIGHTNING_LOADED = False
139
 
140
- if not _LORA_LOADED:
141
- local_path = _ensure_hf_lora()
142
- lora_dir = os.path.dirname(local_path)
143
- weight_name = os.path.basename(local_path)
144
- _PIPELINE.load_lora_weights(lora_dir, weight_name=weight_name, adapter_name=_HF_LORA_ADAPTER)
145
- _LORA_LOADED = True
146
 
 
147
  if load_lightning_lora and not _LIGHTNING_LOADED:
148
- from huggingface_hub import hf_hub_download
149
-
150
  lightning_path = hf_hub_download(
151
  repo_id="lightx2v/Qwen-Image-Edit-2511-Lightning",
152
  filename="Qwen-Image-Edit-2511-Lightning-8steps-V1.0-bf16.safetensors",
@@ -171,11 +157,7 @@ def _get_pipeline(model_dir: str, device: str, load_lightning_lora: bool):
171
  return _PIPELINE
172
 
173
 
174
- def build_app(
175
- *,
176
- default_model_dir: str,
177
- default_device: str,
178
- ):
179
  import base64
180
  import gradio as gr
181
  import inspect
@@ -636,14 +618,11 @@ def build_app(
636
  mode,
637
  spatial_source,
638
  spatial_bbox_margin,
639
- model_dir,
640
- device,
641
  seed,
642
  steps,
643
  true_cfg_scale,
644
  guidance_scale,
645
  negative_prompt,
646
- output_path,
647
  load_lightning_lora,
648
  paste_back_bbox,
649
  paste_back_mode,
@@ -653,8 +632,6 @@ def build_app(
653
  paste_blend_kernel,
654
  not_use_spatial_vae,
655
  ):
656
- import torch
657
-
658
  prompt = (prompt or "").strip()
659
  if not prompt:
660
  raise gr.Error("prompt 为空")
@@ -721,25 +698,13 @@ def build_app(
721
  if mode == "仅生成prompt":
722
  return (img_pil, img_pil), prompt_for_model, info, vis, "完成"
723
 
724
- model_dir = (model_dir or "").strip()
725
- if not model_dir:
726
- raise gr.Error("model_dir 不能为空")
727
- if os.path.exists(model_dir) and not os.path.isdir(model_dir):
728
- raise gr.Error(f"model_dir 不是目录: {model_dir}")
729
-
730
- device = (device or "").strip() or "cuda"
731
-
732
  seed = int(seed) if seed is not None and str(seed).strip() else 0
733
  steps = int(steps) if steps is not None and str(steps).strip() else 8
734
  true_cfg_scale = float(true_cfg_scale) if true_cfg_scale is not None and str(true_cfg_scale).strip() else 4.0
735
  guidance_scale = float(guidance_scale) if guidance_scale is not None and str(guidance_scale).strip() else 1.0
736
  negative_prompt = negative_prompt if negative_prompt is not None else " "
737
 
738
- pipe = _get_pipeline(
739
- model_dir=model_dir,
740
- device=device,
741
- load_lightning_lora=bool(load_lightning_lora),
742
- )
743
 
744
  img = img_for_model if image2_for_model is None else [img_for_model, image2_for_model]
745
  if spatial_mask_l is not None:
@@ -748,7 +713,7 @@ def build_app(
748
  img = img + [spatial_rgb]
749
  else:
750
  img = [img, spatial_rgb]
751
- gen = torch.Generator(device=device)
752
  gen.manual_seed(seed)
753
 
754
  t0 = time.time()
@@ -765,10 +730,6 @@ def build_app(
765
  num_images_per_prompt=1,
766
  not_use_spatial_vae=bool(not_use_spatial_vae),
767
  )
768
- # img[0].save('input0.png')
769
- # img[1].save('input1.png')
770
- # print(img[0].size, img[1].size, out.images[0].size)
771
- # out.images[0].save('./zdw_debug.png')
772
  except Exception as e:
773
  raise gr.Error(f"推理失败: {type(e).__name__}: {e}")
774
  dt = time.time() - t0
@@ -794,26 +755,7 @@ def build_app(
794
  else:
795
  out_img = out_img_crop
796
 
797
- output_path = (output_path or "").strip()
798
- saved = ""
799
- if output_path:
800
- if os.path.isdir(output_path):
801
- raise gr.Error(f"output_path 不能是目录: {output_path}")
802
- parent = os.path.dirname(os.path.abspath(output_path)) or "."
803
- if not os.path.isdir(parent):
804
- raise gr.Error(f"output_path 的父目录不存在: {parent}")
805
- out_img.save(output_path)
806
- saved = os.path.abspath(output_path)
807
- base, ext = os.path.splitext(saved)
808
- img_pil.save(base + "_input" + ext)
809
- if image2 is not None:
810
- image2.save(base + "_ref" + ext)
811
- if mask_pil_l is not None:
812
- mask_pil_l.save(base + "_mask.png")
813
-
814
  status = f"完成 ({dt:.2f}s)"
815
- if saved:
816
- status += f" 已保存: {saved}"
817
  return (img_pil, out_img), prompt_for_model, info, vis, status
818
 
819
  if _HAS_SPACES:
@@ -844,14 +786,11 @@ def build_app(
844
  spatial_source = gr.Radio(["mask", "bbox"], value="mask", label="空间提示来源(作为 mask 输入模型)")
845
  spatial_bbox_margin = gr.Number(label="spatial_bbox_margin", value=0, precision=0)
846
 
847
- model_dir = gr.Textbox(label="model_dir", value=default_model_dir)
848
- device = gr.Textbox(label="device", value=default_device)
849
  seed = gr.Number(label="seed", value=0, precision=0)
850
  steps = gr.Number(label="num_inference_steps", value=8, precision=0)
851
  true_cfg_scale = gr.Number(label="true_cfg_scale", value=4.0)
852
  guidance_scale = gr.Number(label="guidance_scale", value=1.0)
853
  negative_prompt = gr.Textbox(label="negative_prompt", value=" ")
854
- output_path = gr.Textbox(label="output_path(可空)", value="")
855
 
856
  load_lightning_lora = gr.Checkbox(label="加载加速 LoRA(Lightning)", value=False)
857
 
@@ -864,7 +803,8 @@ def build_app(
864
 
865
  not_use_spatial_vae = gr.Checkbox(label="不使用 spatial VAE(not_use_spatial_vae)", value=False)
866
 
867
- out_image = gr.ImageSlider(label="对比:原图 vs 输出", show_label=True)
 
868
  replaced_prompt = gr.Textbox(label="实际使用的 prompt", lines=4)
869
  bbox_info = gr.Textbox(label="区域信息", lines=2)
870
  image1_vis = gr.Image(label="model_input(vit384) + 区域可视化", type="pil")
@@ -879,14 +819,11 @@ def build_app(
879
  mode,
880
  spatial_source,
881
  spatial_bbox_margin,
882
- model_dir,
883
- device,
884
  seed,
885
  steps,
886
  true_cfg_scale,
887
  guidance_scale,
888
  negative_prompt,
889
- output_path,
890
  load_lightning_lora,
891
  paste_back_bbox,
892
  paste_back_mode,
@@ -897,15 +834,12 @@ def build_app(
897
  not_use_spatial_vae,
898
  ],
899
  outputs=[out_image, replaced_prompt, bbox_info, image1_vis, status],
900
- title="Qwen-Image-Edit GUI Tester",
901
  )
902
  return demo
903
 
904
 
905
- demo = build_app(
906
- default_model_dir=os.environ.get("MODEL_DIR", "Qwen/Qwen-Image-Edit-2511"),
907
- default_device="cuda",
908
- )
909
 
910
  if __name__ == "__main__":
911
  demo.launch(show_error=True)
 
3
  import threading
4
  import time
5
 
6
+ import torch
7
+ from diffusers import FlowMatchEulerDiscreteScheduler, QwenImageEditPlusPipeline
8
+ from huggingface_hub import hf_hub_download
9
+
10
  try:
11
  import spaces
12
  _HAS_SPACES = True
13
  except ImportError:
14
  _HAS_SPACES = False
15
 
 
 
 
 
 
 
 
 
 
16
  def calculate_dimensions(target_area: int, ratio: float):
17
  width = math.sqrt(target_area * ratio)
18
  height = width / ratio
 
79
  _HF_LORA_FILENAME = "Qwen-Image-Edit-2511-RefineAny.safetensors"
80
  _HF_LORA_ADAPTER = "refine_anything"
81
 
 
 
 
82
  _LIGHTNING_LOADED = False
83
  _PIPELINE_LOCK = threading.Lock()
84
 
85
 
86
+ def _build_pipeline(model_dir: str):
87
+ """Build the pipeline at module level. ZeroGPU intercepts .to('cuda')
88
+ and keeps the model on CPU until a @spaces.GPU function runs."""
89
+ scheduler_config = {
90
+ "base_image_seq_len": 256,
91
+ "base_shift": math.log(3),
92
+ "invert_sigmas": False,
93
+ "max_image_seq_len": 8192,
94
+ "max_shift": math.log(3),
95
+ "num_train_timesteps": 1000,
96
+ "shift": 1.0,
97
+ "shift_terminal": None,
98
+ "stochastic_sampling": False,
99
+ "time_shift_type": "exponential",
100
+ "use_beta_sigmas": False,
101
+ "use_dynamic_shifting": True,
102
+ "use_exponential_sigmas": False,
103
+ "use_karras_sigmas": False,
104
+ }
105
+ scheduler = FlowMatchEulerDiscreteScheduler.from_config(scheduler_config)
106
+ pipe = QwenImageEditPlusPipeline.from_pretrained(
107
+ model_dir,
108
+ torch_dtype=torch.bfloat16,
109
+ scheduler=scheduler,
110
+ )
111
+ pipe.set_progress_bar_config(disable=None)
112
 
113
+ local_path = hf_hub_download(
114
+ repo_id=_HF_LORA_REPO,
115
+ filename=_HF_LORA_FILENAME,
116
+ )
117
+ lora_dir = os.path.dirname(local_path)
118
+ weight_name = os.path.basename(local_path)
119
+ pipe.load_lora_weights(lora_dir, weight_name=weight_name, adapter_name=_HF_LORA_ADAPTER)
120
 
121
+ pipe.to("cuda")
122
+ return pipe
123
 
 
 
 
124
 
125
+ _DEFAULT_MODEL_DIR = os.environ.get("MODEL_DIR", "Qwen/Qwen-Image-Edit-2511")
126
+ print(f"[startup] Loading pipeline from {_DEFAULT_MODEL_DIR} ...")
127
+ _PIPELINE = _build_pipeline(_DEFAULT_MODEL_DIR)
128
+ print("[startup] Pipeline ready.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
 
 
 
 
 
130
 
131
+ def _get_pipeline(load_lightning_lora: bool):
132
+ global _LIGHTNING_LOADED
 
 
 
 
133
 
134
+ with _PIPELINE_LOCK:
135
  if load_lightning_lora and not _LIGHTNING_LOADED:
 
 
136
  lightning_path = hf_hub_download(
137
  repo_id="lightx2v/Qwen-Image-Edit-2511-Lightning",
138
  filename="Qwen-Image-Edit-2511-Lightning-8steps-V1.0-bf16.safetensors",
 
157
  return _PIPELINE
158
 
159
 
160
+ def build_app():
 
 
 
 
161
  import base64
162
  import gradio as gr
163
  import inspect
 
618
  mode,
619
  spatial_source,
620
  spatial_bbox_margin,
 
 
621
  seed,
622
  steps,
623
  true_cfg_scale,
624
  guidance_scale,
625
  negative_prompt,
 
626
  load_lightning_lora,
627
  paste_back_bbox,
628
  paste_back_mode,
 
632
  paste_blend_kernel,
633
  not_use_spatial_vae,
634
  ):
 
 
635
  prompt = (prompt or "").strip()
636
  if not prompt:
637
  raise gr.Error("prompt 为空")
 
698
  if mode == "仅生成prompt":
699
  return (img_pil, img_pil), prompt_for_model, info, vis, "完成"
700
 
 
 
 
 
 
 
 
 
701
  seed = int(seed) if seed is not None and str(seed).strip() else 0
702
  steps = int(steps) if steps is not None and str(steps).strip() else 8
703
  true_cfg_scale = float(true_cfg_scale) if true_cfg_scale is not None and str(true_cfg_scale).strip() else 4.0
704
  guidance_scale = float(guidance_scale) if guidance_scale is not None and str(guidance_scale).strip() else 1.0
705
  negative_prompt = negative_prompt if negative_prompt is not None else " "
706
 
707
+ pipe = _get_pipeline(load_lightning_lora=bool(load_lightning_lora))
 
 
 
 
708
 
709
  img = img_for_model if image2_for_model is None else [img_for_model, image2_for_model]
710
  if spatial_mask_l is not None:
 
713
  img = img + [spatial_rgb]
714
  else:
715
  img = [img, spatial_rgb]
716
+ gen = torch.Generator(device="cuda")
717
  gen.manual_seed(seed)
718
 
719
  t0 = time.time()
 
730
  num_images_per_prompt=1,
731
  not_use_spatial_vae=bool(not_use_spatial_vae),
732
  )
 
 
 
 
733
  except Exception as e:
734
  raise gr.Error(f"推理失败: {type(e).__name__}: {e}")
735
  dt = time.time() - t0
 
755
  else:
756
  out_img = out_img_crop
757
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
758
  status = f"完成 ({dt:.2f}s)"
 
 
759
  return (img_pil, out_img), prompt_for_model, info, vis, status
760
 
761
  if _HAS_SPACES:
 
786
  spatial_source = gr.Radio(["mask", "bbox"], value="mask", label="空间提示来源(作为 mask 输入模型)")
787
  spatial_bbox_margin = gr.Number(label="spatial_bbox_margin", value=0, precision=0)
788
 
 
 
789
  seed = gr.Number(label="seed", value=0, precision=0)
790
  steps = gr.Number(label="num_inference_steps", value=8, precision=0)
791
  true_cfg_scale = gr.Number(label="true_cfg_scale", value=4.0)
792
  guidance_scale = gr.Number(label="guidance_scale", value=1.0)
793
  negative_prompt = gr.Textbox(label="negative_prompt", value=" ")
 
794
 
795
  load_lightning_lora = gr.Checkbox(label="加载加速 LoRA(Lightning)", value=False)
796
 
 
803
 
804
  not_use_spatial_vae = gr.Checkbox(label="不使用 spatial VAE(not_use_spatial_vae)", value=False)
805
 
806
+ from gradio_imageslider import ImageSlider
807
+ out_image = ImageSlider(label="对比:原图 vs 输出", show_label=True)
808
  replaced_prompt = gr.Textbox(label="实际使用的 prompt", lines=4)
809
  bbox_info = gr.Textbox(label="区域信息", lines=2)
810
  image1_vis = gr.Image(label="model_input(vit384) + 区域可视化", type="pil")
 
819
  mode,
820
  spatial_source,
821
  spatial_bbox_margin,
 
 
822
  seed,
823
  steps,
824
  true_cfg_scale,
825
  guidance_scale,
826
  negative_prompt,
 
827
  load_lightning_lora,
828
  paste_back_bbox,
829
  paste_back_mode,
 
834
  not_use_spatial_vae,
835
  ],
836
  outputs=[out_image, replaced_prompt, bbox_info, image1_vis, status],
837
+ title="RefineAnything - Qwen Image Edit",
838
  )
839
  return demo
840
 
841
 
842
+ demo = build_app()
 
 
 
843
 
844
  if __name__ == "__main__":
845
  demo.launch(show_error=True)
requirements.txt CHANGED
@@ -5,5 +5,8 @@ attrs
5
  gradio_imageslider
6
  git+https://github.com/huggingface/diffusers
7
  torch
 
8
  accelerate
9
- safetensors
 
 
 
5
  gradio_imageslider
6
  git+https://github.com/huggingface/diffusers
7
  torch
8
+ torchvision
9
  accelerate
10
+ safetensors
11
+ sentencepiece
12
+ protobuf