""" Minimal Gradio wrapper for the given Qwen-Image-Edit inference script. Features: - Loads the model once and reuses it. - Inputs: image, edit prompt, cond_b, cond_delta, optional model path. - Matches your original settings (size 1024, steps=24, true_cfg_scale=4.0, fixed seed=42, and the same GRAG scale structure repeated 60 times). Run: pip install gradio pillow torch # plus your project deps providing hacked_models/* and model weights python gradio_qwen_edit_minimal.py Then open the local URL printed by Gradio. """ import os from typing import Optional import gradio as gr import torch from PIL import Image from huggingface_hub import snapshot_download import os # --- your project imports (as in the original script) --- from hacked_models.scheduler import FlowMatchEulerDiscreteScheduler from hacked_models.pipeline import QwenImageEditPipeline from hacked_models.models import QwenImageTransformer2DModel from hacked_models.utils import seed_everything from huggingface_hub import snapshot_download from requests.exceptions import ChunkedEncodingError from urllib3.exceptions import ProtocolError import os, time def robust_snapshot_download(repo_id, local_dir, token=None, retries=5): os.makedirs(local_dir, exist_ok=True) os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "1") # 可选:更稳更快 last_err = None for i in range(retries): try: return snapshot_download( repo_id=repo_id, local_dir=local_dir, local_dir_use_symlinks=False, resume_download=True, # 断点续传 use_auth_token=token, max_workers=1 # 并行下载分片 ) except (ChunkedEncodingError, ProtocolError) as e: last_err = e wait = min(2**i, 30) print(f"[download] network error {i+1}/{retries}: {e}; retry in {wait}s", flush=True) time.sleep(wait) raise RuntimeError(f"Download failed after {retries} retries: {last_err}") # ----------------------------- # Global state # ----------------------------- _DEVICE = "cuda" if torch.cuda.is_available() else "cpu" _DTYPE = torch.bfloat16 if _DEVICE == "cuda" else torch.float32 _PIPELINE: Optional[QwenImageEditPipeline] = None _LOADED_MODEL_PATH: Optional[str] = None def _load_pipeline(model_path: str) -> QwenImageEditPipeline: """Load (or reuse) the pipeline for the given model_path.""" global _PIPELINE, _LOADED_MODEL_PATH if _PIPELINE is not None and _LOADED_MODEL_PATH == model_path: return _PIPELINE # Set seed once (matches original) seed_everything(42) # Load components scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained( os.path.join(model_path, "scheduler"), torch_dtype=_DTYPE ) transformer = QwenImageTransformer2DModel.from_pretrained( os.path.join(model_path, "transformer"), torch_dtype=_DTYPE ) pipe = QwenImageEditPipeline.from_pretrained( model_path, torch_dtype=_DTYPE, scheduler=scheduler, transformer=transformer ) pipe.set_progress_bar_config(disable=None) pipe.to(_DTYPE) pipe.to(_DEVICE) _PIPELINE = pipe _LOADED_MODEL_PATH = model_path return pipe def _build_grag_scale(cond_b: float, cond_delta: float, repeats: int = 60): """Replicates your original GRAG schedule structure. 
def _build_grag_scale(cond_b: float, cond_delta: float, repeats: int = 60):
    """Replicates your original GRAG schedule structure.

    Each element is: ((512, 1.0, 1.0), (4096, cond_b, cond_delta))
    """
    return [((512, 1.0, 1.0), (4096, cond_b, cond_delta))] * repeats


def predict(
    image: Image.Image,
    edit_prompt: str,
    cond_b: float,
    cond_delta: float,
):
    if image is None or not edit_prompt:
        return None

    # Match original preprocessing
    input_image = image.convert("RGB").resize((1024, 1024))

    inputs = {
        "image": input_image,
        "prompt": edit_prompt,
        "generator": torch.manual_seed(42),
        "true_cfg_scale": 4.0,
        "negative_prompt": " ",
        "num_inference_steps": 24,
        "return_dict": False,
        "grag_scale": _build_grag_scale(cond_b, cond_delta, repeats=60),
    }

    with torch.inference_mode():
        # `pipe` is the module-level pipeline loaded once at startup (below).
        image_batch, x0_images, saved_outputs = pipe(**inputs)

    # Return the first image (same as original save behavior)
    return image_batch[0]


model_dir = "Qwen-Image-Edit"
repo_id = "Qwen/Qwen-Image-Edit"

if not os.path.exists(model_dir) or not os.listdir(model_dir):
    robust_snapshot_download(repo_id, model_dir, token=os.getenv("HF_TOKEN"))
    print(f"Model downloaded to {model_dir}")
else:
    print(f"Model already exists at {model_dir}")

pipe = _load_pipeline(model_dir)

with gr.Blocks(title="Qwen Image Edit — Minimal GRAG Demo") as demo:
    gr.Markdown(
        "# Qwen Image Edit — Minimal GRAG Demo\n"
        "Upload an image, enter your edit instruction, and set GRAG params."
    )

    with gr.Row():
        in_image = gr.Image(label="Input Image", type="pil")
        out_image = gr.Image(label="Edited Output", type="pil")

    edit_prompt = gr.Textbox(
        label="Edit Instruction",
        placeholder="e.g., Put a pair of black-framed glasses on him.",
    )

    with gr.Row():
        cond_b = gr.Slider(label="cond_b", minimum=0.8, maximum=2.0, value=1.0, step=0.01)
        cond_delta = gr.Slider(label="cond_delta", minimum=0.8, maximum=2.0, value=1.0, step=0.01)

    run_btn = gr.Button("Run Edit")
    run_btn.click(
        fn=predict,
        inputs=[in_image, edit_prompt, cond_b, cond_delta],
        outputs=[out_image],
        api_name="run_edit",
    )

    gr.Markdown(
        """
        **Notes**
        - Uses fixed seed=42 and num_inference_steps=24 to match your script.
        - Resizes the input to 1024×1024 before inference (as in your code).
        - `grag_scale` is built as a list of length 60 with the same tuples.
        - Automatically chooses CUDA if available; otherwise runs on CPU.
        """
    )

if __name__ == "__main__":
    demo.queue().launch(share=True)
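
# A minimal client-side sketch, assuming the demo is running locally on the
# default port and a gradio_client version that provides handle_file (gradio 4.x+).
# The file name and prompt are illustrative; the endpoint matches api_name="run_edit"
# registered above:
#
#   from gradio_client import Client, handle_file
#   client = Client("http://127.0.0.1:7860")
#   result = client.predict(
#       handle_file("face.jpg"),             # input image
#       "Put black-framed glasses on him.",  # edit instruction
#       1.0,                                 # cond_b
#       1.0,                                 # cond_delta
#       api_name="/run_edit",
#   )
#   print(result)  # filepath of the edited image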