"""BFS - Best Face Swap: Qwen-Image-Edit-2509 + BFS Head V3 LoRA, Gradio UI.

Designed for Hugging Face ZeroGPU Spaces: defaults to CPU execution
(FORCE_CPU=1) because the ZeroGPU workers frequently abort with OOM.
"""

import os
import subprocess
import sys

import spaces
import torch
import gradio as gr
from diffusers import QwenImageEditPlusPipeline
from PIL import Image

from optimization import optimize_pipeline_

HF_BASE_MODEL = "Qwen/Qwen-Image-Edit-2509"
BFS_LORA = "Alissonerdx/BFS-Best-Face-Swap"
BFS_LORA_WEIGHT = "bfs_head_v3_qwen_image_edit_2509.safetensors"  # Head V3 (recommended)

# --------- PIPELINE (ZERO GPU) ---------

# Device actually used for inference; updated by load_pipeline().
EXEC_DEVICE = "cpu"
# Force CPU when the GPU is weak or keeps aborting (ZeroGPU is unstable).
FORCE_CPU = bool(int(os.getenv("FORCE_CPU", "1")))
# Only enable the GPU when explicitly requested (default 0 to avoid OOM aborts on ZeroGPU).
PREFER_GPU = bool(int(os.getenv("PREFER_GPU", "0")))


def ensure_torchvision():
    """Ensure torchvision is importable (Qwen2VLProcessor requires it).

    Tries the import first; if missing, pip-installs the version matching
    the installed torch and retries.

    Raises:
        ImportError: if torchvision is missing and installation fails.
    """
    try:
        import torchvision  # noqa: F401
        return
    except ImportError:
        # Strip local version suffixes like "+cu121" before pinning.
        torch_version = torch.__version__.split("+")[0]
        try:
            subprocess.check_call(
                [sys.executable, "-m", "pip", "install", f"torchvision=={torch_version}"],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
            import torchvision  # noqa: F401
        except Exception as exc:  # pragma: no cover - only runs on Spaces infra
            raise ImportError(
                "Torchvision is required for Qwen2VLProcessor. "
                "Please add a matching torchvision to requirements (e.g. pip install torchvision==torch_version)."
            ) from exc


def _build_pipeline(device: str, dtype: torch.dtype):
    """Build the Qwen edit pipeline on *device* with the BFS Head V3 LoRA attached."""
    ensure_torchvision()
    pipe = QwenImageEditPlusPipeline.from_pretrained(
        HF_BASE_MODEL,
        torch_dtype=dtype,
    )
    pipe.to(device)
    # Reduce VRAM/RAM footprint.
    pipe.enable_attention_slicing()
    pipe.enable_vae_slicing()
    # Load the BFS Head V3 LoRA.
    pipe.load_lora_weights(
        BFS_LORA,
        weight_name=BFS_LORA_WEIGHT,
        adapter_name="bfs_head_v3",
    )
    pipe.set_adapters(["bfs_head_v3"], adapter_weights=[1.0])
    pipe.set_progress_bar_config(disable=True)
    return pipe


def maybe_optimize_pipeline(pipe):
    """Apply AOTI optimization when running on CUDA; no-op on CPU.

    Uses tiny dummy inputs to keep VRAM usage low and silently falls back
    to the unoptimized pipeline on any failure (usually out-of-memory).
    """
    if EXEC_DEVICE != "cuda":
        return pipe
    try:
        dummy = Image.new("RGB", (256, 256))
        generator = torch.Generator(device="cuda").manual_seed(0)
        optimize_pipeline_(
            pipe,
            image=[dummy, dummy],
            prompt="warmup",
            negative_prompt=" ",
            num_inference_steps=1,
            true_cfg_scale=1.0,
            guidance_scale=1.0,
            num_images_per_prompt=1,
            generator=generator,
            width=256,
            height=256,
        )
    except Exception:
        # Optimization failed (typically memory); keep the original pipeline.
        pass
    return pipe


@spaces.GPU  # required for ZeroGPU
def load_pipeline():
    """Build the pipeline, preferring CUDA only when explicitly enabled.

    Defaults to CPU to avoid GPU-worker aborts. Enable GPU with
    PREFER_GPU=1 and FORCE_CPU=0. On a CUDA failure (usually OOM) the
    pipeline is rebuilt on CPU so the app does not crash.
    """
    global EXEC_DEVICE
    prefer_cuda = torch.cuda.is_available() and PREFER_GPU and not FORCE_CPU
    device = "cuda" if prefer_cuda else "cpu"
    dtype = torch.float16 if device == "cuda" else torch.float32
    try:
        pipe = _build_pipeline(device, dtype)
        EXEC_DEVICE = device
        pipe = maybe_optimize_pipeline(pipe)
        return pipe
    except Exception:
        # GPU workers often abort due to OOM; fall back to CPU.
        if device == "cuda":
            device = "cpu"
            dtype = torch.float32
            pipe = _build_pipeline(device, dtype)
            EXEC_DEVICE = device
            return pipe
        # Bare raise preserves the original traceback (was `raise exc`).
        raise


pipe = load_pipeline()

# --------- UTILITIES ---------


def resize_to_max(img: Image.Image, max_side: int = 896) -> Image.Image:
    """Downscale *img* so its longer side is at most *max_side*; never upscales."""
    w, h = img.size
    max_dim = max(w, h)
    if max_dim <= max_side:
        return img  # no upscaling
    scale = max_side / max_dim
    new_w = int(w * scale)
    new_h = int(h * scale)
    return img.resize((new_w, new_h), Image.Resampling.LANCZOS)


DEFAULT_PROMPT = (
    "head_swap: start with Picture 1 as the base image, keeping its lighting, "
    "environment, and background. remove the head from Picture 1 completely and "
    "replace it with the head from Picture 2. ensure the head and body have correct "
    "anatomical proportions, and blend the skin tones, shadows, and lighting naturally "
    "so the final result appears as one coherent, realistic person."
)

# --------- INFERENCE FUNCTION ---------


def run_bfs(
    body_image,  # Picture 1 (body)
    face_image,  # Picture 2 (face)
    prompt_text,
    steps,
    true_cfg_scale,
    guidance_scale,
    seed,
):
    """Run the head-swap pipeline; returns (result image or None, status message)."""
    if body_image is None or face_image is None:
        return None, "⚠️ Cần upload đủ 2 ảnh: Picture 1 (body) và Picture 2 (face)."

    # BFS Head V3 convention: Image 1 = body, Image 2 = face.
    body_image = resize_to_max(body_image)
    face_image = resize_to_max(face_image)

    # Fall back to the default BFS prompt on empty/blank input.
    # (Single safe expression — the old code could call .strip() on None.)
    prompt = str(prompt_text).strip() if prompt_text else ""
    if not prompt:
        prompt = DEFAULT_PROMPT

    generator = torch.Generator(device=EXEC_DEVICE).manual_seed(int(seed))
    inputs = {
        "image": [body_image, face_image],  # order matters: [body, face]
        "prompt": prompt,
        "negative_prompt": " ",
        "num_inference_steps": int(steps),
        "true_cfg_scale": float(true_cfg_scale),
        "guidance_scale": float(guidance_scale),
        "num_images_per_prompt": 1,
        "generator": generator,
        "width": body_image.width,
        "height": body_image.height,
    }
    with torch.inference_mode():
        out = pipe(**inputs)
    return out.images[0], ""


# --------- GRADIO UI ---------

with gr.Blocks(title="BFS - Best Face Swap (Qwen Image Edit 2509, CPU)") as demo:
    gr.Markdown(
        """
        # 🧠 BFS - Best Face Swap (Qwen Image Edit 2509, CPU)

        **BFS Head V3** – Picture 1 = **Body**, Picture 2 = **Face**.
        Model chạy trên **CPU (zero GPU)** nên sẽ hơi chậm, ưu tiên ảnh vừa phải (≤ 896px cạnh dài).

        > Vui lòng không dùng cho người thật / người nổi tiếng ngoài đời.
        """
    )

    with gr.Row():
        with gr.Column():
            body_image = gr.Image(
                label="Picture 1 - BODY (ảnh gốc, giữ background)",
                type="pil",
            )
            face_image = gr.Image(
                label="Picture 2 - FACE (ảnh mặt muốn ghép)",
                type="pil",
            )
            prompt_box = gr.Textbox(
                label="Prompt (để trống dùng prompt BFS Head V3 mặc định)",
                value="",
                lines=4,
            )
            steps = gr.Slider(
                label="Steps",
                minimum=8,
                maximum=40,
                value=24,
                step=1,
            )
            true_cfg_scale = gr.Slider(
                label="True CFG Scale",
                minimum=0.0,
                maximum=10.0,
                value=4.0,
                step=0.1,
            )
            guidance_scale = gr.Slider(
                label="Guidance Scale",
                minimum=0.0,
                maximum=8.0,
                value=1.0,
                step=0.1,
            )
            seed = gr.Number(
                label="Seed",
                value=0,
                precision=0,
            )
            run_button = gr.Button("🚀 Run Face / Head Swap", variant="primary")

        with gr.Column():
            output_image = gr.Image(
                label="Kết quả",
                type="pil",
            )
            info = gr.Markdown("")

    run_button.click(
        fn=run_bfs,
        inputs=[body_image, face_image, prompt_box, steps, true_cfg_scale, guidance_scale, seed],
        outputs=[output_image, info],
    )


if __name__ == "__main__":
    demo.launch()