Spaces:
Runtime error
Runtime error
| import math | |
| import random | |
| import cv2 | |
| import gradio as gr | |
| import numpy as np | |
| import spaces | |
| import torch | |
| from PIL import Image | |
| from diffusers import FlowMatchEulerDiscreteScheduler | |
| import mediapipe as mp | |
| from optimization import optimize_pipeline_ | |
| from qwenimage.pipeline_qwenimage_edit_plus import QwenImageEditPlusPipeline | |
| from qwenimage.qwen_fa3_processor import QwenDoubleStreamAttnProcessorFA3 | |
| from qwenimage.transformer_qwenimage import QwenImageTransformer2DModel | |
| import glob | |
| import os | |
# Redirect Hugging Face caches to /tmp — on HF Spaces only /tmp is reliably
# writable. NOTE(review): TRANSFORMERS_CACHE is deprecated in recent
# transformers releases in favor of HF_HOME — confirm the installed version.
os.environ['HF_HOME'] = '/tmp'
os.environ['TRANSFORMERS_CACHE'] = '/tmp'
# Directory holding the selectable hairstyle reference PNGs.
HAIR_IMAGE_DIR = "hair_png"

def list_hair_images():
    """Return the basenames of all ``*.png`` files under HAIR_IMAGE_DIR.

    The result is sorted so the dropdown order is deterministic; plain
    ``glob.glob`` order is filesystem-dependent and varied between runs.
    """
    files = glob.glob(os.path.join(HAIR_IMAGE_DIR, "*.png"))
    return sorted(os.path.basename(f) for f in files)
def load_hair_image(filename):
    """Load a hairstyle image from HAIR_IMAGE_DIR as an RGB PIL image.

    Returns None when *filename* is None or the file no longer exists, so a
    stale dropdown selection degrades to the same "no image" path the caller
    (``infer``) already checks for, instead of raising FileNotFoundError.
    """
    if filename is None:
        return None
    path = os.path.join(HAIR_IMAGE_DIR, filename)
    if not os.path.isfile(path):
        # Dropdown may hold a name whose file was removed after startup.
        return None
    return Image.open(path).convert("RGB")
# ===============================
# --- Model Loading ---
# ===============================
# bfloat16 keeps VRAM usage manageable; falls back to CPU when CUDA is
# absent (NOTE(review): CPU inference with this pipeline will be very slow).
dtype = torch.bfloat16
device = "cuda" if torch.cuda.is_available() else "cpu"

# FlowMatch Euler scheduler configuration. base_shift == max_shift == ln(3)
# with dynamic shifting enabled — presumably matching the few-step
# (Lightning-distilled) sampling setup; confirm against the LoRA's docs.
scheduler_config = {
    "base_image_seq_len": 256,
    "base_shift": math.log(3),
    "invert_sigmas": False,
    "max_image_seq_len": 8192,
    "max_shift": math.log(3),
    "num_train_timesteps": 1000,
    "shift": 1.0,
    "shift_terminal": None,
    "stochastic_sampling": False,
    "time_shift_type": "exponential",
    "use_beta_sigmas": False,
    "use_dynamic_shifting": True,
    "use_exponential_sigmas": False,
    "use_karras_sigmas": False,
}
scheduler = FlowMatchEulerDiscreteScheduler.from_config(scheduler_config)

# Load the edit pipeline with the custom scheduler.
pipe = QwenImageEditPlusPipeline.from_pretrained(
    "Qwen/Qwen-Image-Edit-2509",
    scheduler=scheduler,
    torch_dtype=dtype,
).to(device)

# Apply the Lightning 4-step LoRA, then fuse it into the base weights so no
# per-call LoRA dispatch overhead remains.
pipe.load_lora_weights(
    "lightx2v/Qwen-Image-Lightning",
    weight_name="Qwen-Image-Edit-2509/Qwen-Image-Edit-2509-Lightning-4steps-V1.0-bf16.safetensors",
    weight_dtype=torch.bfloat16,
)
pipe.fuse_lora(lora_scale=1.0)

# Swap in the project-local transformer class and FlashAttention-3 attention
# processor. NOTE(review): reassigning __class__ assumes the two classes are
# attribute-layout compatible — confirm QwenImageTransformer2DModel matches
# the class the checkpoint was loaded with.
pipe.transformer.__class__ = QwenImageTransformer2DModel
pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())
# ===============================
# --- Constants & Prompts ---
# ===============================
MAX_SEED = np.iinfo(np.int32).max        # upper bound for the seed slider
DEFAULT_SEED = 0
DEFAULT_RANDOMIZE = True
DEFAULT_TRUE_GUIDANCE_SCALE = 1.0        # 1.0 effectively disables true-CFG
DEFAULT_NUM_INFERENCE_STEPS = 4          # matches the 4-step Lightning LoRA

# picture1 = hair / base image, picture2 = face image
FIXED_PROMPT = (
    "Replace the blurred face in picture1 with the face from picture2."
    "Use only the visible face from picture2, and ignore all the surrounding white areas."
    "Preserve picture1’s hairstyle, head shape, lighting, shadows, and background exactly as they are."
    "Place the new face in the correct position and adjust its size, rotation angle, and perspective so that it matches the original head orientation naturally."
    "Match the inserted face to picture2's skin tone, lighting direction, and contrast."
    "Also adjust the skin tone of the neck and body in picture1 so that it matches the skin tone of the face from picture2 naturally."
    "Blend edges smoothly so the result looks like a single realistic person with no visible editing artifacts."
)
FIXED_NEGATIVE_PROMPT = "blurry, extra lines, color bleeding"
# ===============================
# --- I18N Dictionary ---
# ===============================
# UI strings keyed by element, then by language code ("en" / "ja" / "zh").
I18N = {
    "title": {
        "en": "Hairstyle Transformer",
        "ja": "髪型変換",
        "zh": "发型变换",
    },
    "notice": {
        "en": (
            "Note: When using this software, please comply with applicable laws and ensure that you do not infringe on the rights of others. "
            "The software developer assumes no responsibility for how users utilize this software. "
            "When posting images online (SNS, etc.), be sure to use source photos of fictional people created with image-generation tools, "
            "and never engage in activities such as deepfakes that impersonate or mislead others."
        ),
        "ja": (
            "注意:本ソフトウェアを利用する際は、関連する法規制を遵守し、他者の権利を侵害しないよう十分ご注意ください。"
            "また、ソフトウェア開発者は、ユーザーによる利用方法について一切の責任を負いません。"
            "SNS等で公開する際は、画像生成アプリなどで作成した実在しない人物の画像を入力素材としてご使用ください。"
            "他者を不当に模倣・誤認させるディープフェイクなどの行為は絶対に行わないでください。"
        ),
        "zh": (
            "注意:使用本软件时,请遵守相关法律法规,并注意不要侵犯他人的权利。"
            "软件开发者对用户的使用方式不承担任何责任。"
            "在社交平台(SNS等)公开发布时,请使用通过图像生成工具创建的虚构人物图片作为输入,"
            "绝不可从事深度伪造等不正当模仿或误导他人的行为。"
        )
    },
    "face_input": {
        "en": "Face image (picture2)",
        "ja": "Face入力画像(picture2)",
        "zh": "人脸图像(picture2)",
    },
    "hair_input": {
        "en": "Hair image (picture1)",
        "ja": "Hair 画像(picture1)",
        "zh": "头发图像(picture1)",
    },
    "accordion": {"en": "Advanced settings", "ja": "詳細設定", "zh": "高级设置"},
    "seed": {"en": "Seed", "ja": "Seed", "zh": "Seed"},
    "rand": {"en": "Randomize seed", "ja": "ランダムシード", "zh": "随机种子"},
    "tgs": {"en": "True guidance scale", "ja": "True guidance scale", "zh": "True guidance scale"},
    "steps": {"en": "Steps", "ja": "生成ステップ数", "zh": "生成步数"},
    "run": {"en": "Generate", "ja": "生成", "zh": "生成"},
    "output": {"en": "Output image", "ja": "出力画像", "zh": "输出图像"},
    "status": {"en": "Status", "ja": "ステータス", "zh": "状态"},
    "status_ok": {
        "en": "Generated 1 image (PNG).",
        "ja": "1枚生成しました(PNG)。",
        "zh": "已生成 1 张图片(PNG)。",
    },
    "err_no_img": {
        "en": "Error: Please upload both Face and Hair images.",
        "ja": "エラー: Face画像とHair画像の両方をアップロードしてください。",
        "zh": "错误:请先上传 Face 和 Hair 两张图片。",
    },
    "lang_label": {"en": "UI Language", "ja": "UI言語", "zh": "界面语言"},
}

def t(key, lang):
    """Look up UI string *key* for *lang*, falling back to English.

    Falling back instead of raising KeyError keeps the UI functional if an
    unexpected language code ever reaches a handler.
    """
    entry = I18N[key]
    return entry.get(lang, entry["en"])
# FaceMesh module handle, used by the blur/whiteout helpers below.
mp_face = mp.solutions.face_mesh

# MediaPipe FaceMesh landmark indices tracing the face oval (jaw + forehead
# outline) in connection order.
FACE_OVAL = [
    10,338,297,332,284,251,389,356,454,323,361,288,
    397,365,379,378,400,377,152,148,176,149,150,136,
    172,58,132,93,234,127,162,21,54,103,67,109
]
# Representative brow landmarks (lower edge).
LEFT_BROW = [105, 66, 107]   # around the middle of the left brow
RIGHT_BROW = [334, 293, 300] # around the middle of the right brow
# ===============================
# --- Face Preprocess with OpenCV ---
# ===============================
def preprocess_face(image: Image.Image, target_size: int = 1024) -> Image.Image:
    """Center the detected face and return a square ``target_size`` image.

    Uses MediaPipe face detection to pick the largest face, scales the whole
    photo so the face bbox spans ~60% of the output side, and pastes it onto
    a white canvas with the face (nudged slightly above its bbox center) at
    the canvas center. Falls back to a center-square crop when no face is
    detected.

    Args:
        image: input photo (any PIL mode; converted to RGB).
        target_size: side length in pixels of the square output.

    Returns:
        A ``target_size`` x ``target_size`` RGB PIL image.
    """
    face_ratio = 0.6  # fraction of the output side the face bbox should span
    img_rgb = np.array(image.convert("RGB"))
    h, w, _ = img_rgb.shape

    # --- MediaPipe face detection (model_selection=1: full-range model) ---
    mp_fd = mp.solutions.face_detection
    with mp_fd.FaceDetection(model_selection=1, min_detection_confidence=0.5) as fd:
        results = fd.process(img_rgb)

    # No face found: center-square crop resized to the target size.
    if not results.detections:
        side = min(w, h)
        x1 = (w - side) // 2
        y1 = (h - side) // 2
        crop = img_rgb[y1:y1 + side, x1:x1 + side]
        return Image.fromarray(crop).resize((target_size, target_size), Image.LANCZOS)

    # Select the detection whose relative bounding box is largest.
    def bbox_area(det):
        box = det.location_data.relative_bounding_box
        return box.width * box.height

    det = max(results.detections, key=bbox_area)
    box = det.location_data.relative_bounding_box

    # MediaPipe bboxes are normalized to [0, 1] -> convert to pixel coords.
    x = int(box.xmin * w)
    y = int(box.ymin * h)
    fw = int(box.width * w)
    fh = int(box.height * h)

    # Face center, shifted up by 15% of face height so forehead/hairline
    # stays inside the frame.
    cx = x + fw // 2
    cy_adjusted = y + fh // 2 - int(fh * 0.15)

    # Scale the entire image so max(face w, face h) == face_ratio * output.
    desired_face_size = int(target_size * face_ratio)
    scale = desired_face_size / max(fw, fh)
    scaled_w = int(w * scale)
    scaled_h = int(h * scale)
    scaled_img = cv2.resize(img_rgb, (scaled_w, scaled_h), interpolation=cv2.INTER_LANCZOS4)

    # Offset that places the scaled face center at the canvas center.
    cx_s = int(cx * scale)
    cy_s = int(cy_adjusted * scale)
    offset_x = target_size // 2 - cx_s
    offset_y = target_size // 2 - cy_s

    # White canvas; clamp the paste rectangle on both the canvas and the
    # scaled image so out-of-bounds regions are simply left white.
    canvas = np.ones((target_size, target_size, 3), dtype=np.uint8) * 255
    x_start = max(0, offset_x)
    y_start = max(0, offset_y)
    x_end = min(target_size, offset_x + scaled_w)
    y_end = min(target_size, offset_y + scaled_h)
    src_x1 = max(0, -offset_x)
    src_y1 = max(0, -offset_y)
    src_x2 = min(src_x1 + (x_end - x_start), scaled_w)
    src_y2 = min(src_y1 + (y_end - y_start), scaled_h)
    canvas[y_start:y_end, x_start:x_end] = scaled_img[src_y1:src_y2, src_x1:src_x2]
    return Image.fromarray(canvas)
# ===============================
# --- Blur faces in the hair image (MediaPipe FaceMesh) ---
# ===============================
def blur_face_with_landmarks(image_pil):
    """Heavily blur every detected face region of *image_pil*.

    Applied to the hair/base picture so the edit model replaces the blurred
    face instead of keeping the original identity. Returns an RGB PIL image,
    or None when the input is None. When no face is detected, the WHOLE
    image is blurred as a fallback.
    """
    if image_pil is None:
        return None
    img_rgb = np.array(image_pil.convert("RGB"))
    img = img_rgb[:, :, ::-1]  # RGB -> BGR (OpenCV convention)
    h, w = img.shape[:2]
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    with mp_face.FaceMesh(
        static_image_mode=True,
        max_num_faces=5,
        refine_landmarks=True,
    ) as face:
        res = face.process(rgb)
    # No face detected -> blur the entire image instead of passing through.
    if not res.multi_face_landmarks:
        blurred = cv2.GaussianBlur(img, (301, 301), 0)
        return Image.fromarray(blurred[:, :, ::-1])  # BGR -> RGB
    output = img.copy()
    for lm in res.multi_face_landmarks:
        L = lm.landmark
        # Landmarks are normalized [0,1] -> convert to pixel coordinates.
        oval = [(int(L[i].x * w), int(L[i].y * h)) for i in FACE_OVAL]
        left_brow = [(int(L[i].x*w), int(L[i].y*h)) for i in LEFT_BROW]
        right_brow = [(int(L[i].x*w), int(L[i].y*h)) for i in RIGHT_BROW]
        # Face-size estimate used to scale the forehead extension.
        # NOTE(review): oval[:8] are the FIRST eight FACE_OVAL landmarks
        # (upper contour), although the variable says "chin" — the distance
        # still scales with face size; confirm naming vs. intent.
        brow_center = np.mean(left_brow + right_brow, axis=0)
        chin_center = np.mean(oval[:8], axis=0)
        face_h = np.linalg.norm(brow_center - chin_center)
        offset = int(face_h * 0.12)
        # Brow points shifted upward to also cover part of the forehead.
        forehead = [(x, y - offset) for (x, y) in left_brow + right_brow]
        mask = np.zeros((h, w), dtype=np.uint8)
        cv2.fillPoly(mask, [np.array(oval, np.int32)], 255)
        flood = mask.copy()
        # NOTE(review): flood-filling from the image center assumes the face
        # covers the center pixel; if the seed lands outside the filled oval
        # this fills the background region instead — confirm intent.
        cv2.floodFill(flood, None, seedPoint=(w//2, h//2), newVal=255)
        cv2.fillPoly(flood, [np.array(forehead, np.int32)], 255)
        final_mask = flood
        # Large-kernel Gaussian blur; copy composited only where mask==255.
        blurred = cv2.GaussianBlur(output, (301, 301), 0)
        mask3 = cv2.merge([final_mask, final_mask, final_mask])
        output = np.where(mask3 == 255, blurred, output)
    return Image.fromarray(output[:, :, ::-1])
def whiteout_except_face(image_pil):
    """Paint everything except detected face regions white, with soft edges.

    Applied to the face picture so the edit model sees only the face on a
    white surround (the fixed prompt tells it to ignore white areas).
    Returns an RGB PIL image, or None when the input is None. When no face
    is detected the whole image is returned white.
    """
    if image_pil is None:
        return None
    img_rgb = np.array(image_pil.convert("RGB"))
    img = img_rgb[:, :, ::-1]  # RGB -> BGR (OpenCV convention)
    h, w = img.shape[:2]
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    with mp_face.FaceMesh(
        static_image_mode=True,
        max_num_faces=5,
        refine_landmarks=True,
    ) as face:
        res = face.process(rgb)
    # No face detected -> return an all-white image.
    if not res.multi_face_landmarks:
        white_img = np.full_like(img, 255)  # white in BGR
        return Image.fromarray(white_img[:, :, ::-1])  # back to RGB
    white_bg = np.full_like(img, 255)
    mask_total = np.zeros((h, w), dtype=np.uint8)
    for lm in res.multi_face_landmarks:
        L = lm.landmark
        # Normalized landmarks -> pixel coordinates.
        oval = [(int(L[i].x * w), int(L[i].y * h)) for i in FACE_OVAL]
        left_brow = [(int(L[i].x * w), int(L[i].y * h)) for i in LEFT_BROW]
        right_brow = [(int(L[i].x * w), int(L[i].y * h)) for i in RIGHT_BROW]
        brow_y = int(np.mean([p[1] for p in left_brow + right_brow]))
        # NOTE(review): oval[:8] are the first eight FACE_OVAL landmarks
        # (upper contour) although named "chin" — the brow-to-here distance
        # still scales with face size; confirm naming vs. intent.
        chin_y = int(np.mean([oval[i][1] for i in range(8)]))
        face_h = abs(chin_y - brow_y)
        margin = int(face_h * 0.50)
        mask = np.zeros((h, w), dtype=np.uint8)
        cv2.fillPoly(mask, [np.array(oval, np.int32)], 255)
        filled = mask.copy()
        # NOTE(review): flood-fill seeded at the image center assumes the
        # face contains the center pixel — confirm for off-center faces.
        cv2.floodFill(filled, None, seedPoint=(w//2, h//2), newVal=255)
        # Zero out everything above (brow - margin) to exclude hair.
        cut_y = max(brow_y - margin, 0)
        filled[:cut_y, :] = 0
        mask_total = cv2.bitwise_or(mask_total, filled)
    # Feather the mask edge, then alpha-blend image over the white background.
    soft_mask = cv2.GaussianBlur(mask_total, (0, 0), 25)
    soft_mask_f = soft_mask.astype(np.float32) / 255.0
    soft_mask_f = cv2.merge([soft_mask_f]*3)
    output = img * soft_mask_f + white_bg * (1.0 - soft_mask_f)
    output = output.astype(np.uint8)
    return Image.fromarray(output[:, :, ::-1])
# ===============================
# --- Unified Inference Function ---
# ===============================
def infer(
    face_image,
    hair_image,
    seed=DEFAULT_SEED,
    randomize_seed=DEFAULT_RANDOMIZE,
    true_guidance_scale=DEFAULT_TRUE_GUIDANCE_SCALE,
    num_inference_steps=DEFAULT_NUM_INFERENCE_STEPS,
    lang="en",
    progress=gr.Progress(track_tqdm=True),
):
    """Run the face-swap edit: picture1 = hair/base, picture2 = face.

    Args:
        face_image: PIL image (or path-like) of the face source (picture2).
        hair_image: filename string selected in the hair dropdown (picture1).
        seed: RNG seed; replaced by a random one when *randomize_seed*.
        randomize_seed: whether to draw a fresh random seed per call.
        true_guidance_scale: true-CFG scale forwarded to the pipeline.
        num_inference_steps: diffusion steps (4 matches the Lightning LoRA).
        lang: UI language code used for status / error messages.
        progress: Gradio progress tracker (injected by Gradio).

    Returns:
        Tuple of (output PIL image or None, localized status string).
    """
    # Validate inputs FIRST — the original randomized the seed before the
    # check, consuming RNG state even on an error return.
    if face_image is None or hair_image is None:
        return None, t("err_no_img", lang)
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    # --- Normalize the face input to a PIL RGB image ---
    if isinstance(face_image, Image.Image):
        face_pil = face_image.convert("RGB")
    else:
        face_pil = Image.open(face_image).convert("RGB")
    # hair_image is the dropdown value (a filename string).
    hair_pil = load_hair_image(hair_image)
    if hair_pil is None:
        return None, t("err_no_img", lang)
    hair_pil = hair_pil.convert("RGB")
    # Crop/resize so the face is centered in a 1024x1024 square.
    face_pil = preprocess_face(face_pil, target_size=1024)
    # Blur only the face region of the hair image (identity removal).
    hair_pil = blur_face_with_landmarks(hair_pil)
    # White out everything except the face in the face image.
    face_pil = whiteout_except_face(face_pil)
    # picture1 = hair (blurred base), picture2 = face — order matters for
    # the fixed prompt's picture1/picture2 references.
    pil_images = [hair_pil, face_pil]
    progress(0.4, desc="Generating..." if lang == "en" else ("生成中..." if lang == "ja" else "生成中..."))
    generator = torch.Generator(device=device).manual_seed(seed)
    result = pipe(
        image=pil_images,
        prompt=FIXED_PROMPT,
        negative_prompt=FIXED_NEGATIVE_PROMPT,
        num_inference_steps=num_inference_steps,
        generator=generator,
        true_cfg_scale=true_guidance_scale,
        num_images_per_prompt=1,
    ).images
    progress(1.0, desc="Done" if lang == "en" else ("完了" if lang == "ja" else "完成"))
    return result[0], t("status_ok", lang)
# ===============================
# --- Gradio UI Section ---
# ===============================
# Page-level CSS: centered 1200px layout, amber "notice" banner, white cards.
css = """
#app-wrap {margin: 0 auto; max-width: 1200px;}
.notice {
  background: #fff8e1;
  border: 1px solid #facc15;
  color: #713f12;
  padding: 12px 14px;
  border-radius: 12px;
  font-weight: 600;
  margin-bottom: 10px;
}
.card {
  background: white;
  border: 1px solid #e5e7eb;
  border-radius: 14px;
  padding: 14px;
  box-shadow: 0 1px 2px rgba(0,0,0,0.04);
}
"""
with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
    # Language selector; choices are (display label, value code) pairs.
    lang_selector = gr.Radio(
        label=I18N["lang_label"]["en"],
        choices=[("English", "en"), ("日本語", "ja"), ("中文", "zh")],
        value="en",
        interactive=True,
    )
    title_md = gr.Markdown(I18N["title"]["en"])
    notice_html = gr.HTML(f"<div class='notice'>{I18N['notice']['en']}</div>")
    with gr.Column(elem_id="app-wrap"):
        with gr.Row():
            with gr.Column(scale=1):
                # Hair is chosen from bundled PNGs rather than uploaded.
                hair_dropdown = gr.Dropdown(
                    label=I18N["hair_input"]["en"],
                    choices=list_hair_images(),
                    value=None,
                )
                hair_preview = gr.Image(
                    label="Hair Preview",
                    type="pil",
                    height=320,
                    interactive=False,
                )
                face_image = gr.Image(
                    label=I18N["face_input"]["en"],
                    type="pil",
                    height=320,
                )
            with gr.Column(scale=1, elem_classes=["card"]):
                with gr.Accordion(I18N["accordion"]["en"], open=False):
                    seed = gr.Slider(
                        label=I18N["seed"]["en"],
                        minimum=0,
                        maximum=MAX_SEED,
                        step=1,
                        value=DEFAULT_SEED,
                    )
                    randomize_seed = gr.Checkbox(
                        label=I18N["rand"]["en"],
                        value=DEFAULT_RANDOMIZE,
                    )
                    true_guidance_scale = gr.Slider(
                        label=I18N["tgs"]["en"],
                        minimum=1.0,
                        maximum=10.0,
                        step=0.1,
                        value=DEFAULT_TRUE_GUIDANCE_SCALE,
                    )
                    num_inference_steps = gr.Slider(
                        label=I18N["steps"]["en"],
                        minimum=1,
                        maximum=40,
                        step=1,
                        value=DEFAULT_NUM_INFERENCE_STEPS,
                    )
                run_button = gr.Button(I18N["run"]["en"], variant="primary")
                result_image = gr.Image(
                    label=I18N["output"]["en"],
                    type="pil",
                    format="png",
                    height=520,
                    show_download_button=True,
                )
                status_text = gr.Textbox(label=I18N["status"]["en"], interactive=False)

    def _switch_lang(lang):
        """Return component updates that relabel the UI for *lang*.

        The tuple order must match the outputs list of lang_selector.change.
        NOTE(review): the "hair_input" label update is wired to hair_preview
        below, so hair_dropdown's own label is never relocalized — confirm
        whether that is intended.
        """
        return (
            gr.update(label=I18N["lang_label"][lang]),  # lang_selector label
            I18N["title"][lang],                        # title_md markdown text
            gr.update(value=f"<div class='notice'>{I18N['notice'][lang]}</div>"),  # notice_html
            gr.update(label=I18N["hair_input"][lang]),  # goes to hair_preview (see NOTE)
            gr.update(label=I18N["face_input"][lang]),  # face_image label
            gr.update(label=I18N["seed"][lang]),
            gr.update(label=I18N["rand"][lang]),
            gr.update(label=I18N["tgs"][lang]),
            gr.update(label=I18N["steps"][lang]),
            gr.update(value=I18N["run"][lang]),
            gr.update(label=I18N["output"][lang]),
            gr.update(label=I18N["status"][lang]),
        )

    def update_hair_preview(selected):
        """Load and show the hairstyle PNG matching the dropdown selection."""
        if selected is None:
            return None
        return load_hair_image(selected)

    # Show the selected hairstyle image whenever the dropdown changes.
    hair_dropdown.change(
        fn=update_hair_preview,
        inputs=[hair_dropdown],
        outputs=[hair_preview],
    )
    # Relabel all components when the UI language changes.
    lang_selector.change(
        fn=_switch_lang,
        inputs=[lang_selector],
        outputs=[
            lang_selector,
            title_md,
            notice_html,
            hair_preview,
            face_image,
            seed,
            randomize_seed,
            true_guidance_scale,
            num_inference_steps,
            run_button,
            result_image,
            status_text,
        ],
    )
    # Main generate action; hair_dropdown supplies the picture1 filename.
    run_button.click(
        fn=infer,
        inputs=[
            face_image,
            hair_dropdown,
            seed,
            randomize_seed,
            true_guidance_scale,
            num_inference_steps,
            lang_selector,
        ],
        outputs=[result_image, status_text],
    )
if __name__ == "__main__":
    # Launch the Gradio app when executed as a script.
    demo.launch()