Spaces:

FireRedTeam
/

FireRed-Image-Edit-1.1

Running on Zero

App Files Files Community

qiaochanghao committed on Mar 3

Commit

484e146

1 Parent(s): 3c8d247

update to firered1.1

Browse files

Files changed (15) hide show

app.py +258 -122
examples/makeup1.png +3 -0
examples/makeup2.png +3 -0
examples/master1.png +3 -0
examples/master2.png +3 -0
examples/master3_1.png +3 -0
examples/master3_2.png +3 -0
examples/master4_1.png +3 -0
examples/master4_2.png +3 -0
examples/text1_1.png +3 -0
examples/text1_2.png +3 -0
examples/text2_1.png +3 -0
examples/text2_2.png +3 -0
prompt_augment.py +1 -1
requirements.txt +6 -1

app.py CHANGED Viewed

@@ -3,86 +3,188 @@ import numpy as np
 import random
 import torch
 import spaces
 from PIL import Image
 from diffusers import QwenImageEditPlusPipeline
-import os
-import base64
-import json
 from huggingface_hub import login
 from prompt_augment import PromptAugment
 login(token=os.environ.get('hf'))
-# --- Model Loading ---
 dtype = torch.bfloat16
 device = "cuda" if torch.cuda.is_available() else "cpu"
-# Load the model pipeline
-pipe = QwenImageEditPlusPipeline.from_pretrained("FireRedTeam/FireRed-Image-Edit-1.0", torch_dtype=dtype).to(device)
 prompt_handler = PromptAugment()
-# --- UI Constants and Helpers ---
 MAX_SEED = np.iinfo(np.int32).max
-# --- Main Inference Function (with hardcoded negative prompt) ---
 @spaces.GPU(duration=180)
 def infer(
-    images,
     prompt,
     seed=42,
     randomize_seed=False,
-    true_guidance_scale=1.0,
-    num_inference_steps=50,
     height=None,
     width=None,
-    rewrite_prompt=True,
     num_images_per_prompt=1,
     progress=gr.Progress(track_tqdm=True),
 ):
-    """
-    Generates an image using the local Qwen-Image diffusers pipeline.
-    """
-    # Hardcode the negative prompt as requested
     negative_prompt = " "
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
-    # Set up the generator for reproducibility
     generator = torch.Generator(device=device).manual_seed(seed)
-    # Load input images into PIL Images
     pil_images = []
-    if images is not None:
-        for item in images:
             try:
-                if isinstance(item[0], Image.Image):
-                    pil_images.append(item[0].convert("RGB"))
-                elif isinstance(item[0], str):
-                    pil_images.append(Image.open(item[0]).convert("RGB"))
-                elif hasattr(item, "name"):
-                    pil_images.append(Image.open(item.name).convert("RGB"))
-            except Exception:
                 continue
-    if height==256 and width==256:
-        height, width = None, None
-    print(f"Calling pipeline with prompt: '{prompt}'")
-    print(f"Negative Prompt: '{negative_prompt}'")
-    print(f"Seed: {seed}, Steps: {num_inference_steps}, Guidance: {true_guidance_scale}, Size: {width}x{height}")
     if rewrite_prompt and len(pil_images) > 0:
-        # prompt = polish_prompt(prompt, pil_images[0])
         prompt = prompt_handler.predict(prompt, [pil_images[0]])
         print(f"Rewritten Prompt: {prompt}")
-    # Generate the image
-    image = pipe(
         image=pil_images if len(pil_images) > 0 else None,
         prompt=prompt,
         height=height,
@@ -90,116 +192,150 @@ def infer(
         negative_prompt=negative_prompt,
         num_inference_steps=num_inference_steps,
         generator=generator,
         true_cfg_scale=true_guidance_scale,
         num_images_per_prompt=num_images_per_prompt,
     ).images
-    return image, seed
-# --- Examples and UI Layout ---
-examples = []
 css = """
-#col-container {
-    margin: 0 auto;
-    max-width: 1024px;
-}
-#edit_text{margin-top: -62px !important}
 """
 def get_image_base64(image_path):
     with open(image_path, "rb") as img_file:
         return base64.b64encode(img_file.read()).decode('utf-8')
-logo_base64 = get_image_base64("logo.png")
 with gr.Blocks(css=css) as demo:
     with gr.Column(elem_id="col-container"):
-        gr.HTML(f'<img src="data:image/png;base64,{logo_base64}" alt="Firered Logo" width="400" style="display: block; margin: 0 auto;">')
-        gr.Markdown("[Learn more](https://github.com/FireRedTeam/FireRed-Image-Edit) about the FireRed-Image-Edit series.")
         with gr.Row():
-            with gr.Column():
-                input_images = gr.Gallery(label="Input Images", show_label=False, type="pil", interactive=True)
-            # result = gr.Image(label="Result", show_label=False, type="pil")
-            result = gr.Gallery(label="Result", show_label=False, type="pil")
-        with gr.Row():
-            prompt = gr.Text(
-                    label="Prompt",
-                    show_label=False,
-                    placeholder="describe the edit instruction",
-                    container=False,
-            )
-            run_button = gr.Button("Edit!", variant="primary")
         with gr.Accordion("Advanced Settings", open=False):
-            # Negative prompt UI element is removed here
-            seed = gr.Slider(
-                label="Seed",
-                minimum=0,
-                maximum=MAX_SEED,
-                step=1,
-                value=0,
-            )
-            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
             with gr.Row():
-                true_guidance_scale = gr.Slider(
-                    label="True guidance scale",
-                    minimum=1.0,
-                    maximum=10.0,
-                    step=0.1,
-                    value=4.0
-                )
-                num_inference_steps = gr.Slider(
-                    label="Number of inference steps",
-                    minimum=1,
-                    maximum=50,
-                    step=1,
-                    value=40,
-                )
-                height = gr.Slider(
-                    label="Height",
-                    minimum=256,
-                    maximum=2048,
-                    step=8,
-                    value=None,
-                )
-                width = gr.Slider(
-                    label="Width",
-                    minimum=256,
-                    maximum=2048,
-                    step=8,
-                    value=None,
-                )
-                rewrite_prompt = gr.Checkbox(label="Rewrite prompt", value=True)
-        # gr.Examples(examples=examples, inputs=[prompt], outputs=[result, seed], fn=infer, cache_examples=False)
     gr.on(
         triggers=[run_button.click, prompt.submit],
         fn=infer,
         inputs=[
             input_images,
-            prompt,
-            seed,
-            randomize_seed,
-            true_guidance_scale,
-            num_inference_steps,
-            height,
-            width,
-            rewrite_prompt,
         ],
         outputs=[result, seed],
     )
 if __name__ == "__main__":
-    # demo.launch()
-    demo.launch(allowed_paths=["./"])

 import random
 import torch
 import spaces
+import os
+import base64
+import math
 from PIL import Image
 from diffusers import QwenImageEditPlusPipeline
+from pillow_heif import register_heif_opener
 from huggingface_hub import login
 from prompt_augment import PromptAugment
 login(token=os.environ.get('hf'))
+register_heif_opener()
 dtype = torch.bfloat16
 device = "cuda" if torch.cuda.is_available() else "cpu"
+pipe = QwenImageEditPlusPipeline.from_pretrained(
+    "FireRedTeam/FireRed-Image-Edit-1.1",
+    torch_dtype=dtype
+).to(device)
 prompt_handler = PromptAugment()
+# Registry of selectable LoRA adapters, keyed by the label shown in the UI.
+# Each entry names the Hub repo, the weight file inside that repo, and the
+# adapter name under which the weights are registered on the pipeline.
+ADAPTER_SPECS = {
+    "Covercraft": {
+        "repo": "FireRedTeam/FireRed-Image-Edit-LoRA-Zoo",
+        "weights": "FireRed-Image-Edit-Covercraft.safetensors",
+        "adapter_name": "covercraft",
+    },
+    "Lightning": {
+        "repo": "FireRedTeam/FireRed-Image-Edit-LoRA-Zoo",
+        "weights": "FireRed-Image-Edit-Lightning-8steps-v1.0.safetensors",
+        "adapter_name": "lightning",
+    },
+    "Makeup": {
+        "repo": "FireRedTeam/FireRed-Image-Edit-LoRA-Zoo",
+        "weights": "FireRed-Image-Edit-Makeup.safetensors",
+        "adapter_name": "makeup",
+    }
+}
+# Adapter names that load_lora() has already loaded into the pipeline.
+LOADED_ADAPTERS = set()
+# Dropdown choices: "None" (no adapter) followed by every registered adapter label.
+LORA_OPTIONS = ["None"] + list(ADAPTER_SPECS.keys())
+def load_lora(lora_name):
+    """Load (if not loaded yet) and activate the LoRA adapter chosen in the UI.
+
+    Args:
+        lora_name: A key of ADAPTER_SPECS, or "None" / an empty value to run
+            without an adapter.
+
+    Raises:
+        gr.Error: If lora_name is not a key of ADAPTER_SPECS, or if loading
+            the adapter weights raises.
+    """
+    # No adapter requested: clear the active adapter list, but only when at
+    # least one adapter has ever been loaded.
+    if lora_name == "None" or not lora_name:
+        if LOADED_ADAPTERS:
+            pipe.set_adapters([], adapter_weights=[])
+        return
+    spec = ADAPTER_SPECS.get(lora_name)
+    if not spec:
+        # Message text: "LoRA configuration not found".
+        raise gr.Error(f"LoRA 配置未找到: {lora_name}")
+    adapter_name = spec["adapter_name"]
+    # Weights are loaded once per adapter; later selections only re-activate it.
+    if adapter_name not in LOADED_ADAPTERS:
+        print(f"--- Downloading and Loading Adapter: {lora_name} ---")
+        try:
+            pipe.load_lora_weights(
+                spec["repo"],
+                weight_name=spec["weights"],
+                adapter_name=adapter_name
+            )
+            # Recorded only after a successful load, so a failed load is retried next time.
+            LOADED_ADAPTERS.add(adapter_name)
+        except Exception as e:
+            # Re-raised as gr.Error (presumably so Gradio shows it to the user).
+            raise gr.Error(f"Failed to load adapter {lora_name}: {e}")
+    else:
+        print(f"--- Adapter {lora_name} is already loaded ---")
+    # Make the selected adapter the only active one, at weight 1.0.
+    pipe.set_adapters([adapter_name], adapter_weights=[1.0])
 MAX_SEED = np.iinfo(np.int32).max
+MAX_INPUT_IMAGES = 3
+def limit_images(images):
+    """Cap a gallery value at MAX_INPUT_IMAGES items.
+
+    Returns None when images is None. When images holds more than
+    MAX_INPUT_IMAGES items, emits a gr.Info notice and returns only the
+    first MAX_INPUT_IMAGES; otherwise returns images unchanged.
+    """
+    if images is None:
+        return None
+    if len(images) > MAX_INPUT_IMAGES:
+        # Notice text: "At most N images are supported; extra images were removed automatically."
+        gr.Info(f"最多支持 {MAX_INPUT_IMAGES} 张图片，已自动移除多余图片")
+        return images[:MAX_INPUT_IMAGES]
+    return images
+def calculate_dimensions(target_area, ratio):
+    """Return a (width, height) pair of about target_area pixels at the given aspect ratio.
+
+    Args:
+        target_area: Desired width * height before rounding.
+        ratio: Desired width / height.
+
+    Returns:
+        (width, height) as ints, each rounded to the nearest multiple of 32.
+    """
+    # From width * height == target_area and width / height == ratio:
+    # width == sqrt(target_area * ratio).
+    width = math.sqrt(target_area * ratio)
+    height = width / ratio
+    # NOTE(review): rounding to a multiple of 32 yields 0 for a side of about
+    # 16 px or less, which can happen for extreme aspect ratios.
+    width = round(width / 32) * 32
+    height = round(height / 32) * 32
+    return int(width), int(height)
+def update_dimensions_on_upload(images, max_area=1024*1024):
+    """Suggest an output (height, width) after images are uploaded.
+
+    Args:
+        images: Gallery value; each item may be a tuple whose first element
+            is the image, a PIL image, or a file path string.
+        max_area: Target pixel area passed to calculate_dimensions().
+
+    Returns:
+        (height, width). For two or more images, the size is derived from
+        the first image's aspect ratio. Returns (0, 0) for no images, a
+        single image, an unsupported item type, or any error; infer()
+        converts 0 to None.
+    """
+    if images is None or len(images) == 0:
+        return 0, 0
+    try:
+        first_item = images[0]
+        # Unwrap tuple items; the image is taken from position 0.
+        if isinstance(first_item, tuple):
+            img = first_item[0]
+        else:
+            img = first_item
+        if isinstance(img, Image.Image):
+            pil_img = img
+        elif isinstance(img, str):
+            # NOTE(review): the opened image is not explicitly closed here.
+            pil_img = Image.open(img)
+        else:
+            return 0, 0
+        h, w = pil_img.height, pil_img.width
+        # Explicit dimensions are only suggested for multi-image input.
+        is_multi_image = len(images) > 1
+        if not is_multi_image:
+            return 0, 0
+        ratio = w / h
+        new_w, new_h = calculate_dimensions(max_area, ratio)
+        # calculate_dimensions returns (width, height); this function returns (height, width).
+        return new_h, new_w
+    except Exception as e:
+        # Log text: "Failed to get image size".
+        print(f"获取图片尺寸失败: {e}")
+        return 0, 0
 @spaces.GPU(duration=180)
 def infer(
+    input_images,
     prompt,
+    lora_choice,
     seed=42,
     randomize_seed=False,
+    true_guidance_scale=4.0,
+    num_inference_steps=40,
     height=None,
     width=None,
+    rewrite_prompt=False,
     num_images_per_prompt=1,
     progress=gr.Progress(track_tqdm=True),
 ):
+    """Run one edit request through the pipeline.
+
+    Args:
+        input_images: Gallery value; each item may be a tuple whose first
+            element is the image, a PIL image, or a file path string. Only
+            the first MAX_INPUT_IMAGES items are read.
+        prompt: Edit instruction text.
+        lora_choice: Adapter label handed to load_lora().
+        seed: Seed for the torch generator; replaced when randomize_seed is true.
+        randomize_seed: When true, draw a seed with random.randint(0, MAX_SEED).
+        true_guidance_scale: Forwarded to the pipeline as true_cfg_scale.
+        num_inference_steps: Forwarded to the pipeline.
+        height: Requested output height; 0 is converted to None.
+        width: Requested output width; 0 is converted to None.
+        rewrite_prompt: When true and at least one image was decoded, the
+            prompt is replaced by prompt_handler.predict(prompt, [first image]).
+        num_images_per_prompt: Forwarded to the pipeline.
+        progress: Gradio progress object; not referenced in the body
+            (presumably needed for tqdm tracking - confirm against Gradio docs).
+
+    Returns:
+        (images, seed): the `.images` attribute of the pipeline result and
+        the seed that was actually used.
+    """
+    # The negative prompt is fixed to a single space.
     negative_prompt = " "
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
     generator = torch.Generator(device=device).manual_seed(seed)
+    # Activate (or clear) the LoRA adapter before running the pipeline.
+    load_lora(lora_choice)
     pil_images = []
+    if input_images is not None:
+        for item in input_images[:MAX_INPUT_IMAGES]:
             try:
+                # Unwrap tuple items; the image is taken from position 0.
+                if isinstance(item, tuple):
+                    img = item[0]
+                else:
+                    img = item
+                # Items that are neither PIL images nor path strings are ignored.
+                if isinstance(img, Image.Image):
+                    pil_images.append(img.convert("RGB"))
+                elif isinstance(img, str):
+                    pil_images.append(Image.open(img).convert("RGB"))
+            except Exception as e:
+                # Log text: "Error processing image". The item is skipped.
+                print(f"处理图片出错: {e}")
                 continue
+    # The UI sliders use 0 for "auto"; translate that to None.
+    if height == 0:
+        height = None
+    if width == 0:
+        width = None
     if rewrite_prompt and len(pil_images) > 0:
         prompt = prompt_handler.predict(prompt, [pil_images[0]])
         print(f"Rewritten Prompt: {prompt}")
+    if pil_images:
+        for i, img in enumerate(pil_images):
+            print(f"    [{i}] size: {img.width}x{img.height}")
+    images = pipe(
         image=pil_images if len(pil_images) > 0 else None,
         prompt=prompt,
         height=height,
         negative_prompt=negative_prompt,
         num_inference_steps=num_inference_steps,
         generator=generator,
+        guidance_scale=1.0,
         true_cfg_scale=true_guidance_scale,
         num_images_per_prompt=num_images_per_prompt,
     ).images
+    return images, seed
 css = """
+#col-container { margin: 0 auto; max-width: 1200px; }
+#edit-btn { height: 100% !important; min-height: 42px; }
 """
 def get_image_base64(image_path):
+    """Read the file at image_path in binary mode and return its contents base64-encoded as a str."""
     with open(image_path, "rb") as img_file:
         return base64.b64encode(img_file.read()).decode('utf-8')
+logo_base64 = get_image_base64("logo.png") if os.path.exists("logo.png") else None
 with gr.Blocks(css=css) as demo:
     with gr.Column(elem_id="col-container"):
+        if logo_base64:
+            gr.HTML(f'<img src="data:image/png;base64,{logo_base64}" alt="FireRed Logo" width="400" style="display: block; margin: 0 auto;">')
+        else:
+            gr.Markdown("# FireRed Image Edit")
+        gr.Markdown(f"[Learn more](https://github.com/FireRedTeam/FireRed-Image-Edit) about the FireRed-Image-Edit series. Supports multi-image input (up to {MAX_INPUT_IMAGES} images.)")
         with gr.Row():
+            with gr.Column(scale=1):
+                input_images = gr.Gallery(
+                    label="Upload Images",
+                    type="pil",
+                    interactive=True,
+                    height=300,
+                    columns=3,
+                    object_fit="contain",
+                )
+            with gr.Column(scale=1):
+                result = gr.Gallery(
+                    label="Output Images",
+                    type="pil",
+                    height=300,
+                    columns=2,
+                    object_fit="contain",
+                )
+        prompt = gr.Textbox(
+            label="Edit Prompt",
+            placeholder="e.g., transform into anime..",
+        )
+        with gr.Row(equal_height=True):
+            with gr.Column(scale=5):
+                lora_choice = gr.Dropdown(
+                    label="Choose Lora",
+                    choices=LORA_OPTIONS,
+                    value=LORA_OPTIONS[0] if LORA_OPTIONS else "None",
+                )
+            with gr.Column(scale=4):
+                run_button = gr.Button("Edit Image", variant="primary", elem_id="edit-btn")
         with gr.Accordion("Advanced Settings", open=False):
             with gr.Row():
+                seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42)
+                randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
+            with gr.Row():
+                true_guidance_scale = gr.Slider(label="Guidance Scale", minimum=1.0, maximum=10.0, step=0.1, value=4.0)
+                num_inference_steps = gr.Slider(label="Inference Steps", minimum=1, maximum=50, step=1, value=40)
+            with gr.Row():
+                height = gr.Slider(label="Height (0=auto)", minimum=0, maximum=2048, step=8, value=0)
+                width = gr.Slider(label="Width (0=auto)", minimum=0, maximum=2048, step=8, value=0)
+            with gr.Row():
+                rewrite_prompt = gr.Checkbox(label="Rewrite Prompt", value=False)
+                num_images_per_prompt = gr.Slider(label="Num Images", minimum=1, maximum=4, step=1, value=1)
+        # Examples
+        gr.Examples(
+            examples=[
+                [["examples/master1.png"], "将背景换为带自然光效的浅蓝色，身穿浅米色蕾丝领上衣，将发型改为右侧佩戴精致珍珠发夹，同时单手向前抬起握着一把宝剑，另一只手自然摆放。面部微笑。",  "None"],
+                [["examples/master2.png"], "替换背景为盛开的樱花树场景；更换衣服为黑色西装，为人物添加单肩蓝色书包，单手抓住包带。头发变为高马尾。色调明亮。蹲下。",  "None"],
+                [["examples/master3_1.png", "examples/master3_2.png"], "把图1中的模特换成图2里的长裙和高帮帆布鞋，保持原有姿态和配饰，整体风格统一。",  "None"],
+                [["examples/master4_1.png", "examples/master4_2.png"], "把图1中的白色衬衫和棕色半裙，换成图2里的灰褐色连帽卫衣、黑色侧边条纹裤、卡其色工装靴和同色云朵包，保持模特姿态和背景不变。",  "None"],
+                [["examples/makeup1.png"], "为人物添加纯欲厌世妆：使用冷白皮哑光粉底均匀肤色，描绘细挑的灰黑色野生眉，眼部晕染浅灰调眼影并加深眼尾，画出上扬的黑色眼线，粘贴浓密卷翘的假睫毛，在眼头和卧蚕处提亮，涂抹深紫调哑光口红并勾勒唇形，在颧骨处扫上浅粉腮红，鼻梁和眉骨处打高光，下颌线处轻扫阴影。", "Makeup"],
+                [["examples/makeup2.png"], "为人物添加妆容：使用象牙白哑光粉底均匀肤色，描绘细长柳叶眉并填充浅棕色，眼部晕染浅棕色眼影并加深眼尾，画出自然黑色眼线，粘贴浓密假睫毛，用浅棕色眼影提亮卧蚕；涂抹豆沙色哑光口红并勾勒唇形，在两颊扫上浅粉色腮红，在鼻梁和颧骨处轻扫高光，在面部轮廓处轻扫阴影。", "Makeup"],
+                [["examples/text1_1.png", "examples/text1_2.png"], "请在图1添加主标题文本 “谁说我们丑了”，字体样式参考图2中主标题《人！给我开个罐罐》；主标题整体采用横向排版多行错落（非严格对齐），置于图片左下角；在狗狗右下方、贴近前爪附近添加一个手绘“爱心”涂鸦贴纸；增加鱼眼镜头效果", "Covercraft"],
+                [["examples/text2_1.png", "examples/text2_2.png"], "请在图1添加主标题文本 “崽子第一次玩冰”，副标题“坐标：东南休闲公园”，主标题和副标题的字体样式参考图2中主标题“无露营不冬天”，主标题整体采用横向排版多行，主标题添加在画面左侧上方；副标题添加在画面左侧下方，字的层级更小，避免修改和遮挡图1主体关键信息（人物/核心景物）和画面中心。", "Covercraft"],
+            ],
+            inputs=[input_images, prompt, lora_choice],
+            outputs=[result, seed],
+            fn=infer,
+            cache_examples=False,
+            label="Examples"
+        )
+    # React to LoRA selection changes: lock the sampling parameters when Lightning is selected.
+    def on_lora_change(lora_name):
+        """Return updates for (num_inference_steps, true_guidance_scale, seed, randomize_seed).
+
+        For "Lightning": steps are fixed at 8 and guidance at 1.0 (both
+        non-interactive), the seed is set to 0 but stays editable, and
+        seed randomization is switched off and locked.
+        For any other selection, all four controls are reset to 40 / 4.0 /
+        42 / True and made interactive, overwriting values the user had set.
+        """
+        if lora_name == "Lightning":
+            return (
+                gr.update(value=8, interactive=False),      # num_inference_steps
+                gr.update(value=1.0, interactive=False),    # true_guidance_scale
+                gr.update(value=0, interactive=True),      # seed
+                gr.update(value=False, interactive=False),  # randomize_seed
+            )
+        else:
+            return (
+                gr.update(value=40, interactive=True),      # num_inference_steps
+                gr.update(value=4.0, interactive=True),     # true_guidance_scale
+                gr.update(value=42, interactive=True),      # seed
+                gr.update(value=True, interactive=True),    # randomize_seed
+            )
+    # The order of outputs must match the order of the tuples returned above.
+    lora_choice.change(
+        fn=on_lora_change,
+        inputs=[lora_choice],
+        outputs=[num_inference_steps, true_guidance_scale, seed, randomize_seed],
+    )
+    def on_image_upload(images):
+        """Cap the uploaded images and return (images, height, width) for the UI.
+
+        The image list is truncated by limit_images(); height and width
+        come from update_dimensions_on_upload() applied to the truncated list.
+        """
+        limited = limit_images(images)
+        h, w = update_dimensions_on_upload(limited)
+        return limited, h, w
+    # Writes the capped list back into the gallery and updates the height/width sliders.
+    input_images.upload(
+        fn=on_image_upload,
+        inputs=[input_images],
+        outputs=[input_images, height, width],
+    )
     gr.on(
         triggers=[run_button.click, prompt.submit],
         fn=infer,
         inputs=[
             input_images,
+            prompt, lora_choice, seed, randomize_seed,
+            true_guidance_scale, num_inference_steps,
+            height, width, rewrite_prompt, num_images_per_prompt,
         ],
         outputs=[result, seed],
     )
 if __name__ == "__main__":
+    demo.queue()
+    demo.launch(allowed_paths=["./"])

examples/makeup1.png ADDED Viewed

Git LFS Details

SHA256: 7543c29b92ac5610e00ff6460102dcf93db529c08dc144b9b708a09f1b54643f
Pointer size: 132 Bytes
Size of remote file: 1.48 MB

examples/makeup2.png ADDED Viewed

Git LFS Details

SHA256: 602cb41eaca2cbc713c024a5da200a19ae621739f8c56db6fcf644e2b20ca874
Pointer size: 132 Bytes
Size of remote file: 1.99 MB

examples/master1.png ADDED Viewed

Git LFS Details

SHA256: f752b4c2fd2ed6cc98b28bba4db45e96ea3aa06bd815db9f315399cd31f3eace
Pointer size: 132 Bytes
Size of remote file: 1.72 MB

examples/master2.png ADDED Viewed

Git LFS Details

SHA256: 9feaf2982c23d7e9d8e5149d9fead8f8662d6c0259a53f22d49312944b8b3756
Pointer size: 132 Bytes
Size of remote file: 1.63 MB

examples/master3_1.png ADDED Viewed

Git LFS Details

SHA256: 1ab2a2afabb68f26285945d17873c8179efc53f1939081178ddea4cd621809a5
Pointer size: 131 Bytes
Size of remote file: 436 kB

examples/master3_2.png ADDED Viewed

Git LFS Details

SHA256: e5d2da1de2ffd0664114e86f8bcc8c01bcfbd7297558ab10ed4013c05878a2e7
Pointer size: 132 Bytes
Size of remote file: 1.07 MB

examples/master4_1.png ADDED Viewed

Git LFS Details

SHA256: c937a30ed5576d3e792ee895e98c1383a4be4f93f8ae8a8a1a93c7cd572065f5
Pointer size: 131 Bytes
Size of remote file: 856 kB

examples/master4_2.png ADDED Viewed

Git LFS Details

SHA256: ecb55fca7a1c03d1b56fcdab1bbbf6910c3560fd761cee332d621f6215076f85
Pointer size: 132 Bytes
Size of remote file: 7.95 MB

examples/text1_1.png ADDED Viewed

Git LFS Details

SHA256: 3fda76f4b209ad572028c5f01e6d98a54a3bad1ae55c7ea6bee1c8888e838401
Pointer size: 132 Bytes
Size of remote file: 1.61 MB

examples/text1_2.png ADDED Viewed

Git LFS Details

SHA256: 75eba063913e669e20df06ce2268f7f7d134755dcc3c7751d38d082aa819c7f6
Pointer size: 132 Bytes
Size of remote file: 1.5 MB

examples/text2_1.png ADDED Viewed

Git LFS Details

SHA256: 4680b1badb49da68e97f89138940c0d7e25000dd59ac09595e00e6c95b18f60c
Pointer size: 132 Bytes
Size of remote file: 2.14 MB

examples/text2_2.png ADDED Viewed

Git LFS Details

SHA256: fc471e4eac7646f179bc017565124f75dd495ed2c51886aa1531a087f6f40dd5
Pointer size: 132 Bytes
Size of remote file: 1.07 MB

prompt_augment.py CHANGED Viewed

@@ -180,7 +180,7 @@ Please strictly follow the rewriting rules below:
     def predict(self, original_prompt, img_list=[]):
         api_key = os.environ.get('DASH_API_KEY')
-        model="qwen3-vl-235b-a22b-thinking"
         language = contains_chinese(original_prompt)
         original_prompt = original_prompt.strip()
         if language == 'zh':

     def predict(self, original_prompt, img_list=[]):
         api_key = os.environ.get('DASH_API_KEY')
+        model="qwen3-vl-235b-a22b-instruct"
         language = contains_chinese(original_prompt)
         original_prompt = original_prompt.strip()
         if language == 'zh':

requirements.txt CHANGED Viewed

@@ -5,4 +5,9 @@ safetensors
 sentencepiece
 dashscope
 kernels
-torchvision

 sentencepiece
 dashscope
 kernels
+torchvision
+invisible_watermark
+torch
+xformers
+pillow_heif
+peft