sabannna committed on
Commit
cc874ea
·
verified ·
1 Parent(s): 93e2bb6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +112 -42
app.py CHANGED
@@ -1,9 +1,13 @@
1
  import os
 
 
 
2
  import gradio as gr
3
  import numpy as np
4
  import spaces
5
  import torch
6
  import random
 
7
  from PIL import Image
8
  from typing import Iterable
9
  from gradio.themes import Soft
@@ -131,6 +135,16 @@ pipe.load_lora_weights("vafipas663/Qwen-Edit-2509-Upscale-LoRA",
131
 
132
  pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())
133
 
 
 
 
 
 
 
 
 
 
 
134
  MAX_SEED = np.iinfo(np.int32).max
135
 
136
  def _round8(x: int) -> int:
@@ -138,7 +152,6 @@ def _round8(x: int) -> int:
138
  return max(8, (x // 8) * 8)
139
 
140
  def fit_long_side(image: Image.Image, long_side: int):
141
- """Keep aspect ratio; set long side; round down to mult of 8."""
142
  w0, h0 = image.size
143
  long_side = _round8(long_side)
144
  if w0 >= h0:
@@ -180,6 +193,7 @@ def upload_image_to_hub(image, dataset_id, folder_prefix="images"):
180
  print(f"Yükleme hatası ({dataset_id}): {e}")
181
  # -----------------
182
 
 
183
  SIZE_PRESETS = [
184
  "Smart Auto (closest base + scale)",
185
  "Auto (fit long side to 1024)",
@@ -191,17 +205,17 @@ SIZE_PRESETS = [
191
  ]
192
 
193
  SCALE_CHOICES = ["Auto", "0.5x", "0.75x", "1.0x", "1.25x", "1.5x"]
194
-
195
- SMART_BASE_LONG_SIDES = [512, 768, 1024, 1280, 1536] # “元に一番近い base”
196
- SMART_SCALE_CANDIDATES = [0.5, 0.75, 1.0, 1.25, 1.5] # “縮尺指定”
197
- SMART_MAX_LONG_SIDE = 1536 # 自動はここまで(手動はスライダで2048までOK)
198
 
199
  def parse_scale(scale_choice: str):
200
  if scale_choice == "Auto":
201
  return None
202
  return float(scale_choice.replace("x", "").strip())
203
 
204
- def smart_auto_size(image: Image.Image, scale_choice: str):
205
  if image is None:
206
  return 1024, 1024, "No image"
207
 
@@ -209,55 +223,55 @@ def smart_auto_size(image: Image.Image, scale_choice: str):
209
  w0, h0 = img.size
210
  long0 = max(w0, h0)
211
 
212
- # 1) base: 元画像の長辺に最も近いもの
213
- # タイブレークは「小さい方」優先(無駄なアップスケールを避ける)
214
  base = min(
215
  SMART_BASE_LONG_SIDES,
216
  key=lambda b: (abs(b - long0), 0 if b <= long0 else 1, b)
217
  )
218
 
219
- # 2) scale: “いい感じ”に近づく縮尺を選ぶ(Auto の場合)
220
  s_user = parse_scale(scale_choice)
 
 
221
  if s_user is not None:
222
  cand_long = int(base * s_user)
223
  cand_long = max(256, min(cand_long, 2048))
 
224
  w, h = fit_long_side(img, cand_long)
225
- info = f"Smart(base={base}, scale={s_user}x) -> {w}x{h} (orig {w0}x{h0})"
226
  return w, h, info
227
 
228
- # Auto scale selection with mild anti-upscale penalty
229
  best = None
230
  for s in SMART_SCALE_CANDIDATES:
231
  cand_long = int(base * s)
232
  if cand_long < 256:
233
  continue
234
- if cand_long > SMART_MAX_LONG_SIDE:
235
  continue
236
 
237
  diff = abs(cand_long - long0)
238
  upscale_penalty = 0
239
  if cand_long > long0:
240
- upscale_penalty = (cand_long - long0) * 2.5 # アップスケールはやや不利に
241
  cost = diff + upscale_penalty
242
 
243
  if best is None or cost < best[0]:
244
  best = (cost, s, cand_long)
245
 
246
- # フォールバック
247
  if best is None:
248
- cand_long = min(max(256, base), SMART_MAX_LONG_SIDE)
249
  w, h = fit_long_side(img, cand_long)
250
- info = f"Smart(base={base}, scale=Fallback) -> {w}x{h} (orig {w0}x{h0})"
251
  return w, h, info
252
 
253
  _, s_best, long_best = best
254
  w, h = fit_long_side(img, long_best)
255
- info = f"Smart(base={base}, scale={s_best}x Auto) -> {w}x{h} (orig {w0}x{h0})"
256
  return w, h, info
257
 
258
- def apply_size_controls(preset, image, scale_choice, cur_w, cur_h):
 
 
259
  if preset == "Smart Auto (closest base + scale)":
260
- w, h, info = smart_auto_size(image, scale_choice)
261
  return w, h, info
262
 
263
  if preset == "Auto (fit long side to 1024)":
@@ -275,9 +289,9 @@ def apply_size_controls(preset, image, scale_choice, cur_w, cur_h):
275
  if preset == "512 x 512 (Fast)":
276
  return 512, 512, "Fixed 512x512"
277
 
278
- # Custom
279
  return _round8(cur_w), _round8(cur_h), f"Custom -> {_round8(cur_w)}x{_round8(cur_h)}"
280
 
 
281
  def set_adapter(lora_adapter: str):
282
  if lora_adapter == "Photo-to-Anime":
283
  pipe.set_adapters(["anime"], adapter_weights=[1.0])
@@ -296,7 +310,12 @@ def set_adapter(lora_adapter: str):
296
  elif lora_adapter == "Upscale-Image":
297
  pipe.set_adapters(["upscale-image"], adapter_weights=[1.0])
298
 
299
- @spaces.GPU(duration=60)
 
 
 
 
 
300
  def infer_6pack(
301
  input_image,
302
  prompt1,
@@ -305,6 +324,7 @@ def infer_6pack(
305
  lora_adapter,
306
  size_preset,
307
  scale_choice,
 
308
  width,
309
  height,
310
  seed,
@@ -325,7 +345,7 @@ def infer_6pack(
325
 
326
  prompts = [prompt1, prompt2, prompt3]
327
 
328
- # Seeds (2 per prompt => 6)
329
  seeds = []
330
  if randomize_seed:
331
  for _ in range(6):
@@ -335,13 +355,21 @@ def infer_6pack(
335
  for i in range(6):
336
  seeds.append((base + i) % MAX_SEED)
337
 
338
- negative_prompt = (
339
- "worst quality, low quality, bad anatomy, bad hands, text, error, missing fingers, "
340
- "extra digit, fewer digits, cropped, jpeg artifacts, signature, watermark, username, blurry"
341
- )
 
 
 
 
342
 
343
  original_image = input_image.convert("RGB")
344
 
 
 
 
 
345
  outputs = []
346
  seed_idx = 0
347
  for p_i, p in enumerate(prompts):
@@ -350,22 +378,32 @@ def infer_6pack(
350
  seed_idx += 1
351
 
352
  generator = torch.Generator(device=device).manual_seed(int(s))
353
- result = pipe(
 
354
  image=original_image,
355
  prompt=p,
356
- negative_prompt=negative_prompt,
357
  height=int(height),
358
  width=int(width),
359
  num_inference_steps=int(steps),
360
  generator=generator,
361
  true_cfg_scale=float(guidance_scale),
362
- ).images[0]
 
 
 
 
363
 
364
  upload_image_to_hub(result, OUTPUT_DATASET_ID, folder_prefix="generated")
365
 
366
  caption = f"prompt{p_i+1} var{v+1} | seed={s} | {width}x{height}"
367
  outputs.append((result, caption))
368
 
 
 
 
 
 
 
369
  seeds_text = "\n".join([f"{i+1}: {s}" for i, s in enumerate(seeds)])
370
  return outputs, seeds_text
371
 
@@ -398,15 +436,22 @@ with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
398
  value="Auto",
399
  )
400
 
 
 
 
 
 
 
401
  with gr.Row():
402
  width = gr.Slider(label="Width", minimum=256, maximum=2048, step=8, value=1024)
403
  height = gr.Slider(label="Height", minimum=256, maximum=2048, step=8, value=1024)
404
 
405
  size_info = gr.Textbox(label="Size Decision Info", lines=2)
406
 
 
407
  prompt1 = gr.Text(
408
  label="Prompt 1",
409
- value="move camera to below floor, make this girl to another standing pose, dynamic camera anble from below",
410
  )
411
  prompt2 = gr.Text(
412
  label="Prompt 2",
@@ -417,6 +462,26 @@ with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
417
  value="make this girl to another standing pose with hand sign",
418
  )
419
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
420
  run_button = gr.Button("Generate 6 Images (3 prompts x 2 seeds)", variant="primary")
421
 
422
  with gr.Column():
@@ -451,26 +516,31 @@ with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
451
 
452
  seeds_box = gr.Textbox(label="Used Seeds (1..6)", lines=6)
453
 
454
- # サイズUI更新:preset/scale/画像アップロードで追従
455
- for evt in (size_preset.change, scale_choice.change, input_image.change):
 
 
 
456
  evt(
457
- fn=apply_size_controls,
458
- inputs=[size_preset, input_image, scale_choice, width, height],
459
  outputs=[width, height, size_info],
460
  )
461
 
 
 
 
 
 
 
 
 
462
  run_button.click(
463
  fn=infer_6pack,
464
  inputs=[
465
  input_image,
466
  prompt1, prompt2, prompt3,
467
  lora_adapter,
468
- size_preset, scale_choice,
469
  width, height,
470
- seed, randomize_seed, guidance_scale, steps,
471
- ],
472
- outputs=[output_gallery, seeds_box],
473
- )
474
-
475
- if __name__ == "__main__":
476
- demo.queue(max_size=30).launch(mcp_server=True, ssr_mode=False, show_error=True)
 
1
  import os
2
+ # ★ torch import 前に allocator 設定(ZeroGPU/断片化対策)
3
+ os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True,max_split_size_mb:128")
4
+
5
  import gradio as gr
6
  import numpy as np
7
  import spaces
8
  import torch
9
  import random
10
+ import gc
11
  from PIL import Image
12
  from typing import Iterable
13
  from gradio.themes import Soft
 
135
 
136
  pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())
137
 
138
+ # ★ メモリ節約(対応していれば効く)
139
+ try:
140
+ pipe.enable_vae_slicing()
141
+ except Exception:
142
+ pass
143
+ try:
144
+ pipe.enable_attention_slicing("auto")
145
+ except Exception:
146
+ pass
147
+
148
  MAX_SEED = np.iinfo(np.int32).max
149
 
150
  def _round8(x: int) -> int:
 
152
  return max(8, (x // 8) * 8)
153
 
154
  def fit_long_side(image: Image.Image, long_side: int):
 
155
  w0, h0 = image.size
156
  long_side = _round8(long_side)
157
  if w0 >= h0:
 
193
  print(f"Yükleme hatası ({dataset_id}): {e}")
194
  # -----------------
195
 
196
+ # ===== Size logic =====
197
  SIZE_PRESETS = [
198
  "Smart Auto (closest base + scale)",
199
  "Auto (fit long side to 1024)",
 
205
  ]
206
 
207
  SCALE_CHOICES = ["Auto", "0.5x", "0.75x", "1.0x", "1.25x", "1.5x"]
208
+ SMART_BASE_LONG_SIDES = [512, 768, 1024, 1280, 1536]
209
+ SMART_SCALE_CANDIDATES = [0.5, 0.75, 1.0, 1.25, 1.5]
210
+ SMART_MAX_CHOICES = [768, 1024, 1280, 1536]
211
+ SMART_MAX_LONG_SIDE_DEFAULT = 1024 # ★安全側デフォルト
212
 
213
  def parse_scale(scale_choice: str):
214
  if scale_choice == "Auto":
215
  return None
216
  return float(scale_choice.replace("x", "").strip())
217
 
218
+ def smart_auto_size(image: Image.Image, scale_choice: str, smart_max_long: int):
219
  if image is None:
220
  return 1024, 1024, "No image"
221
 
 
223
  w0, h0 = img.size
224
  long0 = max(w0, h0)
225
 
 
 
226
  base = min(
227
  SMART_BASE_LONG_SIDES,
228
  key=lambda b: (abs(b - long0), 0 if b <= long0 else 1, b)
229
  )
230
 
 
231
  s_user = parse_scale(scale_choice)
232
+ smart_max_long = int(smart_max_long)
233
+
234
  if s_user is not None:
235
  cand_long = int(base * s_user)
236
  cand_long = max(256, min(cand_long, 2048))
237
+ cand_long = min(cand_long, smart_max_long)
238
  w, h = fit_long_side(img, cand_long)
239
+ info = f"Smart(base={base}, scale={s_user}x, max={smart_max_long}) -> {w}x{h} (orig {w0}x{h0})"
240
  return w, h, info
241
 
 
242
  best = None
243
  for s in SMART_SCALE_CANDIDATES:
244
  cand_long = int(base * s)
245
  if cand_long < 256:
246
  continue
247
+ if cand_long > smart_max_long:
248
  continue
249
 
250
  diff = abs(cand_long - long0)
251
  upscale_penalty = 0
252
  if cand_long > long0:
253
+ upscale_penalty = (cand_long - long0) * 2.5
254
  cost = diff + upscale_penalty
255
 
256
  if best is None or cost < best[0]:
257
  best = (cost, s, cand_long)
258
 
 
259
  if best is None:
260
+ cand_long = min(max(256, base), smart_max_long)
261
  w, h = fit_long_side(img, cand_long)
262
+ info = f"Smart(base={base}, scale=Fallback, max={smart_max_long}) -> {w}x{h} (orig {w0}x{h0})"
263
  return w, h, info
264
 
265
  _, s_best, long_best = best
266
  w, h = fit_long_side(img, long_best)
267
+ info = f"Smart(base={base}, scale={s_best}x Auto, max={smart_max_long}) -> {w}x{h} (orig {w0}x{h0})"
268
  return w, h, info
269
 
270
+ def apply_size_controls(preset, image, scale_choice, smart_max_long, cur_w, cur_h):
271
+ smart_max_long = int(smart_max_long)
272
+
273
  if preset == "Smart Auto (closest base + scale)":
274
+ w, h, info = smart_auto_size(image, scale_choice, smart_max_long)
275
  return w, h, info
276
 
277
  if preset == "Auto (fit long side to 1024)":
 
289
  if preset == "512 x 512 (Fast)":
290
  return 512, 512, "Fixed 512x512"
291
 
 
292
  return _round8(cur_w), _round8(cur_h), f"Custom -> {_round8(cur_w)}x{_round8(cur_h)}"
293
 
294
+ # ===== LoRA =====
295
  def set_adapter(lora_adapter: str):
296
  if lora_adapter == "Photo-to-Anime":
297
  pipe.set_adapters(["anime"], adapter_weights=[1.0])
 
310
  elif lora_adapter == "Upscale-Image":
311
  pipe.set_adapters(["upscale-image"], adapter_weights=[1.0])
312
 
313
+ # ===== Prompt swap =====
314
+ def swap_prompt_sets(p1, p2, p3, p4, p5, p6):
315
+ return p4, p5, p6, p1, p2, p3
316
+
317
+ # ===== Inference (6 images) =====
318
+ @spaces.GPU(duration=120)
319
  def infer_6pack(
320
  input_image,
321
  prompt1,
 
324
  lora_adapter,
325
  size_preset,
326
  scale_choice,
327
+ smart_max_long,
328
  width,
329
  height,
330
  seed,
 
345
 
346
  prompts = [prompt1, prompt2, prompt3]
347
 
348
+ # seeds: 2 per prompt => 6
349
  seeds = []
350
  if randomize_seed:
351
  for _ in range(6):
 
355
  for i in range(6):
356
  seeds.append((base + i) % MAX_SEED)
357
 
358
+ # true_cfg_scale<=1 のときは negative_prompt 渡さない(警告&無駄回避)
359
+ guidance_scale = float(guidance_scale)
360
+ negative_prompt = None
361
+ if guidance_scale > 1.0:
362
+ negative_prompt = (
363
+ "worst quality, low quality, bad anatomy, bad hands, text, error, missing fingers, "
364
+ "extra digit, fewer digits, cropped, jpeg artifacts, signature, watermark, username, blurry"
365
+ )
366
 
367
  original_image = input_image.convert("RGB")
368
 
369
+ if torch.cuda.is_available():
370
+ torch.cuda.empty_cache()
371
+ gc.collect()
372
+
373
  outputs = []
374
  seed_idx = 0
375
  for p_i, p in enumerate(prompts):
 
378
  seed_idx += 1
379
 
380
  generator = torch.Generator(device=device).manual_seed(int(s))
381
+
382
+ call_kwargs = dict(
383
  image=original_image,
384
  prompt=p,
 
385
  height=int(height),
386
  width=int(width),
387
  num_inference_steps=int(steps),
388
  generator=generator,
389
  true_cfg_scale=float(guidance_scale),
390
+ )
391
+ if negative_prompt is not None:
392
+ call_kwargs["negative_prompt"] = negative_prompt
393
+
394
+ result = pipe(**call_kwargs).images[0]
395
 
396
  upload_image_to_hub(result, OUTPUT_DATASET_ID, folder_prefix="generated")
397
 
398
  caption = f"prompt{p_i+1} var{v+1} | seed={s} | {width}x{height}"
399
  outputs.append((result, caption))
400
 
401
+ # ★ 連続生成のメモリ圧を下げる
402
+ del generator
403
+ if torch.cuda.is_available():
404
+ torch.cuda.empty_cache()
405
+ gc.collect()
406
+
407
  seeds_text = "\n".join([f"{i+1}: {s}" for i, s in enumerate(seeds)])
408
  return outputs, seeds_text
409
 
 
436
  value="Auto",
437
  )
438
 
439
+ smart_max_long = gr.Dropdown(
440
+ label="Smart Max Long Side (Safe default 1024)",
441
+ choices=[str(x) for x in SMART_MAX_CHOICES],
442
+ value=str(SMART_MAX_LONG_SIDE_DEFAULT),
443
+ )
444
+
445
  with gr.Row():
446
  width = gr.Slider(label="Width", minimum=256, maximum=2048, step=8, value=1024)
447
  height = gr.Slider(label="Height", minimum=256, maximum=2048, step=8, value=1024)
448
 
449
  size_info = gr.Textbox(label="Size Decision Info", lines=2)
450
 
451
+ # ---- main prompts (1-3) ----
452
  prompt1 = gr.Text(
453
  label="Prompt 1",
454
+ value="move camera to below floor, make this girl to another standing pose, dynamic camera angle from below",
455
  )
456
  prompt2 = gr.Text(
457
  label="Prompt 2",
 
462
  value="make this girl to another standing pose with hand sign",
463
  )
464
 
465
+ # ---- swap buttons ----
466
+ with gr.Row():
467
+ swap_left = gr.Button("◀", variant="secondary")
468
+ swap_right = gr.Button("▶", variant="secondary")
469
+
470
+ # ---- alt prompts (4-6) ----
471
+ with gr.Accordion("Alt Prompts (4-6)", open=False):
472
+ prompt4 = gr.Text(
473
+ label="Prompt 4",
474
+ value="camera zoom in to her face, cute face with smiling, aesthetics image film,",
475
+ )
476
+ prompt5 = gr.Text(
477
+ label="Prompt 5",
478
+ value="camera zoom out and she split legs, cute posing",
479
+ )
480
+ prompt6 = gr.Text(
481
+ label="Prompt 6",
482
+ value="camera move to up, she look at another, and sitting,",
483
+ )
484
+
485
  run_button = gr.Button("Generate 6 Images (3 prompts x 2 seeds)", variant="primary")
486
 
487
  with gr.Column():
 
516
 
517
  seeds_box = gr.Textbox(label="Used Seeds (1..6)", lines=6)
518
 
519
+ # サイズUI更新:preset/scale/max/画像アップロードで追従
520
+ def _size_update(preset, img, scale, mx, w, h):
521
+ return apply_size_controls(preset, img, scale, mx, w, h)
522
+
523
+ for evt in (size_preset.change, scale_choice.change, smart_max_long.change, input_image.change):
524
  evt(
525
+ fn=_size_update,
526
+ inputs=[size_preset, input_image, scale_choice, smart_max_long, width, height],
527
  outputs=[width, height, size_info],
528
  )
529
 
530
+ # 左右ボタン:prompt1-3 <-> prompt4-6 を swap
531
+ for btn in (swap_left, swap_right):
532
+ btn.click(
533
+ fn=swap_prompt_sets,
534
+ inputs=[prompt1, prompt2, prompt3, prompt4, prompt5, prompt6],
535
+ outputs=[prompt1, prompt2, prompt3, prompt4, prompt5, prompt6],
536
+ )
537
+
538
  run_button.click(
539
  fn=infer_6pack,
540
  inputs=[
541
  input_image,
542
  prompt1, prompt2, prompt3,
543
  lora_adapter,
544
+ size_preset, scale_choice, smart_max_long,
545
  width, height,
546
+ seed, randomize_seed, guidance_scale,