ImageEdit / app.py
sabannna's picture
Update app.py
526f456 verified
import os
# ★ Configure the CUDA allocator before torch is imported (ZeroGPU / fragmentation mitigation).
# setdefault leaves any PYTORCH_CUDA_ALLOC_CONF value already present in the environment untouched.
os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True,max_split_size_mb:128")
import gradio as gr
import numpy as np
import spaces
import torch
import random
import gc
from PIL import Image
from typing import Iterable
from gradio.themes import Soft
from gradio.themes.utils import colors, fonts, sizes
import uuid
from datetime import datetime
from huggingface_hub import HfApi
# --- SETTINGS ---
# Hugging Face dataset repos: uploaded input images go to INPUT_DATASET_ID,
# generated results go to OUTPUT_DATASET_ID (see upload_image_to_hub call sites).
INPUT_DATASET_ID = "tyndreus/image-edit-logs"
OUTPUT_DATASET_ID = "tyndreus/output"
# ---------------
# Custom "steel_blue" palette, attached to gradio's colors module so it can be
# used as a hue (it is the default secondary hue of SteelBlueTheme).
_STEEL_BLUE_SHADES = {
    "c50": "#EBF3F8",
    "c100": "#D3E5F0",
    "c200": "#A8CCE1",
    "c300": "#7DB3D2",
    "c400": "#529AC3",
    "c500": "#4682B4",
    "c600": "#3E72A0",
    "c700": "#36638C",
    "c800": "#2E5378",
    "c900": "#264364",
    "c950": "#1E3450",
}
colors.steel_blue = colors.Color(name="steel_blue", **_STEEL_BLUE_SHADES)
class SteelBlueTheme(Soft):
    """Variant of gradio's ``Soft`` theme with gray surfaces and steel-blue accents.

    All constructor arguments are keyword-only and are forwarded unchanged to
    ``Soft.__init__``; a fixed set of style overrides is then applied via ``set``.
    """

    def __init__(
        self,
        *,
        primary_hue: colors.Color | str = colors.gray,
        secondary_hue: colors.Color | str = colors.steel_blue,
        neutral_hue: colors.Color | str = colors.slate,
        text_size: sizes.Size | str = sizes.text_lg,
        font: fonts.Font | str | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("Outfit"), "Arial", "sans-serif",
        ),
        font_mono: fonts.Font | str | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("IBM Plex Mono"), "ui-monospace", "monospace",
        ),
    ):
        # Palette and typography are handled by the base theme.
        base_options = {
            "primary_hue": primary_hue,
            "secondary_hue": secondary_hue,
            "neutral_hue": neutral_hue,
            "text_size": text_size,
            "font": font,
            "font_mono": font_mono,
        }
        super().__init__(**base_options)

        # Theme-variable overrides; "*name" values refer to other theme variables.
        style_overrides = {
            # Page and block backgrounds.
            "background_fill_primary": "*primary_50",
            "background_fill_primary_dark": "*primary_900",
            "body_background_fill": "linear-gradient(135deg, *primary_200, *primary_100)",
            "body_background_fill_dark": "linear-gradient(135deg, *primary_900, *primary_800)",
            # Primary buttons use the secondary (steel blue) hue.
            "button_primary_text_color": "white",
            "button_primary_text_color_hover": "white",
            "button_primary_background_fill": "linear-gradient(90deg, *secondary_500, *secondary_600)",
            "button_primary_background_fill_hover": "linear-gradient(90deg, *secondary_600, *secondary_700)",
            "button_primary_background_fill_dark": "linear-gradient(90deg, *secondary_600, *secondary_800)",
            "button_primary_background_fill_hover_dark": "linear-gradient(90deg, *secondary_500, *secondary_500)",
            # Secondary buttons use the primary (gray) hue.
            "button_secondary_text_color": "black",
            "button_secondary_text_color_hover": "white",
            "button_secondary_background_fill": "linear-gradient(90deg, *primary_300, *primary_300)",
            "button_secondary_background_fill_hover": "linear-gradient(90deg, *primary_400, *primary_400)",
            "button_secondary_background_fill_dark": "linear-gradient(90deg, *primary_500, *primary_600)",
            "button_secondary_background_fill_hover_dark": "linear-gradient(90deg, *primary_500, *primary_500)",
            # Sliders, blocks and miscellaneous accents.
            "slider_color": "*secondary_500",
            "slider_color_dark": "*secondary_600",
            "block_title_text_weight": "600",
            "block_border_width": "3px",
            "block_shadow": "*shadow_drop_lg",
            "button_primary_shadow": "*shadow_drop_lg",
            "button_large_padding": "11px",
            "color_accent_soft": "*primary_100",
            "block_label_background_fill": "*primary_200",
        }
        super().set(**style_overrides)
# Theme instance built with the default hues/fonts; passed to gr.Blocks(theme=...) below.
steel_blue_theme = SteelBlueTheme()
from diffusers import FlowMatchEulerDiscreteScheduler
from qwenimage.pipeline_qwenimage_edit_plus import QwenImageEditPlusPipeline
from qwenimage.transformer_qwenimage import QwenImageTransformer2DModel
from qwenimage.qwen_fa3_processor import QwenDoubleStreamAttnProcessorFA3
# All weights are loaded in bfloat16; run on CUDA when it is available, else on CPU.
dtype = torch.bfloat16
device = "cuda" if torch.cuda.is_available() else "cpu"

# The transformer comes from the "Rapid-AIO" repo and replaces the one that
# would otherwise be loaded with the base edit pipeline.
_rapid_transformer = QwenImageTransformer2DModel.from_pretrained(
    "linoyts/Qwen-Image-Edit-Rapid-AIO",
    subfolder="transformer",
    torch_dtype=dtype,
    device_map="cuda" if torch.cuda.is_available() else None,
)
pipe = QwenImageEditPlusPipeline.from_pretrained(
    "Qwen/Qwen-Image-Edit-2509",
    transformer=_rapid_transformer,
    torch_dtype=dtype,
)
pipe = pipe.to(device)
# LoRA adapters registered on the pipeline, as (repo id, weight file, adapter name).
# The adapter names are the ones set_adapter() activates per request.
_LORA_SOURCES = (
    (
        "autoweeb/Qwen-Image-Edit-2509-Photo-to-Anime",
        "Qwen-Image-Edit-2509-Photo-to-Anime_000001000.safetensors",
        "anime",
    ),
    (
        "dx8152/Qwen-Edit-2509-Multiple-angles",
        "镜头转换.safetensors",
        "multiple-angles",
    ),
    (
        "dx8152/Qwen-Image-Edit-2509-Light_restoration",
        "移除光影.safetensors",
        "light-restoration",
    ),
    (
        "dx8152/Qwen-Image-Edit-2509-Relight",
        "Qwen-Edit-Relight.safetensors",
        "relight",
    ),
    (
        "dx8152/Qwen-Edit-2509-Multi-Angle-Lighting",
        "多角度灯光-251116.safetensors",
        "multi-angle-lighting",
    ),
    (
        "tlennon-ie/qwen-edit-skin",
        "qwen-edit-skin_1.1_000002750.safetensors",
        "edit-skin",
    ),
    (
        "lovis93/next-scene-qwen-image-lora-2509",
        "next-scene_lora-v2-3000.safetensors",
        "next-scene",
    ),
    (
        "vafipas663/Qwen-Edit-2509-Upscale-LoRA",
        "qwen-edit-enhance_64-v3_000001000.safetensors",
        "upscale-image",
    ),
)
for _repo_id, _weight_file, _adapter_name in _LORA_SOURCES:
    pipe.load_lora_weights(_repo_id, weight_name=_weight_file, adapter_name=_adapter_name)
# Install the FA3 attention processor on the transformer.
pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())

# ★ Memory savers (only effective when the pipeline supports them).
# Both remain best-effort: a failure never aborts startup, but the reason is
# printed instead of being silently discarded.
try:
    pipe.enable_vae_slicing()
except Exception as exc:
    print(f"VAE slicing not enabled: {exc}")
try:
    pipe.enable_attention_slicing("auto")
except Exception as exc:
    print(f"Attention slicing not enabled: {exc}")
# Upper bound for seeds: the largest value of a signed 32-bit integer.
MAX_SEED = np.iinfo(np.int32).max
def _round8(x: int) -> int:
x = int(x)
return max(8, (x // 8) * 8)
def fit_long_side(image: Image.Image, long_side: int):
    """Return ``(width, height)`` that scales ``image`` so its longer edge is ``long_side``.

    ``long_side`` and both returned dimensions are passed through ``_round8``
    (multiples of 8, at least 8). The shorter edge follows the source aspect
    ratio and is truncated to an int before rounding. Only ``image.size`` is read.
    """
    src_w, src_h = image.size
    target = _round8(long_side)
    if src_w >= src_h:
        # Landscape or square: the width is the long edge.
        out_w, out_h = target, int(target * (src_h / src_w))
    else:
        # Portrait: the height is the long edge.
        out_w, out_h = int(target * (src_w / src_h)), target
    return _round8(out_w), _round8(out_h)
# --- HUB upload ---
def upload_image_to_hub(image, dataset_id, folder_prefix="images"):
    """Best-effort upload of an image to a Hugging Face dataset repo as a PNG.

    The image is saved to a uniquely named temporary file, uploaded to
    ``{folder_prefix}/{filename}`` in ``dataset_id`` and the temporary file is
    always removed afterwards, including when saving or uploading fails.

    Args:
        image: Object with a ``save(path)`` method (a PIL image at the call sites).
        dataset_id: Target dataset repo id.
        folder_prefix: Used both as the repo folder and as the filename prefix.

    Returns:
        None. The function never raises: a missing ``HF_TOKEN`` prints "Fail",
        any other error is printed and swallowed.
    """
    import tempfile  # local import keeps this block self-contained

    temp_path = None
    try:
        hf_token = os.environ.get("HF_TOKEN")
        if not hf_token:
            print("Fail")
            return

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        unique_id = str(uuid.uuid4())[:8]
        filename = f"{folder_prefix}_{timestamp}_{unique_id}.png"
        temp_path = os.path.join(tempfile.gettempdir(), filename)

        image.save(temp_path)
        api = HfApi(token=hf_token)
        api.upload_file(
            path_or_fileobj=temp_path,
            path_in_repo=f"{folder_prefix}/{filename}",
            repo_id=dataset_id,
            repo_type="dataset",
        )
        print("Success")
    except Exception as e:
        print(f"Yükleme hatası ({dataset_id}): {e}")
    finally:
        # Clean up even on failure so failed uploads do not pile up temp files.
        if temp_path is not None:
            try:
                os.remove(temp_path)
            except FileNotFoundError:
                pass  # nothing was written
            except OSError as cleanup_error:
                print(f"Could not remove temporary file {temp_path}: {cleanup_error}")
# -----------------
# ===== Size logic =====
# Choices of the "Image Size Preset" dropdown; apply_size_controls matches on these exact strings.
SIZE_PRESETS = [
    "Smart Auto (closest base + scale)",
    "Auto (fit long side to 1024)",
    "1024 x 1024 (Square)",
    "1024 x 768 (Landscape)",
    "768 x 1024 (Portrait)",
    "512 x 512 (Fast)",
    "Custom (use sliders)",
]

# Long-side lengths that Smart Auto snaps the source image to.
SMART_BASE_LONG_SIDES = [512, 768, 1024, 1280, 1536]
# Scale factors Smart Auto tries when the scale choice is "Auto".
SMART_SCALE_CANDIDATES = [0.5, 0.75, 1.0, 1.25, 1.5]
# "Smart Scale" dropdown: "Auto" followed by one "<factor>x" entry per candidate.
SCALE_CHOICES = ["Auto"] + [f"{factor}x" for factor in SMART_SCALE_CANDIDATES]

# Selectable upper bounds for the Smart Auto long side.
SMART_MAX_CHOICES = [768, 1024, 1280, 1536]
SMART_MAX_LONG_SIDE_DEFAULT = 1024  # ★ conservative (safe) default
def parse_scale(scale_choice: str):
    """Convert a "Smart Scale" choice such as ``"0.75x"`` into a float factor.

    Args:
        scale_choice: ``"Auto"`` or a number with an ``x`` suffix. ``None``,
            an empty/blank string and any capitalisation of "auto" are
            treated as "Auto" so a cleared dropdown does not crash the
            size computation.

    Returns:
        ``None`` for "Auto" (let the caller pick a scale), otherwise the factor.

    Raises:
        ValueError: If the text is neither "Auto" nor a parsable number.
    """
    if scale_choice is None:
        return None
    text = str(scale_choice).strip().lower()
    if not text or text == "auto":
        return None
    return float(text.replace("x", "").strip())
def smart_auto_size(image: Image.Image, scale_choice: str, smart_max_long: int):
    """Pick an output size for the "Smart Auto" preset.

    The source long side is snapped to the closest entry of
    ``SMART_BASE_LONG_SIDES`` (ties go to the smaller base). That base is then
    multiplied either by the user's scale or, for "Auto", by the candidate in
    ``SMART_SCALE_CANDIDATES`` with the lowest cost, where upscaling beyond the
    source long side is penalised 2.5x on top of the plain distance.

    Args:
        image: Source image, or ``None``. Only ``image.size`` is read.
        scale_choice: Value understood by ``parse_scale``.
        smart_max_long: Upper bound for the resulting long side (int-convertible).

    Returns:
        ``(width, height, info)`` where ``info`` is a human-readable summary of
        the decision; ``(1024, 1024, "No image")`` when ``image`` is ``None``.
    """
    if image is None:
        return 1024, 1024, "No image"

    # Only the dimensions are needed, so read them directly instead of
    # materialising an RGB copy of the whole image on every UI event.
    w0, h0 = image.size
    long0 = max(w0, h0)
    base = min(
        SMART_BASE_LONG_SIDES,
        key=lambda b: (abs(b - long0), 0 if b <= long0 else 1, b),
    )
    s_user = parse_scale(scale_choice)
    smart_max_long = int(smart_max_long)

    if s_user is not None:
        # Explicit scale: clamp to [256, 2048], then to the configured maximum.
        cand_long = int(base * s_user)
        cand_long = max(256, min(cand_long, 2048))
        cand_long = min(cand_long, smart_max_long)
        w, h = fit_long_side(image, cand_long)
        info = f"Smart(base={base}, scale={s_user}x, max={smart_max_long}) -> {w}x{h} (orig {w0}x{h0})"
        return w, h, info

    # Automatic scale: keep the candidates that respect the size limits.
    candidates = [
        (s, int(base * s))
        for s in SMART_SCALE_CANDIDATES
        if 256 <= int(base * s) <= smart_max_long
    ]
    if not candidates:
        cand_long = min(max(256, base), smart_max_long)
        w, h = fit_long_side(image, cand_long)
        info = f"Smart(base={base}, scale=Fallback, max={smart_max_long}) -> {w}x{h} (orig {w0}x{h0})"
        return w, h, info

    def _cost(candidate):
        """Distance to the source long side, plus a 2.5x penalty when upscaling."""
        cand_long = candidate[1]
        penalty = (cand_long - long0) * 2.5 if cand_long > long0 else 0
        return abs(cand_long - long0) + penalty

    # min() keeps the first candidate on ties, like a strict "<" comparison would.
    s_best, long_best = min(candidates, key=_cost)
    w, h = fit_long_side(image, long_best)
    info = f"Smart(base={base}, scale={s_best}x Auto, max={smart_max_long}) -> {w}x{h} (orig {w0}x{h0})"
    return w, h, info
def apply_size_controls(preset, image, scale_choice, smart_max_long, cur_w, cur_h):
    """Resolve the size preset into ``(width, height, info)`` for the UI.

    Args:
        preset: One of the ``SIZE_PRESETS`` strings; anything unrecognised is
            treated as "Custom".
        image: Uploaded image or ``None``. Only its size is read.
        scale_choice: Smart-scale choice, used by the Smart Auto preset only.
        smart_max_long: Long-side limit, used by the Smart Auto preset only.
            It is converted to int only on that path, so an empty or invalid
            value cannot break the other presets.
        cur_w: Current width slider value (used by "Custom").
        cur_h: Current height slider value (used by "Custom").

    Returns:
        ``(width, height, info)`` where ``info`` describes the decision.
    """
    fixed_presets = {
        "1024 x 1024 (Square)": (1024, 1024),
        "1024 x 768 (Landscape)": (1024, 768),
        "768 x 1024 (Portrait)": (768, 1024),
        "512 x 512 (Fast)": (512, 512),
    }

    if preset == "Smart Auto (closest base + scale)":
        return smart_auto_size(image, scale_choice, int(smart_max_long))

    if preset == "Auto (fit long side to 1024)":
        if image is None:
            return 1024, 1024, "Auto long side 1024 (no image)"
        # fit_long_side only reads image.size, so no RGB copy is required.
        w, h = fit_long_side(image, 1024)
        return w, h, f"Auto long side 1024 -> {w}x{h}"

    fixed = fixed_presets.get(preset) if isinstance(preset, str) else None
    if fixed is not None:
        w, h = fixed
        return w, h, f"Fixed {w}x{h}"

    # "Custom (use sliders)" and anything unrecognised: keep the slider values,
    # snapped to multiples of 8.
    w, h = _round8(cur_w), _round8(cur_h)
    return w, h, f"Custom -> {w}x{h}"
# ===== LoRA =====
# UI label of each editing style -> name of the LoRA adapter loaded on the pipeline.
_ADAPTER_BY_STYLE = {
    "Photo-to-Anime": "anime",
    "Multiple-Angles": "multiple-angles",
    "Light-Restoration": "light-restoration",
    "Relight": "relight",
    "Multi-Angle-Lighting": "multi-angle-lighting",
    "Edit-Skin": "edit-skin",
    "Next-Scene": "next-scene",
    "Upscale-Image": "upscale-image",
}


def set_adapter(lora_adapter: str):
    """Activate the LoRA adapter for the given UI style label at weight 1.0.

    Labels that are not in the table leave the pipeline's current adapter
    selection untouched.
    """
    adapter_name = _ADAPTER_BY_STYLE.get(lora_adapter)
    if adapter_name is None:
        return
    pipe.set_adapters([adapter_name], adapter_weights=[1.0])
# ===== Prompt swap =====
def swap_prompt_sets(p1, p2, p3, p4, p5, p6):
    """Exchange the main prompts (1-3) with the alternate prompts (4-6).

    Returns a 6-tuple ordered as the new values for prompts 1..6.
    """
    main_set = (p1, p2, p3)
    alt_set = (p4, p5, p6)
    return alt_set + main_set
# ===== Inference (6 images) =====
@spaces.GPU(duration=120)
def infer_6pack(
    input_image,
    prompt1,
    prompt2,
    prompt3,
    lora_adapter,
    size_preset,
    scale_choice,
    smart_max_long,
    width,
    height,
    seed,
    randomize_seed,
    guidance_scale,
    steps,
    progress=gr.Progress(track_tqdm=True),
):
    """Generate six edits of ``input_image``: two seeds for each of three prompts.

    The input image and every generated image are also passed to
    ``upload_image_to_hub``.

    Args:
        input_image: PIL image to edit; required.
        prompt1, prompt2, prompt3: Edit prompts, each rendered twice.
        lora_adapter: UI style label handed to ``set_adapter``.
        size_preset, scale_choice, smart_max_long: Received from the UI inputs
            but not read here; the size comes from ``width``/``height``.
        width, height: Output size, snapped with ``_round8``.
        seed: Base seed; used as ``(seed + i) % MAX_SEED`` when not randomizing.
        randomize_seed: Draw six random seeds instead of deriving them from ``seed``.
        guidance_scale: Passed as ``true_cfg_scale``; a negative prompt is only
            supplied when it is greater than 1.0.
        steps: Number of inference steps.
        progress: Gradio progress tracker (tqdm tracking enabled).

    Returns:
        ``(outputs, seeds_text)``: a list of ``(image, caption)`` pairs and a
        newline-separated "index: seed" listing.

    Raises:
        gr.Error: If no input image was provided.
    """
    if input_image is None:
        raise gr.Error("Please upload an image to edit.")

    upload_image_to_hub(input_image, INPUT_DATASET_ID, folder_prefix="inputs")
    set_adapter(lora_adapter)

    width, height = _round8(width), _round8(height)

    # Two seeds per prompt => six in total.
    if randomize_seed:
        seeds = [random.randint(0, MAX_SEED) for _ in range(6)]
    else:
        first_seed = int(seed)
        seeds = [(first_seed + offset) % MAX_SEED for offset in range(6)]

    # With true_cfg_scale <= 1 no negative prompt is passed (avoids a warning and wasted work).
    guidance_scale = float(guidance_scale)
    if guidance_scale > 1.0:
        negative_prompt = (
            "worst quality, low quality, bad anatomy, bad hands, text, error, missing fingers, "
            "extra digit, fewer digits, cropped, jpeg artifacts, signature, watermark, username, blurry"
        )
    else:
        negative_prompt = None

    original_image = input_image.convert("RGB")

    # NOTE(review): gc.collect() is placed outside the CUDA check here and in the
    # loop below; the original nesting is not visible, confirm this is intended.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    gc.collect()

    outputs = []
    seed_iter = iter(seeds)
    for prompt_no, prompt_text in enumerate((prompt1, prompt2, prompt3), start=1):
        for variant_no in (1, 2):
            current_seed = next(seed_iter)
            generator = torch.Generator(device=device).manual_seed(int(current_seed))

            pipe_args = {
                "image": original_image,
                "prompt": prompt_text,
                "height": int(height),
                "width": int(width),
                "num_inference_steps": int(steps),
                "generator": generator,
                "true_cfg_scale": float(guidance_scale),
            }
            if negative_prompt is not None:
                pipe_args["negative_prompt"] = negative_prompt

            result = pipe(**pipe_args).images[0]
            upload_image_to_hub(result, OUTPUT_DATASET_ID, folder_prefix="generated")

            caption = f"prompt{prompt_no} var{variant_no} | seed={current_seed} | {width}x{height}"
            outputs.append((result, caption))

            # ★ Lower the memory pressure of back-to-back generations.
            del generator
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            gc.collect()

    seeds_text = "\n".join(f"{idx}: {value}" for idx, value in enumerate(seeds, start=1))
    return outputs, seeds_text
# Extra CSS handed to gr.Blocks: centers the #col-container column with a
# 960px maximum width and enlarges the heading inside #main-title.
css = """
#col-container {
margin: 0 auto;
max-width: 960px;
}
#main-title h1 {font-size: 2.1em !important;}
"""
# Gradio UI definition: widgets first, then the event wiring.
with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
with gr.Column(elem_id="col-container"):
gr.Markdown("# **RAINBO PRO 3D IMAGE EDIT**", elem_id="main-title")
gr.Markdown("Test) adapters for the [Qwen-Image-Edit](https://huggingface.co/Qwen/Qwen-Image-Edit-2509) model.")
with gr.Row(equal_height=True):
with gr.Column():
# Input image; type="pil" means handlers receive a PIL image.
input_image = gr.Image(label="Upload Image", type="pil", height=290)
# Size controls: preset, smart scale and smart long-side limit.
with gr.Row():
size_preset = gr.Dropdown(
label="Image Size Preset",
choices=SIZE_PRESETS,
value="Smart Auto (closest base + scale)",
)
scale_choice = gr.Dropdown(
label="Smart Scale",
choices=SCALE_CHOICES,
value="Auto",
)
# Choices are offered as strings; the size helpers convert the value with int().
smart_max_long = gr.Dropdown(
label="Smart Max Long Side (Safe default 1024)",
choices=[str(x) for x in SMART_MAX_CHOICES],
value=str(SMART_MAX_LONG_SIDE_DEFAULT),
)
# Width/height sliders; they are also outputs of the size-update handler below.
with gr.Row():
width = gr.Slider(label="Width", minimum=256, maximum=2048, step=8, value=1024)
height = gr.Slider(label="Height", minimum=256, maximum=2048, step=8, value=1024)
size_info = gr.Textbox(label="Size Decision Info", lines=2)
# ---- main prompts (1-3) ----
prompt1 = gr.Text(
label="Prompt 1",
value="move camera to below floor, make this girl to another standing pose, dynamic camera angle from below",
)
prompt2 = gr.Text(
label="Prompt 2",
value="make this girl to another sitting pose",
)
prompt3 = gr.Text(
label="Prompt 3",
value="make this girl to another standing pose with hand sign",
)
# ---- swap buttons ----
with gr.Row():
swap_left = gr.Button("◀", variant="secondary")
swap_right = gr.Button("▶", variant="secondary")
# ---- alt prompts (4-6) ----
with gr.Accordion("Alt Prompts (4-6)", open=False):
prompt4 = gr.Text(
label="Prompt 4",
value="camera zoom in to her face, cute face with smiling, aesthetics image film,",
)
prompt5 = gr.Text(
label="Prompt 5",
value="camera zoom out and she split legs, cute posing",
)
prompt6 = gr.Text(
label="Prompt 6",
value="camera move to up, she look at another, and sitting,",
)
run_button = gr.Button("Generate 6 Images (3 prompts x 2 seeds)", variant="primary")
with gr.Column():
# Gallery that receives the (image, caption) pairs returned by infer_6pack.
output_gallery = gr.Gallery(
label="Outputs (3 x 2 = 6)",
columns=3,
rows=2,
height=380,
preview=True,
)
# Style labels; each one must match a label handled by set_adapter.
lora_adapter = gr.Dropdown(
label="Choose Editing Style",
choices=[
"Photo-to-Anime",
"Multiple-Angles",
"Light-Restoration",
"Multi-Angle-Lighting",
"Upscale-Image",
"Relight",
"Next-Scene",
"Edit-Skin",
],
value="Next-Scene",
)
with gr.Accordion("Advanced Settings", open=False, visible=True):
seed = gr.Slider(label="Base Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
randomize_seed = gr.Checkbox(label="Randomize Seeds (6 images)", value=True)
guidance_scale = gr.Slider(label="Guidance Scale", minimum=1.0, maximum=10.0, step=0.1, value=1.0)
steps = gr.Slider(label="Inference Steps", minimum=1, maximum=50, step=1, value=6) # ★ default 6
seeds_box = gr.Textbox(label="Used Seeds (1..6)", lines=6)
# Size UI update: follows changes of the preset, scale, max long side and uploaded image.
def _size_update(preset, img, scale, mx, w, h):
return apply_size_controls(preset, img, scale, mx, w, h)
for evt in (size_preset.change, scale_choice.change, smart_max_long.change, input_image.change):
evt(
fn=_size_update,
inputs=[size_preset, input_image, scale_choice, smart_max_long, width, height],
outputs=[width, height, size_info],
)
# Left/right buttons: both swap prompts 1-3 with prompts 4-6.
for btn in (swap_left, swap_right):
btn.click(
fn=swap_prompt_sets,
inputs=[prompt1, prompt2, prompt3, prompt4, prompt5, prompt6],
outputs=[prompt1, prompt2, prompt3, prompt4, prompt5, prompt6],
)
# Run button: the inputs list must stay in the same order as infer_6pack's parameters.
run_button.click(
fn=infer_6pack,
inputs=[
input_image,
prompt1, prompt2, prompt3,
lora_adapter,
size_preset, scale_choice, smart_max_long,
width, height,
seed, randomize_seed, guidance_scale, steps,
],
outputs=[output_gallery, seeds_box],
)
# Script entry point: enable the request queue (max_size=30) and launch the app
# with the MCP server on, SSR mode off and errors shown to the user.
if __name__ == "__main__":
demo.queue(max_size=30).launch(mcp_server=True, ssr_mode=False, show_error=True)