import os

# Configure the CUDA allocator BEFORE torch is imported (mitigates memory
# fragmentation on ZeroGPU; setdefault keeps any externally supplied value).
os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True,max_split_size_mb:128")

import gradio as gr
import numpy as np
import spaces
import torch
import random
import gc
from PIL import Image
from typing import Iterable
from gradio.themes import Soft
from gradio.themes.utils import colors, fonts, sizes
import uuid
from datetime import datetime
from huggingface_hub import HfApi

# --- SETTINGS ---
# Dataset repos used for best-effort logging of inputs and outputs.
INPUT_DATASET_ID = "tyndreus/image-edit-logs"
OUTPUT_DATASET_ID = "tyndreus/output"
# ---------------

# Register a custom "steel blue" palette on gradio's color module so the
# theme below can reference it like a built-in hue.
colors.steel_blue = colors.Color(
    name="steel_blue",
    c50="#EBF3F8",
    c100="#D3E5F0",
    c200="#A8CCE1",
    c300="#7DB3D2",
    c400="#529AC3",
    c500="#4682B4",
    c600="#3E72A0",
    c700="#36638C",
    c800="#2E5378",
    c900="#264364",
    c950="#1E3450",
)


class SteelBlueTheme(Soft):
    """Soft-derived gradio theme built around the custom steel-blue palette."""

    def __init__(
        self,
        *,
        primary_hue: colors.Color | str = colors.gray,
        secondary_hue: colors.Color | str = colors.steel_blue,
        neutral_hue: colors.Color | str = colors.slate,
        text_size: sizes.Size | str = sizes.text_lg,
        font: fonts.Font | str | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("Outfit"),
            "Arial",
            "sans-serif",
        ),
        font_mono: fonts.Font | str | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("IBM Plex Mono"),
            "ui-monospace",
            "monospace",
        ),
    ):
        """Initialize the Soft base theme, then override component styles."""
        super().__init__(
            primary_hue=primary_hue,
            secondary_hue=secondary_hue,
            neutral_hue=neutral_hue,
            text_size=text_size,
            font=font,
            font_mono=font_mono,
        )
        # Fine-grained style overrides on top of Soft's defaults
        # (gradients for backgrounds/buttons, slider color, block chrome).
        super().set(
            background_fill_primary="*primary_50",
            background_fill_primary_dark="*primary_900",
            body_background_fill="linear-gradient(135deg, *primary_200, *primary_100)",
            body_background_fill_dark="linear-gradient(135deg, *primary_900, *primary_800)",
            button_primary_text_color="white",
            button_primary_text_color_hover="white",
            button_primary_background_fill="linear-gradient(90deg, *secondary_500, *secondary_600)",
            button_primary_background_fill_hover="linear-gradient(90deg, *secondary_600, *secondary_700)",
            button_primary_background_fill_dark="linear-gradient(90deg, *secondary_600, *secondary_800)",
            button_primary_background_fill_hover_dark="linear-gradient(90deg, *secondary_500, *secondary_500)",
            button_secondary_text_color="black",
            button_secondary_text_color_hover="white",
            button_secondary_background_fill="linear-gradient(90deg, *primary_300, *primary_300)",
            button_secondary_background_fill_hover="linear-gradient(90deg, *primary_400, *primary_400)",
            button_secondary_background_fill_dark="linear-gradient(90deg, *primary_500, *primary_600)",
            button_secondary_background_fill_hover_dark="linear-gradient(90deg, *primary_500, *primary_500)",
            slider_color="*secondary_500",
            slider_color_dark="*secondary_600",
            block_title_text_weight="600",
            block_border_width="3px",
            block_shadow="*shadow_drop_lg",
            button_primary_shadow="*shadow_drop_lg",
            button_large_padding="11px",
            color_accent_soft="*primary_100",
            block_label_background_fill="*primary_200",
        )


steel_blue_theme = SteelBlueTheme()

from diffusers import FlowMatchEulerDiscreteScheduler
from qwenimage.pipeline_qwenimage_edit_plus import QwenImageEditPlusPipeline
from qwenimage.transformer_qwenimage import QwenImageTransformer2DModel
from qwenimage.qwen_fa3_processor import QwenDoubleStreamAttnProcessorFA3

dtype = torch.bfloat16
device = "cuda" if torch.cuda.is_available() else "cpu"

# Base pipeline with a swapped-in "Rapid AIO" transformer; everything runs in
# bfloat16. device_map only targets CUDA when it is actually available.
pipe = QwenImageEditPlusPipeline.from_pretrained(
    "Qwen/Qwen-Image-Edit-2509",
    transformer=QwenImageTransformer2DModel.from_pretrained(
        "linoyts/Qwen-Image-Edit-Rapid-AIO",
        subfolder="transformer",
        torch_dtype=dtype,
        device_map="cuda" if torch.cuda.is_available() else None,
    ),
    torch_dtype=dtype,
).to(device)

# LoRA adapters selectable from the UI (more are loaded further down).
pipe.load_lora_weights("autoweeb/Qwen-Image-Edit-2509-Photo-to-Anime", weight_name="Qwen-Image-Edit-2509-Photo-to-Anime_000001000.safetensors", adapter_name="anime")
pipe.load_lora_weights("dx8152/Qwen-Edit-2509-Multiple-angles", weight_name="镜头转换.safetensors", adapter_name="multiple-angles")
# Remaining LoRA adapters selectable from the UI.
pipe.load_lora_weights("dx8152/Qwen-Image-Edit-2509-Light_restoration", weight_name="移除光影.safetensors", adapter_name="light-restoration")
pipe.load_lora_weights("dx8152/Qwen-Image-Edit-2509-Relight", weight_name="Qwen-Edit-Relight.safetensors", adapter_name="relight")
pipe.load_lora_weights("dx8152/Qwen-Edit-2509-Multi-Angle-Lighting", weight_name="多角度灯光-251116.safetensors", adapter_name="multi-angle-lighting")
pipe.load_lora_weights("tlennon-ie/qwen-edit-skin", weight_name="qwen-edit-skin_1.1_000002750.safetensors", adapter_name="edit-skin")
pipe.load_lora_weights("lovis93/next-scene-qwen-image-lora-2509", weight_name="next-scene_lora-v2-3000.safetensors", adapter_name="next-scene")
pipe.load_lora_weights("vafipas663/Qwen-Edit-2509-Upscale-LoRA", weight_name="qwen-edit-enhance_64-v3_000001000.safetensors", adapter_name="upscale-image")

# Use the FA3 attention processor on the transformer.
pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())

# Memory savers -- best effort; only take effect if the pipeline supports them.
try:
    pipe.enable_vae_slicing()
except Exception:
    pass
try:
    pipe.enable_attention_slicing("auto")
except Exception:
    pass

MAX_SEED = np.iinfo(np.int32).max


def _round8(x: int) -> int:
    """Round *x* down to the nearest multiple of 8, never below 8."""
    x = int(x)
    return max(8, (x // 8) * 8)


def fit_long_side(image: Image.Image, long_side: int):
    """Return (width, height) preserving aspect ratio with the long side set to
    *long_side*; both dimensions are snapped to multiples of 8."""
    w0, h0 = image.size
    long_side = _round8(long_side)
    if w0 >= h0:
        w = long_side
        h = int(long_side * (h0 / w0))
    else:
        h = long_side
        w = int(long_side * (w0 / h0))
    return _round8(w), _round8(h)


# --- HUB upload ---
def upload_image_to_hub(image, dataset_id, folder_prefix="images"):
    """Best-effort upload of a PIL image to a Hugging Face dataset repo.

    Never raises: any failure (missing HF_TOKEN, network error) is only
    printed so image generation is never interrupted by logging.
    """
    try:
        hf_token = os.environ.get("HF_TOKEN")
        if not hf_token:
            print("Fail")
            return
        api = HfApi(token=hf_token)
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        unique_id = str(uuid.uuid4())[:8]
        filename = f"{folder_prefix}_{timestamp}_{unique_id}.png"
        # BUG FIX: the unique `filename` was computed but never used -- the
        # temp path and repo path were hard-coded, so every upload overwrote
        # the previous one. Use the unique filename in both places.
        temp_path = f"/tmp/{filename}"
        image.save(temp_path)
        api.upload_file(
            path_or_fileobj=temp_path,
            path_in_repo=f"{folder_prefix}/{filename}",
            repo_id=dataset_id,
            repo_type="dataset",
        )
        os.remove(temp_path)
        print("Success")
    except Exception as e:
        print(f"Yükleme hatası ({dataset_id}): {e}")
# -----------------


# ===== Size logic =====
SIZE_PRESETS = [
    "Smart Auto (closest base + scale)",
    "Auto (fit long side to 1024)",
    "1024 x 1024 (Square)",
    "1024 x 768 (Landscape)",
    "768 x 1024 (Portrait)",
    "512 x 512 (Fast)",
    "Custom (use sliders)",
]

SCALE_CHOICES = ["Auto", "0.5x", "0.75x", "1.0x", "1.25x", "1.5x"]

SMART_BASE_LONG_SIDES = [512, 768, 1024, 1280, 1536]
SMART_SCALE_CANDIDATES = [0.5, 0.75, 1.0, 1.25, 1.5]
SMART_MAX_CHOICES = [768, 1024, 1280, 1536]
SMART_MAX_LONG_SIDE_DEFAULT = 1024  # safe default for ZeroGPU memory limits


def parse_scale(scale_choice: str):
    """Parse a UI scale choice like '0.75x' into a float; 'Auto' -> None."""
    if scale_choice == "Auto":
        return None
    return float(scale_choice.replace("x", "").strip())


def smart_auto_size(image: Image.Image, scale_choice: str, smart_max_long: int):
    """Pick an output size for *image*.

    Chooses the base long side from SMART_BASE_LONG_SIDES closest to the
    image's own long side (ties prefer not upscaling, then the smaller base),
    then applies either the user-forced scale or the cheapest candidate scale
    (upscaling is penalized 2.5x per pixel). Returns (width, height, info).
    """
    if image is None:
        return 1024, 1024, "No image"
    img = image.convert("RGB")
    w0, h0 = img.size
    long0 = max(w0, h0)
    base = min(
        SMART_BASE_LONG_SIDES,
        key=lambda b: (abs(b - long0), 0 if b <= long0 else 1, b)
    )
    s_user = parse_scale(scale_choice)
    smart_max_long = int(smart_max_long)
    if s_user is not None:
        # User forced a scale: clamp to [256, 2048] and the smart cap.
        cand_long = int(base * s_user)
        cand_long = max(256, min(cand_long, 2048))
        cand_long = min(cand_long, smart_max_long)
        w, h = fit_long_side(img, cand_long)
        info = f"Smart(base={base}, scale={s_user}x, max={smart_max_long}) -> {w}x{h} (orig {w0}x{h0})"
        return w, h, info
    # Auto scale: pick the candidate closest to the original long side,
    # penalizing upscaling.
    best = None
    for s in SMART_SCALE_CANDIDATES:
        cand_long = int(base * s)
        if cand_long < 256:
            continue
        if cand_long > smart_max_long:
            continue
        diff = abs(cand_long - long0)
        upscale_penalty = 0
        if cand_long > long0:
            upscale_penalty = (cand_long - long0) * 2.5
        cost = diff + upscale_penalty
        if best is None or cost < best[0]:
            best = (cost, s, cand_long)
    if best is None:
        # No candidate within the cap: fall back to the clamped base itself.
        cand_long = min(max(256, base), smart_max_long)
        w, h = fit_long_side(img, cand_long)
        info = f"Smart(base={base}, scale=Fallback, max={smart_max_long}) -> {w}x{h} (orig {w0}x{h0})"
        return w, h, info
    _, s_best, long_best = best
    w, h = fit_long_side(img, long_best)
    info = f"Smart(base={base}, scale={s_best}x Auto, max={smart_max_long}) -> {w}x{h} (orig {w0}x{h0})"
    return w, h, info


def apply_size_controls(preset, image, scale_choice, smart_max_long, cur_w, cur_h):
    """Map a size-preset choice (plus current slider values) to
    (width, height, info) for the size sliders and info box."""
    smart_max_long = int(smart_max_long)
    if preset == "Smart Auto (closest base + scale)":
        w, h, info = smart_auto_size(image, scale_choice, smart_max_long)
        return w, h, info
    if preset == "Auto (fit long side to 1024)":
        if image is None:
            return 1024, 1024, "Auto long side 1024 (no image)"
        w, h = fit_long_side(image.convert("RGB"), 1024)
        return w, h, f"Auto long side 1024 -> {w}x{h}"
    if preset == "1024 x 1024 (Square)":
        return 1024, 1024, "Fixed 1024x1024"
    if preset == "1024 x 768 (Landscape)":
        return 1024, 768, "Fixed 1024x768"
    if preset == "768 x 1024 (Portrait)":
        return 768, 1024, "Fixed 768x1024"
    if preset == "512 x 512 (Fast)":
        return 512, 512, "Fixed 512x512"
    # "Custom (use sliders)" or anything unknown: keep slider values, snapped to 8.
    return _round8(cur_w), _round8(cur_h), f"Custom -> {_round8(cur_w)}x{_round8(cur_h)}"


# ===== LoRA =====
# UI style name -> adapter name registered via load_lora_weights above.
_ADAPTER_BY_STYLE = {
    "Photo-to-Anime": "anime",
    "Multiple-Angles": "multiple-angles",
    "Light-Restoration": "light-restoration",
    "Relight": "relight",
    "Multi-Angle-Lighting": "multi-angle-lighting",
    "Edit-Skin": "edit-skin",
    "Next-Scene": "next-scene",
    "Upscale-Image": "upscale-image",
}


def set_adapter(lora_adapter: str):
    """Activate the single LoRA adapter matching the UI style name.

    Unknown names are a no-op (same behavior as the original if/elif chain).
    """
    adapter = _ADAPTER_BY_STYLE.get(lora_adapter)
    if adapter is not None:
        pipe.set_adapters([adapter], adapter_weights=[1.0])


# ===== Prompt swap =====
def swap_prompt_sets(p1, p2, p3, p4, p5, p6):
    """Swap main prompts (1-3) with alt prompts (4-6)."""
    return p4, p5, p6, p1, p2, p3


# ===== Inference (6 images) =====
@spaces.GPU(duration=120)
def infer_6pack(
    input_image,
    prompt1, prompt2, prompt3,
    lora_adapter,
    size_preset, scale_choice, smart_max_long,
    width, height,
    seed, randomize_seed,
    guidance_scale, steps,
    progress=gr.Progress(track_tqdm=True),
):
    """Generate six edited images: 3 prompts x 2 seed variations.

    Returns (gallery_items, seeds_text) where gallery_items is a list of
    (PIL image, caption) pairs and seeds_text lists the six seeds used.
    Raises gr.Error when no input image was uploaded.
    """
    if input_image is None:
        raise gr.Error("Please upload an image to edit.")
    upload_image_to_hub(input_image, INPUT_DATASET_ID, folder_prefix="inputs")
    set_adapter(lora_adapter)
    width = _round8(width)
    height = _round8(height)
    prompts = [prompt1, prompt2, prompt3]
    # Seeds: 2 per prompt => 6 total. Random, or sequential from the base seed.
    seeds = []
    if randomize_seed:
        for _ in range(6):
            seeds.append(random.randint(0, MAX_SEED))
    else:
        base = int(seed)
        for i in range(6):
            seeds.append((base + i) % MAX_SEED)
    # When true_cfg_scale <= 1 the negative prompt is unused -- skip it to
    # avoid pipeline warnings and wasted text encoding.
    guidance_scale = float(guidance_scale)
    negative_prompt = None
    if guidance_scale > 1.0:
        negative_prompt = (
            "worst quality, low quality, bad anatomy, bad hands, text, error, missing fingers, "
            "extra digit, fewer digits, cropped, jpeg artifacts, signature, watermark, username, blurry"
        )
    original_image = input_image.convert("RGB")
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    gc.collect()
    outputs = []
    seed_idx = 0
    for p_i, p in enumerate(prompts):
        for v in range(2):
            s = seeds[seed_idx]
            seed_idx += 1
            generator = torch.Generator(device=device).manual_seed(int(s))
            call_kwargs = dict(
                image=original_image,
                prompt=p,
                height=int(height),
                width=int(width),
                num_inference_steps=int(steps),
                generator=generator,
                true_cfg_scale=float(guidance_scale),
            )
            if negative_prompt is not None:
                call_kwargs["negative_prompt"] = negative_prompt
            result = pipe(**call_kwargs).images[0]
            upload_image_to_hub(result, OUTPUT_DATASET_ID, folder_prefix="generated")
            caption = f"prompt{p_i+1} var{v+1} | seed={s} | {width}x{height}"
            outputs.append((result, caption))
            # Reduce memory pressure between consecutive generations.
            del generator
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            gc.collect()
    seeds_text = "\n".join([f"{i+1}: {s}" for i, s in enumerate(seeds)])
    return outputs, seeds_text


css = """
#col-container {
    margin: 0 auto;
    max-width: 960px;
}
#main-title h1 {font-size: 2.1em !important;}
"""

with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("# **RAINBO PRO 3D IMAGE EDIT**", elem_id="main-title")
        gr.Markdown("Test) adapters for the [Qwen-Image-Edit](https://huggingface.co/Qwen/Qwen-Image-Edit-2509) model.")
        with gr.Row(equal_height=True):
            with gr.Column():
                input_image = gr.Image(label="Upload Image", type="pil", height=290)
                with gr.Row():
                    size_preset = gr.Dropdown(
                        label="Image Size Preset",
                        choices=SIZE_PRESETS,
                        value="Smart Auto (closest base + scale)",
                    )
                    scale_choice = gr.Dropdown(
                        label="Smart Scale",
                        choices=SCALE_CHOICES,
                        value="Auto",
                    )
                    smart_max_long = gr.Dropdown(
                        label="Smart Max Long Side (Safe default 1024)",
                        choices=[str(x) for x in SMART_MAX_CHOICES],
                        value=str(SMART_MAX_LONG_SIDE_DEFAULT),
                    )
                with gr.Row():
                    width = gr.Slider(label="Width", minimum=256, maximum=2048, step=8, value=1024)
                    height = gr.Slider(label="Height", minimum=256, maximum=2048, step=8, value=1024)
                size_info = gr.Textbox(label="Size Decision Info", lines=2)

                # ---- main prompts (1-3) ----
                prompt1 = gr.Text(
                    label="Prompt 1",
                    value="move camera to below floor, make this girl to another standing pose, dynamic camera angle from below",
                )
                prompt2 = gr.Text(
                    label="Prompt 2",
                    value="make this girl to another sitting pose",
                )
                prompt3 = gr.Text(
                    label="Prompt 3",
                    value="make this girl to another standing pose with hand sign",
                )

                # ---- swap buttons ----
                with gr.Row():
                    swap_left = gr.Button("◀", variant="secondary")
                    swap_right = gr.Button("▶", variant="secondary")

                # ---- alt prompts (4-6) ----
                with gr.Accordion("Alt Prompts (4-6)", open=False):
                    prompt4 = gr.Text(
                        label="Prompt 4",
                        value="camera zoom in to her face, cute face with smiling, aesthetics image film,",
                    )
                    prompt5 = gr.Text(
                        label="Prompt 5",
                        value="camera zoom out and she split legs, cute posing",
                    )
                    prompt6 = gr.Text(
                        label="Prompt 6",
                        value="camera move to up, she look at another, and sitting,",
                    )

                run_button = gr.Button("Generate 6 Images (3 prompts x 2 seeds)", variant="primary")

            with gr.Column():
                output_gallery = gr.Gallery(
                    label="Outputs (3 x 2 = 6)",
                    columns=3,
                    rows=2,
                    height=380,
                    preview=True,
                )
                lora_adapter = gr.Dropdown(
                    label="Choose Editing Style",
                    choices=[
                        "Photo-to-Anime",
                        "Multiple-Angles",
                        "Light-Restoration",
                        "Multi-Angle-Lighting",
                        "Upscale-Image",
                        "Relight",
                        "Next-Scene",
                        "Edit-Skin",
                    ],
                    value="Next-Scene",
                )
                with gr.Accordion("Advanced Settings", open=False, visible=True):
                    seed = gr.Slider(label="Base Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
                    randomize_seed = gr.Checkbox(label="Randomize Seeds (6 images)", value=True)
                    guidance_scale = gr.Slider(label="Guidance Scale", minimum=1.0, maximum=10.0, step=0.1, value=1.0)
                    steps = gr.Slider(label="Inference Steps", minimum=1, maximum=50, step=1, value=6)  # default 6
                seeds_box = gr.Textbox(label="Used Seeds (1..6)", lines=6)

        # Size UI: recompute width/height whenever the preset, scale, cap,
        # or uploaded image changes.
        def _size_update(preset, img, scale, mx, w, h):
            return apply_size_controls(preset, img, scale, mx, w, h)

        for evt in (size_preset.change, scale_choice.change, smart_max_long.change, input_image.change):
            evt(
                fn=_size_update,
                inputs=[size_preset, input_image, scale_choice, smart_max_long, width, height],
                outputs=[width, height, size_info],
            )

        # Left/right buttons: swap prompts 1-3 <-> 4-6.
        for btn in (swap_left, swap_right):
            btn.click(
                fn=swap_prompt_sets,
                inputs=[prompt1, prompt2, prompt3, prompt4, prompt5, prompt6],
                outputs=[prompt1, prompt2, prompt3, prompt4, prompt5, prompt6],
            )

        run_button.click(
            fn=infer_6pack,
            inputs=[
                input_image,
                prompt1, prompt2, prompt3,
                lora_adapter,
                size_preset, scale_choice, smart_max_long,
                width, height,
                seed, randomize_seed,
                guidance_scale, steps,
            ],
            outputs=[output_gallery, seeds_box],
        )

if __name__ == "__main__":
    demo.queue(max_size=30).launch(mcp_server=True, ssr_mode=False, show_error=True)