import os
import gc
import gradio as gr
import numpy as np
import spaces
import torch
import random
from PIL import Image
from typing import Iterable
from gradio.themes import Soft
from gradio.themes.utils import colors, fonts, sizes

# ═══════════════════════════════════════════════════════════════════════
# THEME
# ═══════════════════════════════════════════════════════════════════════

# Custom orange-red palette, attached to gradio's ``colors`` module so it can
# be referenced the same way as the built-in hues.
colors.fire_red = colors.Color(
    name="fire_red",
    c50="#FFF5F0",
    c100="#FFE8DB",
    c200="#FFD0B5",
    c300="#FFB088",
    c400="#FF8C5A",
    c500="#FF6B35",
    c600="#E8531F",
    c700="#CC4317",
    c800="#A63812",
    c900="#80300F",
    c950="#5C220A",
)


class FireRedTheme(Soft):
    """``Soft``-derived theme using the ``fire_red`` palette as secondary hue.

    All constructor arguments are keyword-only and are forwarded unchanged to
    ``Soft.__init__``; afterwards a fixed set of style variables is applied
    through ``set()``.
    """

    def __init__(
        self,
        *,
        primary_hue: colors.Color | str = colors.gray,
        secondary_hue: colors.Color | str = colors.fire_red,
        neutral_hue: colors.Color | str = colors.slate,
        text_size: sizes.Size | str = sizes.text_md,
        font: fonts.Font | str | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("Inter"),
            "system-ui",
            "sans-serif",
        ),
        font_mono: fonts.Font | str | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("JetBrains Mono"),
            "ui-monospace",
            "monospace",
        ),
    ):
        super().__init__(
            primary_hue=primary_hue,
            secondary_hue=secondary_hue,
            neutral_hue=neutral_hue,
            text_size=text_size,
            font=font,
            font_mono=font_mono,
        )

        # The primary button uses the same two gradients in light and dark
        # mode, so spell each one out only once.
        gradient_rest = "linear-gradient(135deg, *secondary_500, *secondary_600)"
        gradient_hover = "linear-gradient(135deg, *secondary_600, *secondary_700)"

        overrides = {
            # Page and block surfaces
            "body_background_fill": "#f0f2f6",
            "body_background_fill_dark": "*neutral_950",
            "background_fill_primary": "white",
            "background_fill_primary_dark": "*neutral_900",
            "block_background_fill": "white",
            "block_background_fill_dark": "*neutral_800",
            "block_border_width": "1px",
            "block_border_color": "*neutral_200",
            "block_border_color_dark": "*neutral_700",
            "block_shadow": "0 1px 4px rgba(0,0,0,0.05)",
            "block_shadow_dark": "0 1px 4px rgba(0,0,0,0.25)",
            "block_title_text_weight": "600",
            "block_label_background_fill": "*neutral_50",
            "block_label_background_fill_dark": "*neutral_800",
            # Primary button
            "button_primary_text_color": "white",
            "button_primary_text_color_hover": "white",
            "button_primary_background_fill": gradient_rest,
            "button_primary_background_fill_hover": gradient_hover,
            "button_primary_background_fill_dark": gradient_rest,
            "button_primary_background_fill_hover_dark": gradient_hover,
            "button_primary_shadow": "0 4px 14px rgba(232, 83, 31, 0.25)",
            # Secondary button
            "button_secondary_text_color": "*secondary_700",
            "button_secondary_text_color_dark": "*secondary_300",
            "button_secondary_background_fill": "*secondary_50",
            "button_secondary_background_fill_hover": "*secondary_100",
            "button_secondary_background_fill_dark": "rgba(255, 107, 53, 0.1)",
            "button_secondary_background_fill_hover_dark": "rgba(255, 107, 53, 0.2)",
            "button_large_padding": "12px 24px",
            # Inputs and accents
            "slider_color": "*secondary_500",
            "slider_color_dark": "*secondary_500",
            "input_border_color_focus": "*secondary_400",
            "input_border_color_focus_dark": "*secondary_500",
            "color_accent_soft": "*secondary_50",
            "color_accent_soft_dark": "rgba(255, 107, 53, 0.15)",
        }
        super().set(**overrides)


theme = FireRedTheme()

# ═══════════════════════════════════════════════════════════════════════
# GLOBAL CUDA OPTIMIZATIONS
# ═══════════════════════════════════════════════════════════════════════

# Enable cuDNN autotuner — finds the fastest convolution algorithms for
# the hardware and input sizes after a short warm-up.
torch.backends.cudnn.benchmark = True

# Allow TF32 on Ampere+ GPUs for ~3× faster matmuls with negligible
# precision loss (already bf16 pipeline, so this is free perf).
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True
torch.set_float32_matmul_precision("high")

# ═══════════════════════════════════════════════════════════════════════
# MODEL
# ═══════════════════════════════════════════════════════════════════════

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print("CUDA_VISIBLE_DEVICES =", os.environ.get("CUDA_VISIBLE_DEVICES"))
print("torch.__version__ =", torch.__version__)
print("device =", device)

from diffusers import FlowMatchEulerDiscreteScheduler
from qwenimage.pipeline_qwenimage_edit_plus import QwenImageEditPlusPipeline
from qwenimage.transformer_qwenimage import QwenImageTransformer2DModel
from qwenimage.qwen_fa3_processor import QwenDoubleStreamAttnProcessorFA3

dtype = torch.bfloat16

# Load transformer separately so we can optimise it before plugging in.
# Place it on the device selected above rather than a hard-coded "cuda", so
# the CPU fallback chosen for `device` is honoured here as well.
transformer = QwenImageTransformer2DModel.from_pretrained(
    "prithivMLmods/Qwen-Image-Edit-Rapid-AIO-V23",
    torch_dtype=dtype,
    device_map=device.type,
)

# Attempt torch.compile for a fused-kernel speed-up on the denoising
# backbone. torch.compile only wraps the module here; the actual compilation
# happens on the first forward pass, so a try/except around this call cannot
# catch compile-time failures on its own. Enabling `suppress_errors` makes
# TorchDynamo fall back to eager execution when compilation fails later
# (older driver / torch version / dynamic-shape issues).
try:
    import torch._dynamo

    transformer = torch.compile(transformer, mode="reduce-overhead")
    torch._dynamo.config.suppress_errors = True
    print("torch.compile applied to transformer (reduce-overhead).")
except Exception as e:
    print(f"torch.compile skipped: {e}")

pipe = QwenImageEditPlusPipeline.from_pretrained(
    "FireRedTeam/FireRed-Image-Edit-1.1",
    transformer=transformer,
    torch_dtype=dtype,
).to(device)

# Flash Attention 3 processor — fastest path when available
try:
    pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())
    print("Flash Attention 3 Processor set successfully.")
except Exception as e:
    print(f"Warning: Could not set FA3 processor: {e}")

# VAE optimisations — process large images in tiles / slices so we
# never OOM on the decode step, and still stay fast for normal sizes.
try:
    pipe.vae.enable_tiling()
    print("VAE tiling enabled.")
except Exception as e:
    # Best-effort optimisation: keep going, but say why it was skipped.
    print(f"VAE tiling not enabled: {e}")

try:
    pipe.vae.enable_slicing()
    print("VAE slicing enabled.")
except Exception as e:
    print(f"VAE slicing not enabled: {e}")

# ── Warmup pass ─────────────────────────────────────────────────────
# The first inference is always slower (CUDA context init, cuDNN
# autotuner, torch.compile tracing). Run a tiny dummy forward so that
# cost is paid at startup, not on the first user request.
print("Running warmup inference …")
try:
    _warmup_img = Image.new("RGB", (64, 64), color=(128, 128, 128))
    _warmup_gen = torch.Generator(device=device).manual_seed(0)
    with torch.inference_mode():
        pipe(
            image=[_warmup_img],
            prompt="warmup",
            negative_prompt="",
            height=64,
            width=64,
            num_inference_steps=1,
            generator=_warmup_gen,
            true_cfg_scale=1.0,
        )
    del _warmup_img, _warmup_gen
    gc.collect()
    torch.cuda.empty_cache()
    print("Warmup complete.")
except Exception as e:
    print(f"Warmup skipped: {e}")

MAX_SEED = np.iinfo(np.int32).max

DEFAULT_NEGATIVE_PROMPT = (
    "worst quality, low quality, bad anatomy, bad hands, text, error, "
    "missing fingers, extra digit, fewer digits, cropped, jpeg artifacts, "
    "signature, watermark, username, blurry"
)

# Placeholder shown in the info box before any edit has been generated.
INFO_PLACEHOLDER = "*Generate an edit to see details here.*"

# Hub pages of the two checkpoints loaded by this app (used for UI links).
_FIRERED_URL = "https://huggingface.co/FireRedTeam/FireRed-Image-Edit-1.1"
_RAPID_AIO_URL = "https://huggingface.co/prithivMLmods/Qwen-Image-Edit-Rapid-AIO-V23"

# ═══════════════════════════════════════════════════════════════════════
# HELPERS
# ═══════════════════════════════════════════════════════════════════════

_TARGET_LONG_SIDE = 1024
_SIZE_MULTIPLE = 8


def _snap_to_multiple(value):
    """Round *value* down to a multiple of 8, but never below 8."""
    return max(_SIZE_MULTIPLE, (value // _SIZE_MULTIPLE) * _SIZE_MULTIPLE)


def _fit_dimensions(w, h):
    """Return ``(width, height)`` with the longer side scaled to 1024.

    The aspect ratio is preserved and both sides are rounded down to a
    multiple of 8. A non-positive input side yields the 1024×1024 default
    instead of a division by zero, and very elongated inputs are clamped so
    neither output side can collapse to 0.
    """
    if w <= 0 or h <= 0:
        return _TARGET_LONG_SIDE, _TARGET_LONG_SIDE
    if w > h:
        nw, nh = _TARGET_LONG_SIDE, int(_TARGET_LONG_SIDE * h / w)
    else:
        nh, nw = _TARGET_LONG_SIDE, int(_TARGET_LONG_SIDE * w / h)
    return _snap_to_multiple(nw), _snap_to_multiple(nh)


def update_dimensions_on_upload(image):
    """Compute the output ``(width, height)`` for *image*.

    Args:
        image: Object exposing a ``size`` attribute of ``(width, height)``
            (e.g. a PIL image), or ``None``.

    Returns:
        ``(1024, 1024)`` when *image* is ``None``; otherwise the size with the
        longer side set to 1024, aspect ratio kept, both sides multiples of 8.
    """
    if image is None:
        return _TARGET_LONG_SIDE, _TARGET_LONG_SIDE
    w, h = image.size
    return _fit_dimensions(w, h)


def format_seed(seed_val):
    """Return *seed_val* rendered as an integer string."""
    return f"{int(seed_val)}"


def _gallery_item_source(item):
    """Unwrap one gallery entry into something PIL can work with.

    An entry may be a bare value or a tuple/list whose first element is the
    value. The value is returned as-is when it is a path string or a PIL
    image; for any other object its ``.name`` attribute is returned.
    """
    value = item[0] if isinstance(item, (tuple, list)) else item
    if isinstance(value, (str, Image.Image)):
        return value
    return value.name


def _load_rgb(item):
    """Load one gallery entry as an independent RGB PIL image."""
    source = _gallery_item_source(item)
    if isinstance(source, Image.Image):
        return source.convert("RGB")
    # convert() produces a new in-memory image, so the file can be closed.
    with Image.open(source) as im:
        return im.convert("RGB")


def format_info(seed_val, images):
    """Build the Markdown shown in the info box after a generation.

    Args:
        seed_val: Seed used for the generation (anything ``int()`` accepts).
        images: Gallery value; only the first entry is inspected.

    Returns:
        A Markdown string with the seed and, when the first image can be
        read, its original size and the computed output size.
    """
    seed_line = f"**Seed:** `{int(seed_val)}`"
    if not images:
        return seed_line
    try:
        source = _gallery_item_source(images[0])
        if isinstance(source, Image.Image):
            ow, oh = source.size
        else:
            # Only the size is needed; close the file handle right away.
            with Image.open(source) as im:
                ow, oh = im.size
        nw, nh = _fit_dimensions(ow, oh)
    except Exception as e:
        # The size line is optional: fall back to the seed, but log the cause.
        print(f"Could not read image dimensions: {e}")
        return seed_line
    return f"{seed_line}\n\n**Original:** {ow}×{oh} → **Output:** {nw}×{nh}"


# ═══════════════════════════════════════════════════════════════════════
# INFERENCE
# ═══════════════════════════════════════════════════════════════════════


@spaces.GPU
def infer(
    images,
    prompt,
    negative_prompt,
    seed,
    randomize_seed,
    guidance_scale,
    steps,
    progress=gr.Progress(track_tqdm=True),
):
    """Run one edit through the pipeline.

    Args:
        images: Gallery value (entries as accepted by ``_gallery_item_source``).
        prompt: Edit instruction; must not be empty or whitespace-only.
        negative_prompt: Text passed to the pipeline as ``negative_prompt``.
        seed: Seed for the generator; ignored when *randomize_seed* is true.
        randomize_seed: Draw a fresh seed in ``[0, MAX_SEED]`` when true.
        guidance_scale: Passed to the pipeline as ``true_cfg_scale``.
        steps: Number of inference steps.
        progress: Gradio progress tracker.

    Returns:
        ``(image, seed)`` — the first generated image and the seed used.

    Raises:
        gr.Error: If no image was supplied, the prompt is empty, or none of
            the supplied images could be loaded.
    """
    # ── Input validation (cheap, do first) ──────────────────────────
    if not images:
        raise gr.Error("⚠️ Please upload at least one image.")
    if not prompt or not prompt.strip():
        raise gr.Error("⚠️ Please enter an edit prompt.")

    pil_images = []
    for item in images:
        try:
            pil_images.append(_load_rgb(item))
        except Exception as e:
            print(f"Skipping invalid image: {e}")

    if not pil_images:
        raise gr.Error("⚠️ Could not process uploaded images.")

    # Slider / API values may arrive as floats, but manual_seed() and the
    # step count need real integers.
    seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
    steps = int(steps)

    generator = torch.Generator(device=device).manual_seed(seed)
    width, height = update_dimensions_on_upload(pil_images[0])

    try:
        # torch.inference_mode is strictly faster than torch.no_grad —
        # it also disables view-tracking and version-counter bumps.
        with torch.inference_mode():
            result = pipe(
                image=pil_images,
                prompt=prompt,
                negative_prompt=negative_prompt,
                height=height,
                width=width,
                num_inference_steps=steps,
                generator=generator,
                true_cfg_scale=guidance_scale,
            ).images[0]
        return result, seed
    finally:
        # GC *after* inference to reclaim any temporaries the pipeline
        # allocated. Avoid gc.collect() + empty_cache() *before*
        # inference — that stalls the CUDA stream for nothing.
        gc.collect()
        torch.cuda.empty_cache()


@spaces.GPU
def infer_example(images, prompt):
    """Run ``infer`` with example defaults (random seed, scale 1.0, 4 steps).

    Returns ``(None, 0)`` when *images* is empty; a single path string is
    wrapped in a list before being passed on.
    """
    if not images:
        return None, 0
    images_list = [images] if isinstance(images, str) else images
    return infer(images_list, prompt, DEFAULT_NEGATIVE_PROMPT, 0, True, 1.0, 4)


# ═══════════════════════════════════════════════════════════════════════
# PROMPT SUGGESTIONS
# ═══════════════════════════════════════════════════════════════════════

SUGGESTIONS = [
    "Transform into anime style",
    "Convert to oil painting",
    "Add dramatic sunset lighting",
    "Make it a pencil sketch",
    "Apply cyberpunk neon aesthetic",
    "Add snow and winter vibes",
    "Turn into watercolor art",
    "Make it look vintage 1970s",
]

# ═══════════════════════════════════════════════════════════════════════
# CSS
# ═══════════════════════════════════════════════════════════════════════

css = """
/* ── Container ─────────────────────────────────────────────── */
#col-container { margin: 0 auto; max-width: 1120px; }

/* ── Header ────────────────────────────────────────────────── */
.hdr {
    text-align: center;
    padding: 38px 28px 30px;
    background: linear-gradient(135deg, #1a1a2e 0%, #16213e 50%, #0f3460 100%);
    border-radius: 20px;
    margin-bottom: 22px;
    border: 1px solid rgba(255,107,53,.15);
    box-shadow: 0 12px 44px rgba(0,0,0,.10);
    position: relative;
    overflow: hidden;
}
.hdr::before {
    content: "";
    position: absolute;
    inset: 0;
    background: radial-gradient(ellipse at 25% 50%, rgba(255,107,53,.07) 0%, transparent 60%), radial-gradient(ellipse at 80% 25%, rgba(255,140,90,.05) 0%, transparent 50%);
    pointer-events: none;
}
.hdr > * { position: relative; z-index: 1; }
.hdr h1 {
    font-size: 2.6em;
    font-weight: 800;
    background: linear-gradient(135deg, #FF8C5A, #FF6B35, #FF4500);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    background-clip: text;
    margin: 0 0 8px;
    letter-spacing: -.02em;
    line-height: 1.15;
}
.hdr .sub { color: #94a3b8; font-size: 1.05em; margin: 0 0 16px; line-height: 1.55; }
.hdr .sub a {
    color: #FF8C5A;
    text-decoration: none;
    border-bottom: 1px solid rgba(255,140,90,.3);
    transition: border-color .2s;
}
.hdr .sub a:hover { border-bottom-color: #FF8C5A; }
.badges { display: flex; justify-content: center; gap: 8px; flex-wrap: wrap; }
.bdg {
    background: rgba(255,107,53,.12);
    color: #FFB088;
    padding: 5px 14px;
    border-radius: 100px;
    font-size: .82em;
    font-weight: 500;
    border: 1px solid rgba(255,107,53,.18);
}

/* ── Section Label ─────────────────────────────────────────── */
.stl {
    font-size: .92em;
    font-weight: 700;
    color: #475569;
    margin: 0 0 6px;
    display: flex;
    align-items: center;
    gap: 6px;
}
.dark .stl { color: #cbd5e1; }

/* ── Generate Button ───────────────────────────────────────── */
#gen-btn {
    margin-top: 14px !important;
    font-size: 1.1em !important;
    font-weight: 700 !important;
    padding: 14px 28px !important;
    border-radius: 14px !important;
    letter-spacing: .3px;
    transition: all .25s cubic-bezier(.4,0,.2,1) !important;
    min-height: 52px !important;
}
#gen-btn:hover {
    transform: translateY(-2px) !important;
    box-shadow: 0 8px 28px rgba(232,83,31,.40) !important;
}
#gen-btn:active { transform: translateY(0) !important; }

/* ── Clear Button ──────────────────────────────────────────── */
#clear-btn {
    min-height: 52px !important;
    margin-top: 14px !important;
    border-radius: 14px !important;
    font-weight: 600 !important;
}

/* ── Prompt Chip Row ───────────────────────────────────────── */
.chip-row { gap: 6px !important; margin-top: 2px !important; }
.chip-btn {
    font-size: .78em !important;
    padding: 5px 13px !important;
    border-radius: 100px !important;
    min-width: 0 !important;
    font-weight: 500 !important;
    white-space: nowrap !important;
    transition: all .2s ease !important;
}
.chip-btn:hover { transform: translateY(-1px) !important; }

/* ── Output Image ──────────────────────────────────────────── */
#output-img { border-radius: 14px !important; overflow: hidden; }

/* ── Info Box ──────────────────────────────────────────────── */
#info-box { margin-top: 6px !important; border-radius: 12px !important; }
#info-box .prose { font-family: 'JetBrains Mono', monospace; font-size: .88em; }

/* ── Tips ──────────────────────────────────────────────────── */
.tips {
    background: linear-gradient(135deg, #FFF5F0, #FFE8DB);
    border: 1px solid #FFD0B5;
    border-radius: 14px;
    padding: 18px 24px;
    margin-top: 14px;
}
.tips h4 { margin: 0 0 10px; font-size: .95em; color: #A63812; }
.tips ul {
    margin: 0;
    padding: 0 0 0 20px;
    color: #80300F;
    font-size: .85em;
    line-height: 1.75;
}
.tips li { margin-bottom: 2px; }
.tips li::marker { color: #FF6B35; }
.tips strong { color: #A63812; }
.dark .tips {
    background: linear-gradient(135deg, #2a1a10, #201510);
    border-color: rgba(255,107,53,.2);
}
.dark .tips h4 { color: #FFB088; }
.dark .tips ul { color: #FFD0B5; }
.dark .tips strong { color: #FFB088; }

/* ── Footer ────────────────────────────────────────────────── */
.ftr {
    text-align: center;
    padding: 18px;
    margin-top: 20px;
    color: #94a3b8;
    font-size: .82em;
    border-top: 1px solid #e2e8f0;
}
.dark .ftr { border-top-color: rgba(255,255,255,.08); }
.ftr a { color: #E8531F; text-decoration: none; font-weight: 500; }
.ftr a:hover { text-decoration: underline; }

/* ── Responsive ────────────────────────────────────────────── */
@media (max-width: 768px) {
    .hdr h1 { font-size: 1.8em; }
    .hdr { padding: 24px 16px 22px; }
    .bdg { font-size: .72em; padding: 4px 10px; }
    .chip-btn { font-size: .72em !important; padding: 4px 10px !important; }
}
"""

# ═══════════════════════════════════════════════════════════════════════
# UI
# ═══════════════════════════════════════════════════════════════════════

with gr.Blocks(css=css, theme=theme, title="🔥 FireRed Image Edit") as demo:
    with gr.Column(elem_id="col-container"):

        # ── Header ──────────────────────────────────────────────────
        gr.HTML(f"""
        <div class="hdr">
            <h1>🔥 FireRed Image Edit</h1>
            <p class="sub">
                AI-powered image editing with blazing-fast 4-step inference<br>
                Powered by <a href="{_FIRERED_URL}">FireRed-Image-Edit-1.1</a>
                &amp; <a href="{_RAPID_AIO_URL}">Rapid-AIO-V23</a>
            </p>
            <div class="badges">
                <span class="bdg">⚡ 4-Step Fast</span>
                <span class="bdg">🎨 Style Transfer</span>
                <span class="bdg">📐 Auto Resize</span>
                <span class="bdg">🖼️ Multi-Image</span>
                <span class="bdg">🔧 BF16 Precision</span>
            </div>
        </div>
        """)

        # ── Main two-column layout ─────────────────────────────────
        with gr.Row(equal_height=False):

            # ─── Left: inputs ───────────────────────────────────────
            with gr.Column(scale=1):
                gr.HTML('<div class="stl">📤&nbsp; Upload Image(s)</div>')
                images = gr.Gallery(
                    label="Upload Images",
                    type="filepath",
                    columns=2,
                    rows=1,
                    height=280,
                    allow_preview=True,
                    object_fit="contain",
                )

                gr.HTML('<div class="stl">✏️&nbsp; Describe Your Edit</div>')
                prompt = gr.Textbox(
                    show_label=False,
                    max_lines=3,
                    placeholder=(
                        "e.g. 'Transform into a Studio Ghibli anime scene "
                        "with warm golden-hour lighting'"
                    ),
                )

                # Suggestion chips
                gr.HTML(
                    "<div>"
                    "💡 Quick suggestions — click to fill prompt:"
                    "</div>"
                )
                chip_data_1, chip_data_2 = [], []
                with gr.Row(elem_classes="chip-row"):
                    for t in SUGGESTIONS[:4]:
                        b = gr.Button(
                            t, size="sm", variant="secondary", elem_classes="chip-btn"
                        )
                        chip_data_1.append((b, t))
                with gr.Row(elem_classes="chip-row"):
                    for t in SUGGESTIONS[4:]:
                        b = gr.Button(
                            t, size="sm", variant="secondary", elem_classes="chip-btn"
                        )
                        chip_data_2.append((b, t))

                with gr.Row():
                    run_button = gr.Button(
                        "🎨 Generate Edit",
                        variant="primary",
                        elem_id="gen-btn",
                        size="lg",
                        scale=3,
                    )
                    clear_button = gr.Button(
                        "🗑️ Clear",
                        variant="secondary",
                        elem_id="clear-btn",
                        size="lg",
                        scale=1,
                    )

            # ─── Right: output ──────────────────────────────────────
            with gr.Column(scale=1):
                gr.HTML('<div class="stl">🖼️&nbsp; Result</div>')
                output_image = gr.Image(
                    show_label=False,
                    interactive=False,
                    format="png",
                    height=420,
                    elem_id="output-img",
                )
                info_box = gr.Markdown(
                    value=INFO_PLACEHOLDER,
                    elem_id="info-box",
                )

        # ── Advanced settings ───────────────────────────────────────
        with gr.Accordion("⚙️ Advanced Settings", open=False):
            with gr.Row():
                seed = gr.Slider(
                    label="Seed",
                    minimum=0,
                    maximum=MAX_SEED,
                    step=1,
                    value=0,
                    scale=3,
                )
                randomize_seed = gr.Checkbox(
                    label="🎲 Randomize seed",
                    value=True,
                    scale=1,
                )
            with gr.Row():
                guidance_scale = gr.Slider(
                    label="Guidance Scale",
                    minimum=1.0,
                    maximum=10.0,
                    step=0.1,
                    value=1.0,
                    info="Higher → stronger prompt adherence",
                )
                steps = gr.Slider(
                    label="Inference Steps",
                    minimum=1,
                    maximum=50,
                    step=1,
                    value=4,
                    info="More steps → higher quality (slower)",
                )
            negative_prompt = gr.Textbox(
                label="Negative Prompt",
                value=DEFAULT_NEGATIVE_PROMPT,
                max_lines=3,
                info="Describe what to avoid in the output",
            )

        # ── Tips ────────────────────────────────────────────────────
        # NOTE(review): the stylesheet has rules for `.tips ul` / `.tips li`,
        # but the list items were not present in the reviewed copy of this
        # file — restore the tip entries here.
        gr.HTML("""
        <div class="tips">
            <h4>💡 Tips for Best Results</h4>
        </div>
        """)

        # ── Footer ──────────────────────────────────────────────────
        # The accelerated checkpoint is the V23 build loaded at startup.
        gr.HTML(f"""
        <div class="ftr">
            Model&nbsp; <a href="{_FIRERED_URL}">FireRed-Image-Edit-1.1</a>
            &nbsp;·&nbsp;
            Accelerated&nbsp; <a href="{_RAPID_AIO_URL}">Rapid-AIO-V23</a>
        </div>
        """)

    # ═══════════════════════════════════════════════════════════════
    # EVENT WIRING
    # ═══════════════════════════════════════════════════════════════

    # Suggestion chips → fill prompt. The default argument binds each chip's
    # own text at definition time (avoids the late-binding closure pitfall).
    for btn, text in chip_data_1 + chip_data_2:
        btn.click(fn=lambda t=text: t, inputs=[], outputs=[prompt])

    # Clear button
    clear_button.click(
        fn=lambda: (None, "", None, INFO_PLACEHOLDER),
        inputs=[],
        outputs=[images, prompt, output_image, info_box],
    )

    # Generate — with a public api_name so the endpoint is discoverable.
    # The info box is refreshed only when inference succeeded, so a failed
    # request does not report a seed for an image that was never produced.
    run_button.click(
        fn=infer,
        inputs=[
            images,
            prompt,
            negative_prompt,
            seed,
            randomize_seed,
            guidance_scale,
            steps,
        ],
        outputs=[output_image, seed],
        api_name="edit",
    ).success(
        fn=format_info,
        inputs=[seed, images],
        outputs=[info_box],
    )

# ═══════════════════════════════════════════════════════════════════════
# LAUNCH
# ═══════════════════════════════════════════════════════════════════════

if __name__ == "__main__":
    # NOTE(review): both concurrent jobs would call the same module-level
    # `pipe` object — confirm the pipeline tolerates overlapping calls in the
    # target deployment, otherwise lower the limit to 1.
    demo.queue(
        max_size=30,
        default_concurrency_limit=2,  # allow 2 concurrent GPU jobs
    ).launch(
        share=True,  # ← public shareable link
    )