FireRed-Image-Edit-1.1-Fast

Running on Zero

App Files Files Community

FireRed-Image-Edit-1.1-Fast / app.py

primerz

Update app.py

c98def1 verified 16 days ago

raw

history blame contribute delete

29.5 kB

	import os
	import gc
	import gradio as gr
	import numpy as np
	import spaces
	import torch
	import random
	from PIL import Image
	from typing import Iterable
	from gradio.themes import Soft
	from gradio.themes.utils import colors, fonts, sizes

	# ═══════════════════════════════════════════════════════════════════════
	# THEME
	# ═══════════════════════════════════════════════════════════════════════

	# Custom "fire_red" palette, lightest (c50) to darkest (c950). It is attached
	# to gradio's ``colors`` namespace so FireRedTheme can use it as a default hue.
	_FIRE_RED_SHADES = {
	    "c50": "#FFF5F0",
	    "c100": "#FFE8DB",
	    "c200": "#FFD0B5",
	    "c300": "#FFB088",
	    "c400": "#FF8C5A",
	    "c500": "#FF6B35",
	    "c600": "#E8531F",
	    "c700": "#CC4317",
	    "c800": "#A63812",
	    "c900": "#80300F",
	    "c950": "#5C220A",
	}
	colors.fire_red = colors.Color(name="fire_red", **_FIRE_RED_SHADES)


	class FireRedTheme(Soft):
	    """Gradio ``Soft`` theme variant with gray primaries and fire-red accents.

	    All constructor arguments are keyword-only and are forwarded unchanged to
	    ``Soft.__init__``; the fixed colour, border, shadow, button, slider and
	    input overrides are then applied through ``set()``.

	    Args:
	        primary_hue: Primary hue (defaults to ``colors.gray``).
	        secondary_hue: Accent hue (defaults to the custom ``colors.fire_red``).
	        neutral_hue: Neutral hue (defaults to ``colors.slate``).
	        text_size: Base text size.
	        font: Font, or fallback chain of fonts, for regular text.
	        font_mono: Font, or fallback chain of fonts, for monospace text.
	    """

	    def __init__(
	        self,
	        *,
	        primary_hue: colors.Color | str = colors.gray,
	        secondary_hue: colors.Color | str = colors.fire_red,
	        neutral_hue: colors.Color | str = colors.slate,
	        text_size: sizes.Size | str = sizes.text_md,
	        font: fonts.Font | str | Iterable[fonts.Font | str] = (
	            fonts.GoogleFont("Inter"),
	            "system-ui",
	            "sans-serif",
	        ),
	        font_mono: fonts.Font | str | Iterable[fonts.Font | str] = (
	            fonts.GoogleFont("JetBrains Mono"),
	            "ui-monospace",
	            "monospace",
	        ),
	    ):
	        super().__init__(
	            primary_hue=primary_hue,
	            secondary_hue=secondary_hue,
	            neutral_hue=neutral_hue,
	            text_size=text_size,
	            font=font,
	            font_mono=font_mono,
	        )
	        # A leading "*" marks a reference to another theme variable. Every
	        # reference needs it — including the gradient stops below — otherwise
	        # the bare name (e.g. "secondary_500") is emitted verbatim, which is
	        # not a valid CSS colour and leaves the primary button unstyled.
	        super().set(
	            body_background_fill="#f0f2f6",
	            body_background_fill_dark="*neutral_950",
	            background_fill_primary="white",
	            background_fill_primary_dark="*neutral_900",
	            block_background_fill="white",
	            block_background_fill_dark="*neutral_800",
	            block_border_width="1px",
	            block_border_color="*neutral_200",
	            block_border_color_dark="*neutral_700",
	            block_shadow="0 1px 4px rgba(0,0,0,0.05)",
	            block_shadow_dark="0 1px 4px rgba(0,0,0,0.25)",
	            block_title_text_weight="600",
	            block_label_background_fill="*neutral_50",
	            block_label_background_fill_dark="*neutral_800",
	            button_primary_text_color="white",
	            button_primary_text_color_hover="white",
	            button_primary_background_fill="linear-gradient(135deg, *secondary_500, *secondary_600)",
	            button_primary_background_fill_hover="linear-gradient(135deg, *secondary_600, *secondary_700)",
	            button_primary_background_fill_dark="linear-gradient(135deg, *secondary_500, *secondary_600)",
	            button_primary_background_fill_hover_dark="linear-gradient(135deg, *secondary_600, *secondary_700)",
	            button_primary_shadow="0 4px 14px rgba(232, 83, 31, 0.25)",
	            button_secondary_text_color="*secondary_700",
	            button_secondary_text_color_dark="*secondary_300",
	            button_secondary_background_fill="*secondary_50",
	            button_secondary_background_fill_hover="*secondary_100",
	            button_secondary_background_fill_dark="rgba(255, 107, 53, 0.1)",
	            button_secondary_background_fill_hover_dark="rgba(255, 107, 53, 0.2)",
	            button_large_padding="12px 24px",
	            slider_color="*secondary_500",
	            slider_color_dark="*secondary_500",
	            input_border_color_focus="*secondary_400",
	            input_border_color_focus_dark="*secondary_500",
	            color_accent_soft="*secondary_50",
	            color_accent_soft_dark="rgba(255, 107, 53, 0.15)",
	        )


	# Shared theme instance with all defaults; passed to gr.Blocks(theme=...) in
	# the UI section.
	theme = FireRedTheme()

	# ═══════════════════════════════════════════════════════════════════════
	# GLOBAL CUDA OPTIMIZATIONS
	# ═══════════════════════════════════════════════════════════════════════

	# Enable the cuDNN autotuner so cuDNN can benchmark and pick convolution
	# algorithms for the input sizes it encounters (costs a short warm-up).
	torch.backends.cudnn.benchmark = True

	# Allow TF32 for CUDA matmuls and cuDNN ops. The pipeline below is loaded in
	# bf16, so the reduced float32 precision is not expected to matter here.
	# NOTE(review): nothing in this file measures the actual speed-up — treat
	# any specific figure as an assumption to be benchmarked.
	torch.backends.cuda.matmul.allow_tf32 = True
	torch.backends.cudnn.allow_tf32 = True
	# NOTE(review): presumably overlaps with matmul.allow_tf32 above — confirm
	# against the torch docs before removing either setting.
	torch.set_float32_matmul_precision("high")

	# ═══════════════════════════════════════════════════════════════════════
	# MODEL
	# ═══════════════════════════════════════════════════════════════════════

	# Prefer CUDA when torch reports it as available, otherwise fall back to CPU.
	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
	# Startup diagnostics: log the visible GPUs, the torch build and the chosen
	# device.
	print("CUDA_VISIBLE_DEVICES =", os.environ.get("CUDA_VISIBLE_DEVICES"))
	print("torch.__version__ =", torch.__version__)
	print("device =", device)

	# Model-stack imports are issued here, after the CUDA flags and the device
	# diagnostics above, rather than at the top of the file.
	# NOTE(review): FlowMatchEulerDiscreteScheduler is not referenced in the
	# visible part of this file — confirm whether it is still needed.
	from diffusers import FlowMatchEulerDiscreteScheduler
	from qwenimage.pipeline_qwenimage_edit_plus import QwenImageEditPlusPipeline
	from qwenimage.transformer_qwenimage import QwenImageTransformer2DModel
	from qwenimage.qwen_fa3_processor import QwenDoubleStreamAttnProcessorFA3

	# Single dtype shared by the transformer and the pipeline loads below.
	dtype = torch.bfloat16

	# Build the denoising transformer on its own first, so it can be wrapped
	# with torch.compile before the pipeline is assembled around it.
	# NOTE(review): device_map is hard-coded to "cuda" even though `device` can
	# fall back to "cpu" — confirm this app is never expected to start on CPU.
	transformer = QwenImageTransformer2DModel.from_pretrained(
	    "prithivMLmods/Qwen-Image-Edit-Rapid-AIO-V23",
	    device_map="cuda",
	    torch_dtype=dtype,
	)

	# Best-effort torch.compile of the backbone; on failure the eager module is
	# kept and the reason is logged.
	# NOTE(review): torch.compile normally defers the real compilation to the
	# first forward call, so this handler presumably only covers errors raised
	# while wrapping the module — confirm.
	try:
	    transformer = torch.compile(transformer, mode="reduce-overhead")
	except Exception as exc:
	    print(f"torch.compile skipped: {exc}")
	else:
	    print("torch.compile applied to transformer (reduce-overhead).")

	# Assemble the editing pipeline around the prepared transformer, then move
	# it onto the selected device.
	pipe = QwenImageEditPlusPipeline.from_pretrained(
	    "FireRedTeam/FireRed-Image-Edit-1.1",
	    torch_dtype=dtype,
	    transformer=transformer,
	)
	pipe = pipe.to(device)

	# Flash Attention 3 processor — optional; when it cannot be installed the
	# transformer keeps its current attention processor and the reason is logged.
	try:
	    pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())
	    print("Flash Attention 3 Processor set successfully.")
	except Exception as e:
	    print(f"Warning: Could not set FA3 processor: {e}")

	# VAE optimisations — tiling and slicing are both best-effort. A failure is
	# logged (same style as the FA3 handler above) instead of being silently
	# swallowed, so a missing optimisation is visible in the startup log.
	try:
	    pipe.vae.enable_tiling()
	    print("VAE tiling enabled.")
	except Exception as e:
	    print(f"Warning: Could not enable VAE tiling: {e}")

	try:
	    pipe.vae.enable_slicing()
	    print("VAE slicing enabled.")
	except Exception as e:
	    print(f"Warning: Could not enable VAE slicing: {e}")

	# ── Warmup pass ─────────────────────────────────────────────────────
	# Run one tiny single-step edit at import time so one-off startup costs are
	# paid here rather than on the first user request. Entirely best-effort:
	# any failure is logged and startup continues.
	print("Running warmup inference …")
	try:
	    _warmup_img = Image.new("RGB", (64, 64), color=(128, 128, 128))
	    _warmup_gen = torch.Generator(device=device)
	    _warmup_gen.manual_seed(0)
	    with torch.inference_mode():
	        pipe(
	            prompt="warmup",
	            negative_prompt="",
	            image=[_warmup_img],
	            width=64,
	            height=64,
	            num_inference_steps=1,
	            true_cfg_scale=1.0,
	            generator=_warmup_gen,
	        )
	    # Drop the dummy inputs and hand cached GPU memory back before serving.
	    del _warmup_img, _warmup_gen
	    gc.collect()
	    torch.cuda.empty_cache()
	    print("Warmup complete.")
	except Exception as exc:
	    print(f"Warmup skipped: {exc}")

	# Largest seed value: the maximum of a signed 32-bit integer. Used as the
	# upper bound of the seed slider and of the random seed drawn in infer().
	MAX_SEED = np.iinfo(np.int32).max

	# Negative prompt pre-filled in the "Negative Prompt" textbox and passed by
	# infer_example().
	DEFAULT_NEGATIVE_PROMPT = (
	    "worst quality, low quality, bad anatomy, bad hands, text, error, "
	    "missing fingers, extra digit, fewer digits, cropped, jpeg artifacts, "
	    "signature, watermark, username, blurry"
	)

	# ═══════════════════════════════════════════════════════════════════════
	# HELPERS
	# ═══════════════════════════════════════════════════════════════════════

	def update_dimensions_on_upload(image):
	    """Return the ``(width, height)`` to generate at for *image*.

	    The longer side is scaled to 1024 px, the other side follows the aspect
	    ratio, and both are rounded down to a multiple of 8. Each side is clamped
	    to at least 8 px so that extreme aspect ratios (e.g. 2000×10) never yield
	    a zero-sized dimension.

	    Args:
	        image: Object exposing ``size`` as ``(width, height)`` (such as a PIL
	            image), or ``None``.

	    Returns:
	        ``(width, height)``; ``(1024, 1024)`` when *image* is ``None`` or has
	        a non-positive side.
	    """
	    if image is None:
	        return 1024, 1024
	    w, h = image.size
	    if w <= 0 or h <= 0:
	        # Degenerate input: fall back to the default instead of dividing by 0.
	        return 1024, 1024
	    if w > h:
	        nw, nh = 1024, int(1024 * h / w)
	    else:
	        nh, nw = 1024, int(1024 * w / h)
	    # Round down to a multiple of 8, but never below one 8-px step.
	    return max(8, (nw // 8) * 8), max(8, (nh // 8) * 8)


	def format_seed(seed_val):
	    """Return *seed_val* truncated to an integer and rendered as text."""
	    as_int = int(seed_val)
	    return str(as_int)


	def format_info(seed_val, images):
	    """Build the Markdown shown in the info box after a generation.

	    Args:
	        seed_val: Seed used for the run; rendered as an integer.
	        images: Gallery value. Only the first entry is inspected; it may be a
	            path string, a PIL image, an object with a ``name`` path
	            attribute, or a tuple/list whose first item is one of those.

	    Returns:
	        ``"Seed: `N`"``, followed by the original and output sizes of the
	        first image when that image can be read.
	    """
	    seed_line = f"Seed: `{int(seed_val)}`"
	    if not images:
	        return seed_line
	    try:
	        first = images[0]
	        src = first[0] if isinstance(first, (tuple, list)) else first
	        if isinstance(src, Image.Image):
	            # Caller-owned image: read it, but do not close it.
	            ow, oh = src.size
	            nw, nh = update_dimensions_on_upload(src)
	        else:
	            path = src if isinstance(src, str) else src.name
	            # Only the size is needed, so close the file deterministically
	            # rather than leaving the handle to the garbage collector.
	            with Image.open(path) as im:
	                ow, oh = im.size
	                nw, nh = update_dimensions_on_upload(im)
	    except Exception as e:
	        # Best-effort: the size line is optional, so log and show the seed.
	        print(f"Could not read image size for info box: {e}")
	        return seed_line
	    return f"{seed_line}\n\nOriginal: {ow}×{oh} → Output: {nw}×{nh}"


	# ═══════════════════════════════════════════════════════════════════════
	# INFERENCE
	# ═══════════════════════════════════════════════════════════════════════

	@spaces.GPU
	def infer(
	    images, prompt, negative_prompt,
	    seed, randomize_seed, guidance_scale, steps,
	    progress=gr.Progress(track_tqdm=True),
	):
	    """Run one image edit and return ``(result_image, seed_used)``.

	    Args:
	        images: Gallery value; each entry may be a path string, a PIL image,
	            an object with a ``name`` path attribute, or a tuple/list whose
	            first item is one of those. Unreadable entries are skipped.
	        prompt: Edit instruction; must not be blank.
	        negative_prompt: Text describing what to avoid.
	        seed: Seed used when *randomize_seed* is false; coerced to ``int``.
	        randomize_seed: When true, draw a seed in ``[0, MAX_SEED]`` instead.
	        guidance_scale: Forwarded to the pipeline as ``true_cfg_scale``.
	        steps: Number of inference steps; coerced to ``int``.
	        progress: Gradio progress tracker (tqdm tracking enabled); not used
	            directly in the body.

	    Returns:
	        The first image produced by the pipeline and the seed that was used.

	    Raises:
	        gr.Error: No images were given, the prompt is blank, or none of the
	            images could be read.
	    """
	    # ── Input validation (cheap, do first) ──────────────────────────
	    if not images:
	        raise gr.Error("⚠️ Please upload at least one image.")
	    if not prompt or not prompt.strip():
	        raise gr.Error("⚠️ Please enter an edit prompt.")

	    pil_images = []
	    for item in images:
	        try:
	            src = item[0] if isinstance(item, (tuple, list)) else item
	            if isinstance(src, Image.Image):
	                pil_images.append(src.convert("RGB"))
	            else:
	                path = src if isinstance(src, str) else src.name
	                # convert() returns an independent copy, so the source file
	                # can be closed as soon as the copy exists.
	                with Image.open(path) as opened:
	                    pil_images.append(opened.convert("RGB"))
	        except Exception as e:
	            print(f"Skipping invalid image: {e}")

	    if not pil_images:
	        raise gr.Error("⚠️ Could not process uploaded images.")

	    # Sliders and API clients may deliver numbers as floats, while
	    # Generator.manual_seed() and the step count need real integers.
	    seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
	    steps = int(steps)

	    generator = torch.Generator(device=device).manual_seed(seed)
	    # The output size is derived from the first usable image.
	    width, height = update_dimensions_on_upload(pil_images[0])

	    try:
	        # No autograd bookkeeping is needed for pure inference.
	        with torch.inference_mode():
	            result = pipe(
	                image=pil_images,
	                prompt=prompt,
	                negative_prompt=negative_prompt,
	                height=height,
	                width=width,
	                num_inference_steps=steps,
	                generator=generator,
	                true_cfg_scale=guidance_scale,
	            ).images[0]
	        return result, seed
	    finally:
	        # Reclaim temporaries after the run, whether it succeeded or not.
	        gc.collect()
	        torch.cuda.empty_cache()


	@spaces.GPU
	def infer_example(images, prompt):
	    """Run ``infer`` with fixed example settings.

	    Uses the default negative prompt, a randomised seed, guidance scale 1.0
	    and 4 steps. A single path string is wrapped into a one-item list.
	    Returns ``(None, 0)`` when *images* is empty.
	    """
	    if not images:
	        return None, 0
	    if isinstance(images, str):
	        images_list = [images]
	    else:
	        images_list = images
	    return infer(
	        images_list, prompt, DEFAULT_NEGATIVE_PROMPT,
	        0, True, 1.0, 4,
	    )


	# ═══════════════════════════════════════════════════════════════════════
	# PROMPT SUGGESTIONS
	# ═══════════════════════════════════════════════════════════════════════

	# Ready-made edit prompts rendered as clickable "chip" buttons under the
	# prompt box: the first four form one row, the remaining ones a second row.
	SUGGESTIONS = [
	    "Transform into anime style",
	    "Convert to oil painting",
	    "Add dramatic sunset lighting",
	    "Make it a pencil sketch",
	    "Apply cyberpunk neon aesthetic",
	    "Add snow and winter vibes",
	    "Turn into watercolor art",
	    "Make it look vintage 1970s",
	]

	# ═══════════════════════════════════════════════════════════════════════
	# CSS
	# ═══════════════════════════════════════════════════════════════════════

	css = """
	/* ── Container ─────────────────────────────────────────────── */
	#col-container {
	margin: 0 auto;
	max-width: 1120px;
	}

	/* ── Header ────────────────────────────────────────────────── */
	.hdr {
	text-align: center;
	padding: 38px 28px 30px;
	background: linear-gradient(135deg, #1a1a2e 0%, #16213e 50%, #0f3460 100%);
	border-radius: 20px;
	margin-bottom: 22px;
	border: 1px solid rgba(255,107,53,.15);
	box-shadow: 0 12px 44px rgba(0,0,0,.10);
	position: relative;
	overflow: hidden;
	}
	.hdr::before {
	content: "";
	position: absolute; inset: 0;
	background:
	radial-gradient(ellipse at 25% 50%, rgba(255,107,53,.07) 0%, transparent 60%),
	radial-gradient(ellipse at 80% 25%, rgba(255,140,90,.05) 0%, transparent 50%);
	pointer-events: none;
	}
	.hdr > * { position: relative; z-index: 1; }
	.hdr h1 {
	font-size: 2.6em; font-weight: 800;
	background: linear-gradient(135deg, #FF8C5A, #FF6B35, #FF4500);
	-webkit-background-clip: text; -webkit-text-fill-color: transparent;
	background-clip: text;
	margin: 0 0 8px; letter-spacing: -.02em; line-height: 1.15;
	}
	.hdr .sub {
	color: #94a3b8; font-size: 1.05em; margin: 0 0 16px; line-height: 1.55;
	}
	.hdr .sub a {
	color: #FF8C5A; text-decoration: none;
	border-bottom: 1px solid rgba(255,140,90,.3);
	transition: border-color .2s;
	}
	.hdr .sub a:hover { border-bottom-color: #FF8C5A; }
	.badges { display: flex; justify-content: center; gap: 8px; flex-wrap: wrap; }
	.bdg {
	background: rgba(255,107,53,.12); color: #FFB088;
	padding: 5px 14px; border-radius: 100px;
	font-size: .82em; font-weight: 500;
	border: 1px solid rgba(255,107,53,.18);
	}

	/* ── Section Label ─────────────────────────────────────────── */
	.stl {
	font-size: .92em; font-weight: 700; color: #475569;
	margin: 0 0 6px; display: flex; align-items: center; gap: 6px;
	}
	.dark .stl { color: #cbd5e1; }

	/* ── Generate Button ───────────────────────────────────────── */
	#gen-btn {
	margin-top: 14px !important;
	font-size: 1.1em !important; font-weight: 700 !important;
	padding: 14px 28px !important; border-radius: 14px !important;
	letter-spacing: .3px;
	transition: all .25s cubic-bezier(.4,0,.2,1) !important;
	min-height: 52px !important;
	}
	#gen-btn:hover {
	transform: translateY(-2px) !important;
	box-shadow: 0 8px 28px rgba(232,83,31,.40) !important;
	}
	#gen-btn:active { transform: translateY(0) !important; }

	/* ── Clear Button ──────────────────────────────────────────── */
	#clear-btn {
	min-height: 52px !important;
	margin-top: 14px !important;
	border-radius: 14px !important;
	font-weight: 600 !important;
	}

	/* ── Prompt Chip Row ───────────────────────────────────────── */
	.chip-row { gap: 6px !important; margin-top: 2px !important; }
	.chip-btn {
	font-size: .78em !important; padding: 5px 13px !important;
	border-radius: 100px !important; min-width: 0 !important;
	font-weight: 500 !important; white-space: nowrap !important;
	transition: all .2s ease !important;
	}
	.chip-btn:hover { transform: translateY(-1px) !important; }

	/* ── Output Image ──────────────────────────────────────────── */
	#output-img { border-radius: 14px !important; overflow: hidden; }

	/* ── Info Box ──────────────────────────────────────────────── */
	#info-box {
	margin-top: 6px !important;
	border-radius: 12px !important;
	}
	#info-box .prose {
	font-family: 'JetBrains Mono', monospace;
	font-size: .88em;
	}

	/* ── Tips ──────────────────────────────────────────────────── */
	.tips {
	background: linear-gradient(135deg, #FFF5F0, #FFE8DB);
	border: 1px solid #FFD0B5; border-radius: 14px;
	padding: 18px 24px; margin-top: 14px;
	}
	.tips h4 { margin: 0 0 10px; font-size: .95em; color: #A63812; }
	.tips ul {
	margin: 0; padding: 0 0 0 20px;
	color: #80300F; font-size: .85em; line-height: 1.75;
	}
	.tips li { margin-bottom: 2px; }
	.tips li::marker { color: #FF6B35; }
	.tips strong { color: #A63812; }

	.dark .tips {
	background: linear-gradient(135deg, #2a1a10, #201510);
	border-color: rgba(255,107,53,.2);
	}
	.dark .tips h4 { color: #FFB088; }
	.dark .tips ul { color: #FFD0B5; }
	.dark .tips strong { color: #FFB088; }

	/* ── Footer ────────────────────────────────────────────────── */
	.ftr {
	text-align: center; padding: 18px; margin-top: 20px;
	color: #94a3b8; font-size: .82em;
	border-top: 1px solid #e2e8f0;
	}
	.dark .ftr { border-top-color: rgba(255,255,255,.08); }
	.ftr a { color: #E8531F; text-decoration: none; font-weight: 500; }
	.ftr a:hover { text-decoration: underline; }

	/* ── Responsive ────────────────────────────────────────────── */
	@media (max-width: 768px) {
	.hdr h1 { font-size: 1.8em; }
	.hdr { padding: 24px 16px 22px; }
	.bdg { font-size: .72em; padding: 4px 10px; }
	.chip-btn { font-size: .72em !important; padding: 4px 10px !important; }
	}
	"""

	# ═══════════════════════════════════════════════════════════════════════
	# UI
	# ═══════════════════════════════════════════════════════════════════════

	# Page layout and event wiring. Everything lives in one centred column:
	# header, a two-column input/result row, advanced settings, tips and footer.
	with gr.Blocks(css=css, theme=theme, title="🔥 FireRed Image Edit") as demo:
	    with gr.Column(elem_id="col-container"):

	        # ── Header ──────────────────────────────────────────────────
	        gr.HTML("""
	        <div class="hdr">
	        <h1>🔥 FireRed Image Edit</h1>
	        <p class="sub">
	        AI-powered image editing with blazing-fast <strong>4-step inference</strong><br>
	        Powered by
	        <a href="https://huggingface.co/FireRedTeam/FireRed-Image-Edit-1.1"
	        target="_blank">FireRed-Image-Edit-1.1</a>
	        &
	        <a href="https://huggingface.co/prithivMLmods/Qwen-Image-Edit-Rapid-AIO-V23"
	        target="_blank">Rapid-AIO-V23</a>
	        </p>
	        <div class="badges">
	        <span class="bdg">⚡ 4-Step Fast</span>
	        <span class="bdg">🎨 Style Transfer</span>
	        <span class="bdg">📐 Auto Resize</span>
	        <span class="bdg">🖼️ Multi-Image</span>
	        <span class="bdg">🔧 BF16 Precision</span>
	        </div>
	        </div>
	        """)

	        # ── Main two-column layout ─────────────────────────────────
	        with gr.Row(equal_height=False):

	            # ─── Left: inputs ───────────────────────────────────────
	            with gr.Column(scale=1):
	                gr.HTML('<p class="stl">📤  Upload Image(s)</p>')

	                images = gr.Gallery(
	                    label="Upload Images",
	                    type="filepath",
	                    columns=2,
	                    rows=1,
	                    height=280,
	                    allow_preview=True,
	                    object_fit="contain",
	                )

	                gr.HTML('<p class="stl" style="margin-top:16px">✏️  Describe Your Edit</p>')

	                prompt = gr.Textbox(
	                    show_label=False,
	                    max_lines=3,
	                    placeholder=(
	                        "e.g. 'Transform into a Studio Ghibli anime scene "
	                        "with warm golden-hour lighting'"
	                    ),
	                )

	                # Suggestion chips
	                gr.HTML(
	                    '<p style="font-size:.78em;color:#94a3b8;margin:10px 0 4px;">'
	                    "💡 Quick suggestions — click to fill prompt:</p>"
	                )

	                # (button, text) pairs, collected so the click handlers can
	                # be attached in the event-wiring section below.
	                chip_data_1, chip_data_2 = [], []
	                with gr.Row(elem_classes="chip-row"):
	                    for t in SUGGESTIONS[:4]:
	                        b = gr.Button(t, size="sm", variant="secondary",
	                                      elem_classes="chip-btn")
	                        chip_data_1.append((b, t))

	                with gr.Row(elem_classes="chip-row"):
	                    for t in SUGGESTIONS[4:]:
	                        b = gr.Button(t, size="sm", variant="secondary",
	                                      elem_classes="chip-btn")
	                        chip_data_2.append((b, t))

	                with gr.Row():
	                    run_button = gr.Button(
	                        "🎨 Generate Edit",
	                        variant="primary", elem_id="gen-btn", size="lg", scale=3,
	                    )
	                    clear_button = gr.Button(
	                        "🗑️ Clear",
	                        variant="secondary", elem_id="clear-btn", size="lg", scale=1,
	                    )

	            # ─── Right: output ──────────────────────────────────────
	            with gr.Column(scale=1):
	                gr.HTML('<p class="stl">🖼️  Result</p>')

	                output_image = gr.Image(
	                    show_label=False,
	                    interactive=False,
	                    format="png",
	                    height=420,
	                    elem_id="output-img",
	                )

	                info_box = gr.Markdown(
	                    value="Generate an edit to see details here.",
	                    elem_id="info-box",
	                )

	        # ── Advanced settings ───────────────────────────────────────
	        with gr.Accordion("⚙️ Advanced Settings", open=False):
	            with gr.Row():
	                seed = gr.Slider(
	                    label="Seed", minimum=0, maximum=MAX_SEED, step=1,
	                    value=0, scale=3,
	                )
	                randomize_seed = gr.Checkbox(
	                    label="🎲 Randomize seed", value=True, scale=1,
	                )

	            with gr.Row():
	                guidance_scale = gr.Slider(
	                    label="Guidance Scale",
	                    minimum=1.0, maximum=10.0, step=0.1, value=1.0,
	                    info="Higher → stronger prompt adherence",
	                )
	                steps = gr.Slider(
	                    label="Inference Steps",
	                    minimum=1, maximum=50, step=1, value=4,
	                    info="More steps → higher quality (slower)",
	                )

	            negative_prompt = gr.Textbox(
	                label="Negative Prompt",
	                value=DEFAULT_NEGATIVE_PROMPT,
	                max_lines=3,
	                info="Describe what to avoid in the output",
	            )

	        # ── Tips ────────────────────────────────────────────────────
	        gr.HTML("""
	        <div class="tips">
	        <h4>💡 Tips for Best Results</h4>
	        <ul>
	        <li><strong>Be specific</strong> — clearly describe
	        the change you want</li>
	        <li><strong>Style keywords</strong> — "anime", "oil painting",
	        "watercolor", "pixel art", "3D render"</li>
	        <li><strong>Lighting</strong> — "golden hour", "dramatic shadows",
	        "soft diffused light", "neon glow"</li>
	        <li><strong>Higher quality</strong> — increase steps to 8-12
	        for finer details (takes longer)</li>
	        <li><strong>Multiple images</strong> — upload extra reference
	        images for richer context</li>
	        </ul>
	        </div>
	        """)

	        # ── Footer ──────────────────────────────────────────────────
	        # The accelerator link names the checkpoint that is actually loaded
	        # (Rapid-AIO-V23), matching the header above.
	        gr.HTML("""
	        <div class="ftr">
	        Model
	        <a href="https://huggingface.co/FireRedTeam/FireRed-Image-Edit-1.1"
	        target="_blank">FireRed-Image-Edit-1.1</a>
	        ·  Accelerated
	        <a href="https://huggingface.co/prithivMLmods/Qwen-Image-Edit-Rapid-AIO-V23"
	        target="_blank">Rapid-AIO-V23</a>
	        </div>
	        """)

	        # ═══════════════════════════════════════════════════════════════
	        # EVENT WIRING
	        # ═══════════════════════════════════════════════════════════════

	        # Suggestion chips → fill prompt. The text is bound as a default
	        # argument so each lambda keeps its own suggestion.
	        for btn, text in chip_data_1 + chip_data_2:
	            btn.click(fn=lambda t=text: t, inputs=[], outputs=[prompt])

	        # Clear button: reset gallery, prompt, result image and info text.
	        clear_button.click(
	            fn=lambda: (None, "", None, "Generate an edit to see details here."),
	            inputs=[],
	            outputs=[images, prompt, output_image, info_box],
	        )

	        # Generate — with a public api_name so the endpoint is discoverable.
	        # The info box is refreshed only after a successful run, so a failed
	        # generation never reports a seed/size for a result that was not made.
	        run_button.click(
	            fn=infer,
	            inputs=[
	                images, prompt, negative_prompt,
	                seed, randomize_seed, guidance_scale, steps,
	            ],
	            outputs=[output_image, seed],
	            api_name="edit",
	        ).success(
	            fn=format_info,
	            inputs=[seed, images],
	            outputs=[info_box],
	        )

	# ═══════════════════════════════════════════════════════════════════════
	# LAUNCH
	# ═══════════════════════════════════════════════════════════════════════

	if __name__ == "__main__":
	    # Queue at most 30 pending requests and let each event run up to two
	    # jobs at once, then start the server with a public share link.
	    demo.queue(max_size=30, default_concurrency_limit=2)
	    demo.launch(share=True)