# Qwen-Image-Edit-Angles / app_2511.py
# Hugging Face Space by dinhvanvu94 — "Create app_2511.py" (commit 6a59ab9, verified)
# app_2511.py
import random
from typing import Optional, Tuple
import gradio as gr
import numpy as np
import torch
from PIL import Image
from diffusers import QwenImageEditPlusPipeline
# -----------------------------------------------------------------------------
# Optional: Hugging Face Spaces helpers (chỉ có trên Spaces runtime)
# -----------------------------------------------------------------------------
try:
import spaces # type: ignore
except Exception:
spaces = None
# -----------------------------------------------------------------------------
# Global config
# -----------------------------------------------------------------------------
dtype = torch.bfloat16
device = "cuda" if torch.cuda.is_available() else "cpu"
MAX_SEED = np.iinfo(np.int32).max
_pipe: Optional[QwenImageEditPlusPipeline] = None
def get_pipe() -> QwenImageEditPlusPipeline:
"""
Load Qwen-Image-Edit-2511 + fal Multiple Angles LoRA as a global singleton.
"""
global _pipe
if _pipe is not None:
return _pipe
pipe = QwenImageEditPlusPipeline.from_pretrained(
"Qwen/Qwen-Image-Edit-2511",
torch_dtype=dtype,
)
if device == "cuda":
pipe = pipe.to("cuda")
# Load LoRA: fal/Qwen-Image-Edit-2511-Multiple-Angles-LoRA
pipe.load_lora_weights(
"fal/Qwen-Image-Edit-2511-Multiple-Angles-LoRA",
weight_name="qwen-image-edit-2511-multiple-angles-lora.safetensors",
adapter_name="angles",
)
pipe.set_adapters(["angles"], adapter_weights=[1.0])
# Fuse LoRA for speed. Strength recommended around 0.8–1.0, default 0.9.
pipe.fuse_lora(adapter_names=["angles"], lora_scale=0.9)
pipe.unload_lora_weights()
# Optional AOTI (Spaces only) - keep same pattern; adjust repo if you have a 2511-specific AOTI artifact.
if spaces is not None and hasattr(spaces, "aoti_blocks_load"):
try:
spaces.aoti_blocks_load(pipe.transformer, "zerogpu-aoti/Qwen-Image", variant="fa3")
print("[INFO] spaces.aoti_blocks_load enabled")
except Exception as e:
print(f"[WARN] spaces.aoti_blocks_load failed: {e}")
_pipe = pipe
return _pipe
# -----------------------------------------------------------------------------
# Angle prompt (fal schema)
# -----------------------------------------------------------------------------
AZIMUTH_CHOICES = [
"front view",
"front-right quarter view",
"right side view",
"back-right quarter view",
"back view",
"back-left quarter view",
"left side view",
"front-left quarter view",
]
ELEVATION_CHOICES = [
"low-angle shot",
"eye-level shot",
"elevated shot",
"high-angle shot",
]
DISTANCE_CHOICES = [
"close-up",
"medium shot",
"wide shot",
]
def build_fal_angles_prompt(azimuth: str, elevation: str, distance: str) -> str:
# fal requires: <sks> [azimuth] [elevation] [distance]
return f"<sks> {azimuth} {elevation} {distance}"
# -----------------------------------------------------------------------------
# Inference
# -----------------------------------------------------------------------------
def infer_angles_edit(
image: Optional[Image.Image],
azimuth: str,
elevation: str,
distance: str,
seed: int,
randomize_seed: bool,
true_guidance_scale: float,
num_inference_steps: int,
height: int,
width: int,
) -> Tuple[Image.Image, int, str]:
if image is None:
raise gr.Error("Please upload an image.")
if randomize_seed:
seed = random.randint(0, MAX_SEED)
prompt = build_fal_angles_prompt(azimuth, elevation, distance)
print("Prompt:", prompt)
pipe = get_pipe()
generator = torch.Generator(device=device).manual_seed(int(seed))
out = pipe(
image=[image.convert("RGB")],
prompt=prompt,
height=height if height and height > 0 else None,
width=width if width and width > 0 else None,
num_inference_steps=int(num_inference_steps),
true_cfg_scale=float(true_guidance_scale),
guidance_scale=1.0,
negative_prompt=" ",
generator=generator,
num_images_per_prompt=1,
).images[0]
return out, seed, prompt
# -----------------------------------------------------------------------------
# UI (Gradio 6.3.0 compatible)
# -----------------------------------------------------------------------------
CAMERA_3D_HTML_TEMPLATE = """
<div style="width:100%;height:400px;background:#1a1a1a;border-radius:12px;
display:flex;align-items:center;justify-content:center;color:#aaa;">
<div style="text-align:center;">
<div style="font-size:18px;color:#00ff88;font-weight:700;">3D Camera Preview</div>
<div style="margin-top:8px;">(Gradio 6) JS ↔ Python sync removed</div>
<div style="margin-top:6px;">Use dropdowns to control camera</div>
</div>
</div>
"""
css = """
#camera-3d-control { min-height: 400px; }
"""
with gr.Blocks(css=css) as demo:
gr.Markdown("## 🎬 Qwen Image Edit — 2511 + Multiple Angles LoRA (UI only)")
with gr.Row():
with gr.Column(scale=1):
image = gr.Image(label="Input Image", type="pil", height=280)
with gr.Tab("🎮 3D Camera Preview"):
_ = gr.HTML(value=CAMERA_3D_HTML_TEMPLATE, elem_id="camera-3d-control")
with gr.Tab("🎚️ Angles (fal schema)"):
azimuth = gr.Dropdown(
choices=AZIMUTH_CHOICES,
value="front view",
label="Azimuth (8 directions)",
)
elevation = gr.Dropdown(
choices=ELEVATION_CHOICES,
value="eye-level shot",
label="Elevation (4 levels)",
)
distance = gr.Dropdown(
choices=DISTANCE_CHOICES,
value="medium shot",
label="Distance (3 levels)",
)
run_btn = gr.Button("🚀 Generate", variant="primary")
with gr.Column(scale=1):
result = gr.Image(label="Output Image", height=350)
prompt_preview = gr.Textbox(label="Generated Prompt", interactive=False)
with gr.Accordion("⚙️ Advanced", open=False):
seed = gr.Slider(0, MAX_SEED, step=1, label="Seed", value=0)
randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
# Thường 2511 edit chạy ổn với steps 8–16, true_cfg_scale khoảng 3–6 (tùy ảnh).
true_guidance_scale = gr.Slider(1.0, 10.0, step=0.1, label="True CFG Scale", value=4.0)
num_inference_steps = gr.Slider(1, 40, step=1, label="Steps", value=12)
height = gr.Slider(256, 2048, step=8, value=1024, label="Height")
width = gr.Slider(256, 2048, step=8, value=1024, label="Width")
# (Optional) Quick tips
gr.Markdown(
"- Prompt format used: `<sks> azimuth elevation distance`\n"
"- Nếu ảnh bị méo: giảm **True CFG Scale** hoặc tăng **Steps** nhẹ.\n"
"- Nếu đổi góc chưa mạnh: tăng **True CFG Scale** một chút (không cần tăng LoRA strength)."
)
run_btn.click(
fn=infer_angles_edit,
inputs=[
image,
azimuth,
elevation,
distance,
seed,
randomize_seed,
true_guidance_scale,
num_inference_steps,
height,
width,
],
outputs=[result, seed, prompt_preview],
# chưa cần api -> không set api_name
)
if __name__ == "__main__":
demo.queue(max_size=16)
demo.launch(
server_name="127.0.0.1",
server_port=7861,
share=False,
debug=True,
show_error=True,
prevent_thread_lock=True,
)
# giữ process sống nếu chạy bằng python app_2511.py
import time as _time
while True:
_time.sleep(3600)