# Qwen-Image-Edit-Angles / app_2511.py
# Hugging Face Space by dinhvanvu94 — "Create app_2511.py" (commit 6a59ab9, verified)
# app_2511.py
import random
from typing import Optional, Tuple
import gradio as gr
import numpy as np
import torch
from PIL import Image
from diffusers import QwenImageEditPlusPipeline
# -----------------------------------------------------------------------------
# Optional: Hugging Face Spaces helpers (chỉ có trên Spaces runtime)
# -----------------------------------------------------------------------------
try:
import spaces # type: ignore
except Exception:
spaces = None
# -----------------------------------------------------------------------------
# Global config
# -----------------------------------------------------------------------------
dtype = torch.bfloat16
device = "cuda" if torch.cuda.is_available() else "cpu"
MAX_SEED = np.iinfo(np.int32).max
_pipe: Optional[QwenImageEditPlusPipeline] = None
def get_pipe() -> QwenImageEditPlusPipeline:
"""
Load Qwen-Image-Edit-2511 + fal Multiple Angles LoRA as a global singleton.
"""
global _pipe
if _pipe is not None:
return _pipe
pipe = QwenImageEditPlusPipeline.from_pretrained(
"Qwen/Qwen-Image-Edit-2511",
torch_dtype=dtype,
)
if device == "cuda":
pipe = pipe.to("cuda")
# Load LoRA: fal/Qwen-Image-Edit-2511-Multiple-Angles-LoRA
pipe.load_lora_weights(
"fal/Qwen-Image-Edit-2511-Multiple-Angles-LoRA",
weight_name="qwen-image-edit-2511-multiple-angles-lora.safetensors",
adapter_name="angles",
)
pipe.set_adapters(["angles"], adapter_weights=[1.0])
# Fuse LoRA for speed. Strength recommended around 0.8–1.0, default 0.9.
pipe.fuse_lora(adapter_names=["angles"], lora_scale=0.9)
pipe.unload_lora_weights()
# Optional AOTI (Spaces only) - keep same pattern; adjust repo if you have a 2511-specific AOTI artifact.
if spaces is not None and hasattr(spaces, "aoti_blocks_load"):
try:
spaces.aoti_blocks_load(pipe.transformer, "zerogpu-aoti/Qwen-Image", variant="fa3")
print("[INFO] spaces.aoti_blocks_load enabled")
except Exception as e:
print(f"[WARN] spaces.aoti_blocks_load failed: {e}")
_pipe = pipe
return _pipe
# -----------------------------------------------------------------------------
# Angle prompt (fal schema)
# -----------------------------------------------------------------------------
AZIMUTH_CHOICES = [
"front view",
"front-right quarter view",
"right side view",
"back-right quarter view",
"back view",
"back-left quarter view",
"left side view",
"front-left quarter view",
]
ELEVATION_CHOICES = [
"low-angle shot",
"eye-level shot",
"elevated shot",
"high-angle shot",
]
DISTANCE_CHOICES = [
"close-up",
"medium shot",
"wide shot",
]
def build_fal_angles_prompt(azimuth: str, elevation: str, distance: str) -> str:
# fal requires: <sks> [azimuth] [elevation] [distance]
return f"<sks> {azimuth} {elevation} {distance}"
# -----------------------------------------------------------------------------
# Inference
# -----------------------------------------------------------------------------
def infer_angles_edit(
image: Optional[Image.Image],
azimuth: str,
elevation: str,
distance: str,
seed: int,
randomize_seed: bool,
true_guidance_scale: float,
num_inference_steps: int,
height: int,
width: int,
) -> Tuple[Image.Image, int, str]:
if image is None:
raise gr.Error("Please upload an image.")
if randomize_seed:
seed = random.randint(0, MAX_SEED)
prompt = build_fal_angles_prompt(azimuth, elevation, distance)
print("Prompt:", prompt)
pipe = get_pipe()
generator = torch.Generator(device=device).manual_seed(int(seed))
out = pipe(
image=[image.convert("RGB")],
prompt=prompt,
height=height if height and height > 0 else None,
width=width if width and width > 0 else None,
num_inference_steps=int(num_inference_steps),
true_cfg_scale=float(true_guidance_scale),
guidance_scale=1.0,
negative_prompt=" ",
generator=generator,
num_images_per_prompt=1,
).images[0]
return out, seed, prompt
# -----------------------------------------------------------------------------
# UI (Gradio 6.3.0 compatible)
# -----------------------------------------------------------------------------
CAMERA_3D_HTML_TEMPLATE = """
<div style="width:100%;height:400px;background:#1a1a1a;border-radius:12px;
display:flex;align-items:center;justify-content:center;color:#aaa;">
<div style="text-align:center;">
<div style="font-size:18px;color:#00ff88;font-weight:700;">3D Camera Preview</div>
<div style="margin-top:8px;">(Gradio 6) JS ↔ Python sync removed</div>
<div style="margin-top:6px;">Use dropdowns to control camera</div>
</div>
</div>
"""
css = """
#camera-3d-control { min-height: 400px; }
"""
with gr.Blocks(css=css) as demo:
gr.Markdown("## 🎬 Qwen Image Edit — 2511 + Multiple Angles LoRA (UI only)")
with gr.Row():
with gr.Column(scale=1):
image = gr.Image(label="Input Image", type="pil", height=280)
with gr.Tab("🎮 3D Camera Preview"):
_ = gr.HTML(value=CAMERA_3D_HTML_TEMPLATE, elem_id="camera-3d-control")
with gr.Tab("🎚️ Angles (fal schema)"):
azimuth = gr.Dropdown(
choices=AZIMUTH_CHOICES,
value="front view",
label="Azimuth (8 directions)",
)
elevation = gr.Dropdown(
choices=ELEVATION_CHOICES,
value="eye-level shot",
label="Elevation (4 levels)",
)
distance = gr.Dropdown(
choices=DISTANCE_CHOICES,
value="medium shot",
label="Distance (3 levels)",
)
run_btn = gr.Button("🚀 Generate", variant="primary")
with gr.Column(scale=1):
result = gr.Image(label="Output Image", height=350)
prompt_preview = gr.Textbox(label="Generated Prompt", interactive=False)
with gr.Accordion("⚙️ Advanced", open=False):
seed = gr.Slider(0, MAX_SEED, step=1, label="Seed", value=0)
randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
# Thường 2511 edit chạy ổn với steps 8–16, true_cfg_scale khoảng 3–6 (tùy ảnh).
true_guidance_scale = gr.Slider(1.0, 10.0, step=0.1, label="True CFG Scale", value=4.0)
num_inference_steps = gr.Slider(1, 40, step=1, label="Steps", value=12)
height = gr.Slider(256, 2048, step=8, value=1024, label="Height")
width = gr.Slider(256, 2048, step=8, value=1024, label="Width")
# (Optional) Quick tips
gr.Markdown(
"- Prompt format used: `<sks> azimuth elevation distance`\n"
"- Nếu ảnh bị méo: giảm **True CFG Scale** hoặc tăng **Steps** nhẹ.\n"
"- Nếu đổi góc chưa mạnh: tăng **True CFG Scale** một chút (không cần tăng LoRA strength)."
)
run_btn.click(
fn=infer_angles_edit,
inputs=[
image,
azimuth,
elevation,
distance,
seed,
randomize_seed,
true_guidance_scale,
num_inference_steps,
height,
width,
],
outputs=[result, seed, prompt_preview],
# chưa cần api -> không set api_name
)
if __name__ == "__main__":
demo.queue(max_size=16)
demo.launch(
server_name="127.0.0.1",
server_port=7861,
share=False,
debug=True,
show_error=True,
prevent_thread_lock=True,
)
# giữ process sống nếu chạy bằng python app_2511.py
import time as _time
while True:
_time.sleep(3600)