import random

import gradio as gr
import numpy as np
import torch
import spaces

from PIL import Image
from optimization import optimize_pipeline_
from qwenimage.pipeline_qwenimage_edit_plus import QwenImageEditPlusPipeline
from qwenimage.transformer_qwenimage import QwenImageTransformer2DModel
from qwenimage.qwen_fa3_processor import QwenDoubleStreamAttnProcessorFA3


# --- Model Loading ---
dtype = torch.bfloat16
device = "cuda" if torch.cuda.is_available() else "cpu"

transformer = QwenImageTransformer2DModel.from_pretrained(
    "linoyts/Qwen-Image-Edit-Rapid-AIO",
    subfolder="transformer",
    torch_dtype=dtype,
    device_map="cuda",
)
pipe = QwenImageEditPlusPipeline.from_pretrained(
    "Qwen/Qwen-Image-Edit-2509", transformer=transformer, torch_dtype=dtype
).to(device)

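# Fuse both LoRAs into the base transformer weights, then unload the adapter
# modules: once fused, the weights behave like a plain checkpoint, so sampling
# pays no per-step adapter overhead.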
pipe.load_lora_weights(
    "dx8152/Qwen-Edit-2509-Multiple-angles",
    weight_name="镜头转换.safetensors",  # "camera transition" LoRA
    adapter_name="angles",
)
pipe.load_lora_weights(
    "lovis93/next-scene-qwen-image-lora-2509",
    weight_name="next-scene_lora-v2-3000.safetensors",
    adapter_name="next-scene",
)
pipe.set_adapters(["angles", "next-scene"], adapter_weights=[1.0, 1.0])
pipe.fuse_lora(adapter_names=["angles"], lora_scale=1.0)
pipe.fuse_lora(adapter_names=["next-scene"], lora_scale=1.0)
pipe.unload_lora_weights()


# Apply the same optimizations as the first version: route attention through
# the FlashAttention-3 double-stream processor.
pipe.transformer.__class__ = QwenImageTransformer2DModel
pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())

# --- Ahead-of-time compilation ---
# Compile once with dummy inputs so the first real request doesn't pay the cost.
optimize_pipeline_(pipe, image=[Image.new("RGB", (1024, 1024)), Image.new("RGB", (1024, 1024))], prompt="prompt")

# --- UI Constants and Helpers ---
MAX_SEED = np.iinfo(np.int32).max

# --- Build natural language prompt from sliders ---
def build_camera_prompt(rotate_deg, move_lr, move_forward, topdown, wideangle, closeup):
    prompt_parts = []

    # Rotation (positive values rotate left, negative rotate right)
    if rotate_deg != 0:
        direction = "left" if rotate_deg > 0 else "right"
        prompt_parts.append(f"Rotate the camera {abs(rotate_deg)} degrees to the {direction}.")

    # Movement (positive = left / forward, negative = right / backward)
    if move_lr > 0:
        prompt_parts.append("Move the camera left.")
    elif move_lr < 0:
        prompt_parts.append("Move the camera right.")

    if move_forward > 0:
        prompt_parts.append("Move the camera forward.")
    elif move_forward < 0:
        prompt_parts.append("Move the camera backward.")

    # Lens / perspective options
    if topdown:
        prompt_parts.append("Turn the camera to a top-down view.")
    if wideangle:
        prompt_parts.append("Turn the camera to a wide-angle lens.")
    if closeup:
        prompt_parts.append("Turn the camera to a close-up lens.")

    final_prompt = " ".join(prompt_parts).strip()
    return final_prompt if final_prompt else "No camera movement."
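# For example:
#   build_camera_prompt(45, 0, 1, False, True, False)
#   -> "Rotate the camera 45 degrees to the left. Move the camera forward.
#       Turn the camera to a wide-angle lens."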


# --- Main inference function (unchanged backend) ---
@spaces.GPU(duration=300)
def infer_camera_edit(
    image,
    prev_output,
    rotate_deg,
    move_lr,
    move_forward,
    topdown,
    wideangle,
    closeup,
    seed,
    randomize_seed,
    true_guidance_scale,
    num_inference_steps,
    height,
    width,
):
    prompt = build_camera_prompt(rotate_deg, move_lr, move_forward, topdown, wideangle, closeup)
    print(f"Generated Prompt: {prompt}")

    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    generator = torch.Generator(device=device).manual_seed(seed)

    # Use previous output if no new image uploaded
    pil_images = []
    if image is not None:
        if isinstance(image, Image.Image):
            pil_images.append(image.convert("RGB"))
        elif hasattr(image, "name"):
            pil_images.append(Image.open(image.name).convert("RGB"))
    elif prev_output is not None:
        pil_images.append(prev_output.convert("RGB"))

    if len(pil_images) == 0:
        raise gr.Error("Please upload an image first.")

    result = pipe(
        image=pil_images,
        prompt=prompt,
        height=height if height != 0 else None,
        width=width if width != 0 else None,
        num_inference_steps=num_inference_steps,
        generator=generator,
        true_cfg_scale=true_guidance_scale,
        num_images_per_prompt=1,
    ).images[0]

    return result, seed, prompt
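# Minimal usage sketch outside the UI (hypothetical "input.png"; the values
# mirror the UI defaults below):
#   img = Image.open("input.png")
#   out, used_seed, prompt = infer_camera_edit(
#       img, None, 45, 0, 0, False, False, False,
#       seed=0, randomize_seed=True, true_guidance_scale=1.0,
#       num_inference_steps=4, height=1024, width=1024,
#   )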


# --- Gradio UI ---
css = '''
#col-container { max-width: 800px; margin: 0 auto; }
'''

with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("## 🎬 Qwen Image Edit — Camera Angle Control")
        gr.Markdown("Edit the same image from multiple camera angles using Qwen Edit and the 'Multiple Angles' LoRA. Each edit applies to the latest output for fluid camera movement.")

        with gr.Row():
            with gr.Column():
                image = gr.Image(label="Input Image", type="pil", sources=["upload"])
                prev_output = gr.State(value=None)
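                # gr.State carries the latest output between events so each
                # new camera instruction can chain off the previous result.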

                with gr.Tab("Camera Controls"):
                    rotate_deg = gr.Slider(
                        label="Rotate Left–Right (°)",
                        minimum=-90, maximum=90, step=45, value=0)
                    move_lr = gr.Slider(label="Move Right–Left", minimum=-10, maximum=10, step=1, value=0)
                    move_forward = gr.Slider(label="Move Forward/Backward", minimum=-10, maximum=10, step=1, value=0)
                    topdown = gr.Checkbox(label="Top-Down View", value=False)
                    wideangle = gr.Checkbox(label="Wide-Angle Lens", value=False)
                    closeup = gr.Checkbox(label="Close-Up Lens", value=False)

                with gr.Accordion("Advanced Settings", open=False):
                    seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
                    randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
                    true_guidance_scale = gr.Slider(label="True Guidance Scale", minimum=1.0, maximum=10.0, step=0.1, value=1.0)
                    num_inference_steps = gr.Slider(label="Inference Steps", minimum=1, maximum=40, step=1, value=4)
                    height = gr.Slider(label="Height", minimum=256, maximum=2048, step=8, value=1024)
                    width = gr.Slider(label="Width", minimum=256, maximum=2048, step=8, value=1024)

                with gr.Row():
                    reset_btn = gr.Button("Reset")
                    run_btn = gr.Button("Generate", variant="primary")

            with gr.Column():
                result = gr.Image(label="Output Image")
                prompt_preview = gr.Textbox(label="Generated Prompt", interactive=False)
                gr.Markdown("_Each change applies a fresh camera instruction to the last output image._")

    # Define inputs & outputs
    inputs = [
        image, prev_output, rotate_deg, move_lr, move_forward,
        topdown, wideangle, closeup,
        seed, randomize_seed, true_guidance_scale, num_inference_steps, height, width
    ]
    outputs = [result, seed, prompt_preview]

    def reset_all():
        return [0, 0, 0, False, False, False]

    reset_btn.click(
        fn=reset_all,
        inputs=None,
        outputs=[rotate_deg, move_lr, move_forward, topdown, wideangle, closeup],
        queue=False
    )

    run_event = run_btn.click(
        fn=infer_camera_edit,
        inputs=inputs,
        outputs=outputs
    )

    # Live updates on control release. Chain the same prev_output update as
    # the run button, so successive slider edits apply to the latest result
    # rather than the original upload.
    for control in [rotate_deg, move_lr, move_forward, topdown, wideangle, closeup]:
        control.change(
            fn=infer_camera_edit, inputs=inputs, outputs=outputs, show_progress="minimal"
        ).then(lambda img, *_: img, inputs=outputs, outputs=[prev_output])

    # Save latest output as next input
    run_event.then(lambda img, *_: img, inputs=outputs, outputs=[prev_output])

demo.launch()