Qwen-Image-to-LoRA

Running on Zero

File size: 7,684 Bytes

import gradio as gr
import numpy as np
import torch, random, json, spaces, time
from diffsynth.pipelines.qwen_image import (
    QwenImagePipeline, ModelConfig,
    QwenImageUnit_Image2LoRAEncode, QwenImageUnit_Image2LoRADecode
)
from safetensors.torch import save_file
import torch
from PIL import Image
# from utils import repo_utils, image_utils, prompt_utils


# repo_utils.clone_repo_if_not_exists("https://github.com/apple/ml-starflow.git", "app/models")
# repo_utils.clone_repo_if_not_exists("https://huggingface.co/apple/starflow", "app/models")

# Single dtype shared by the VRAM config and the pipeline below.
DTYPE = torch.bfloat16
# Largest signed 32-bit integer; upper bound of the seed slider in the UI.
MAX_SEED = np.iinfo(np.int32).max

# Per-model VRAM settings passed to every ModelConfig below.
# NOTE(review): the "disk" values presumably keep weights on disk until they
# are needed on the GPU — confirm against the DiffSynth-Studio VRAM docs.
vram_config_disk_offload = dict(
    offload_dtype="disk",
    offload_device="disk",
    onload_dtype="disk",
    onload_device="disk",
    preparing_dtype=DTYPE,
    preparing_device="cuda",
    computation_dtype=DTYPE,
    computation_device="cuda",
)

# (model_id, origin_file_pattern) pairs, in the order handed to the pipeline.
_I2L_MODEL_FILES = (
    ("DiffSynth-Studio/General-Image-Encoders", "SigLIP2-G384/model.safetensors"),
    ("DiffSynth-Studio/General-Image-Encoders", "DINOv3-7B/model.safetensors"),
    ("DiffSynth-Studio/Qwen-Image-i2L", "Qwen-Image-i2L-Style.safetensors"),
)

# Load models
pipe = QwenImagePipeline.from_pretrained(
    torch_dtype=DTYPE,
    device="cuda",
    model_configs=[
        ModelConfig(
            model_id=repo_id,
            origin_file_pattern=file_pattern,
            **vram_config_disk_offload,
        )
        for repo_id, file_pattern in _I2L_MODEL_FILES
    ],
    processor_config=ModelConfig(
        model_id="Qwen/Qwen-Image-Edit",
        origin_file_pattern="processor/",
    ),
    # Second element of mem_get_info, converted from bytes to GiB, minus 0.5.
    vram_limit=torch.cuda.mem_get_info("cuda")[1] / (1024 ** 3) - 0.5,
)



# pipe = ZImageControlPipeline(
#     vae=vae,
#     tokenizer=tokenizer,
#     text_encoder=text_encoder,
#     transformer=transformer,
#     scheduler=scheduler,
# )
# pipe.to("cuda", DTYPE)


# def prepare(prompt, is_polish_prompt):
#     if not is_polish_prompt: return prompt, False
#     polished_prompt = prompt_utils.polish_prompt(prompt)
#     return polished_prompt, True


@spaces.GPU
def inference(
    prompt,
    negative_prompt,
    seed=42,
    randomize_seed=True,
    guidance_scale=1.5,
    num_inference_steps=8,
    progress=gr.Progress(track_tqdm=True),
):
    """Turn the bundled style reference images into a LoRA file.

    Opens the five images under ``examples/style/1``, runs them through
    ``QwenImageUnit_Image2LoRAEncode`` and ``QwenImageUnit_Image2LoRADecode``
    with the module-level ``pipe``, and writes the resulting tensors to
    ``model_style.safetensors``.

    Args:
        prompt: Text from the prompt box. Not read by the current body.
        negative_prompt: Text from the negative-prompt box. Not read by the
            current body.
        seed: Seed value from the UI; passed back unchanged.
        randomize_seed: Checkbox value. Not read by the current body.
        guidance_scale: Slider value. Not read by the current body.
        num_inference_steps: Slider value. Not read by the current body.
        progress: Gradio progress tracker. Not referenced in the body.

    Returns:
        A ``(None, seed)`` tuple: one value for each output component listed
        in the ``run_button.click`` wiring (image, then seed). No image is
        produced here, so the image slot is ``None``.
    """
    timestamp = time.time()
    print(f"timestamp: {timestamp}")

    # Load images
    images = [Image.open(f"examples/style/1/{index}.jpg") for index in range(5)]

    # Model inference
    with torch.no_grad():
        embs = QwenImageUnit_Image2LoRAEncode().process(pipe, image2lora_images=images)
        lora = QwenImageUnit_Image2LoRADecode().process(pipe, **embs)["lora"]

    save_file(lora, "model_style.safetensors")

    # The click handler lists two outputs; returning a lone ``True`` would not
    # provide a value for each of them.
    return None, seed

#     # process image
#     print("DEBUG: process image")
#     if input_image is None:
#         print("Error: input_image is empty.")
#         return None


#     print("DEBUG: control_image_torch")
#     orig_width, orig_height = input_image.size
#     control_image, width, height = image_utils.rescale_image(input_image, image_scale, 16, 2048)
#     control_image_torch = image_utils.get_image_latent(control_image, sample_size=[height, width])[:, :, 0]
    
#     # generation
#     if randomize_seed: seed = random.randint(0, MAX_SEED)
#     generator = torch.Generator().manual_seed(seed)

#     output_image = pipe(
#         prompt=prompt,
#         negative_prompt = negative_prompt,
#         width=width,
#         height=height,
#         generator=generator,
#         guidance_scale=guidance_scale,
#         control_image=control_image_torch,
#         num_inference_steps=num_inference_steps,
#         control_context_scale=control_context_scale,
#     ).images[0]

#     output_image = output_image.resize((orig_width * image_scale, orig_height * image_scale))
#     return output_image, seed


def read_file(path: str) -> str:
    """Return the full text of the file at ``path``, decoded as UTF-8."""
    with open(path, mode="r", encoding="utf-8") as handle:
        return handle.read()


# Page-level CSS: centres the element with id "col-container" and caps its width.
css = """
#col-container {
    margin: 0 auto;
    max-width: 960px;
}
"""

# Example definitions shipped with the app. The file is decoded explicitly as
# UTF-8 so the result does not depend on the platform's default encoding.
with open('examples/0_examples.json', 'r', encoding='utf-8') as file:
    examples = json.load(file)

# Build the Gradio UI: an HTML header, a row with an input column (prompt,
# button, advanced settings) and an output column (image), then a Markdown
# footer. Components appear on the page in the order they are created here.
with gr.Blocks() as demo:
    # elem_id matches the "#col-container" selector in the `css` string.
    with gr.Column(elem_id="col-container"):
        with gr.Column():
            gr.HTML(read_file("static/header.html"))
        with gr.Row():
            # Left column: user inputs.
            with gr.Column():

                prompt = gr.Textbox(
                    label="Prompt",
                    show_label=False,
                    lines=2,
                    placeholder="Enter your prompt",
                    value="a man in a fishing boat. high quality, detailed"
                    # container=False,
                )
                # is_polish_prompt = gr.Checkbox(label="Polish prompt", value=True)
                # control_mode = gr.Radio(
                #     choices=["Canny", "Depth", "HED", "MLSD", "Pose"],
                #     value="Canny",
                #     label="Control Mode"
                # )
                run_button = gr.Button("Generate", variant="primary")
                with gr.Accordion("Advanced Settings", open=False):
                    
                    negative_prompt = gr.Textbox(
                        label="Negative prompt",
                        lines=2,
                        container=False,
                        placeholder="Enter your negative prompt",
                        value="blurry, ugly, bad"
                    )
                    with gr.Row():
                        num_inference_steps = gr.Slider(
                            label="Steps",
                            minimum=1,
                            maximum=30,
                            step=1,
                            value=9,
                        )
                        # NOTE(review): this slider is not included in the
                        # click handler's `inputs` list below, so its value
                        # never reaches `inference`.
                        control_context_scale = gr.Slider(
                            label="Context scale",
                            minimum=0.0,
                            maximum=1.0,
                            step=0.01,
                            value=0.75,
                        )

                    with gr.Row():
                        guidance_scale = gr.Slider(
                            label="Guidance scale",
                            minimum=0.0,
                            maximum=10.0,
                            step=0.1,
                            value=1.0,
                        )

                    # Seed range is 0..MAX_SEED; the slider is also listed as
                    # an output of the click handler below.
                    seed = gr.Slider(
                        label="Seed",
                        minimum=0,
                        maximum=MAX_SEED,
                        step=1,
                        value=42,
                    )
                    randomize_seed = gr.Checkbox(label="Randomize seed", value=False)

            # Right column: result display.
            with gr.Column():
                output_image = gr.Image(label="Generated image", show_label=False)
                # polished_prompt = gr.Textbox(label="Polished prompt", interactive=False)

                # with gr.Accordion("Preprocessor output", open=False):
                #     control_image = gr.Image(label="Control image", show_label=False)

                    
        # gr.Examples(examples=examples, inputs=[input_image])
        gr.Markdown(read_file("static/footer.md"))

    # Wire the button to `inference`. The `inputs` list follows the order of
    # the function's first six parameters, and the callback must supply one
    # value per component in `outputs` (image first, then seed).
    run_button.click(
        fn=inference,
        inputs=[
            prompt,
            negative_prompt,
            seed,
            randomize_seed,
            guidance_scale,
            num_inference_steps,
        ],
        outputs=[output_image, seed],
    )


# Start the app only when this file is executed as a script.
# NOTE(review): `css` is passed to launch() rather than to gr.Blocks(); this
# assumes the installed Gradio release accepts `css` (and `mcp_server`) on
# launch() — confirm against the pinned Gradio version.
if __name__ == "__main__":
    demo.launch(mcp_server=True, css=css)