qwen-image-to-lora

Running on Zero

File size: 19,163 Bytes

import gradio as gr
import numpy as np
import torch, random, json, spaces
from ulid import ULID
from diffsynth.pipelines.qwen_image import (
    QwenImagePipeline, ModelConfig,
    QwenImageUnit_Image2LoRAEncode, QwenImageUnit_Image2LoRADecode
)
from safetensors.torch import save_file
from PIL import Image

# from utils import repo_utils, image_utils, prompt_utils
# repo_utils.clone_repo_if_not_exists("git clone https://huggingface.co/DiffSynth-Studio/General-Image-Encoders", "app/repos")
# repo_utils.clone_repo_if_not_exists("https://huggingface.co/apple/starflow", "app/models")

URL_PUBLIC = "https://huggingface.co/spaces/AiSudo/Qwen-Image-to-LoRA/blob/main"
DTYPE = torch.bfloat16
MAX_SEED = np.iinfo(np.int32).max


vram_config_disk_offload = {
    "offload_dtype": "disk",
    "offload_device": "disk",
    "onload_dtype": "disk",
    "onload_device": "disk",
    "preparing_dtype": torch.bfloat16,
    "preparing_device": "cuda",
    "computation_dtype": torch.bfloat16,
    "computation_device": "cuda",
}

# Load models (LoRA encoder/decoder)
pipe_lora = QwenImagePipeline.from_pretrained(
    torch_dtype=torch.bfloat16,
    device="cuda",
    model_configs=[
        ModelConfig(
            download_source="huggingface",
            model_id="DiffSynth-Studio/General-Image-Encoders",
            origin_file_pattern="SigLIP2-G384/model.safetensors",
            **vram_config_disk_offload,
        ),
        ModelConfig(
            download_source="huggingface",
            model_id="DiffSynth-Studio/General-Image-Encoders",
            origin_file_pattern="DINOv3-7B/model.safetensors",
            **vram_config_disk_offload,
        ),
        ModelConfig(
            download_source="huggingface",
            model_id="DiffSynth-Studio/Qwen-Image-i2L",
            origin_file_pattern="Qwen-Image-i2L-Style.safetensors",
            **vram_config_disk_offload,
        ),
    ],
    processor_config=ModelConfig(model_id="Qwen/Qwen-Image-Edit", origin_file_pattern="processor/"),
    vram_limit=torch.cuda.mem_get_info("cuda")[1] / (1024 ** 3) - 0.5,
)

vram_config = {
    "offload_dtype": "disk",
    "offload_device": "disk",
    "onload_dtype": torch.bfloat16,
    "onload_device": "cuda",
    "preparing_dtype": torch.bfloat16,
    "preparing_device": "cuda",
    "computation_dtype": torch.bfloat16,
    "computation_device": "cuda",
}

# Load image generation pipeline
pipe_imagen = QwenImagePipeline.from_pretrained(
    torch_dtype=torch.bfloat16,
    device="cuda",
    model_configs=[
        ModelConfig(download_source="huggingface", model_id="Qwen/Qwen-Image", origin_file_pattern="transformer/diffusion_pytorch_model*.safetensors", **vram_config),
        ModelConfig(download_source="huggingface", model_id="Qwen/Qwen-Image", origin_file_pattern="text_encoder/model*.safetensors", **vram_config),
        ModelConfig(download_source="huggingface", model_id="Qwen/Qwen-Image", origin_file_pattern="vae/diffusion_pytorch_model.safetensors", **vram_config),
    ],
    tokenizer_config=ModelConfig(download_source="huggingface", model_id="Qwen/Qwen-Image", origin_file_pattern="tokenizer/"),
    vram_limit=torch.cuda.mem_get_info("cuda")[1] / (1024 ** 3) - 0.5,
)


def read_file(path: str) -> str:
    with open(path, "r", encoding="utf-8") as f:
        return f.read()


def show_user(profile: gr.OAuthProfile | None):
    """
    Displays who is logged in (or nothing if not logged in).
    Works in Spaces OAuth; locally uses hf auth login if available.
    """
    if profile is None:
        return ""
    username = getattr(profile, "username", None) or "unknown"
    return f"✅ Signed in as **{username}**"


@spaces.GPU
def generate_lora(
    input_images,
    profile: gr.OAuthProfile | None = None,
    oauth_token: gr.OAuthToken | None = None,
    progress=gr.Progress(track_tqdm=True),
):
    """
    - Always generates and saves LoRA locally under ./loras/
    - If user is signed in (OAuth), also uploads to the user's *own* Hub repo.
    """
    import os
    from huggingface_hub import HfApi

    ulid = str(ULID()).lower()[:12]
    print(f"ulid: {ulid}")

    if not input_images:
        return (
            "",
            gr.update(value="⚠️ Please upload at least 1 image."),
            gr.update(interactive=False),
            gr.update(interactive=False, link=""),
        )

    # Gradio Gallery returns list of (filepath, metadata)
    input_images = [Image.open(filepath).convert("RGB") for filepath, _ in input_images]

    # Model inference
    with torch.no_grad():
        embs = QwenImageUnit_Image2LoRAEncode().process(pipe_lora, image2lora_images=input_images)
        lora = QwenImageUnit_Image2LoRADecode().process(pipe_lora, **embs)["lora"]

    lora_name = f"{ulid}.safetensors"
    os.makedirs("loras", exist_ok=True)
    lora_path = f"loras/{lora_name}"
    save_file(lora, lora_path)

    # Default: local-only message (still lets user generate images from local LoRA)
    hub_url = ""
    hub_markdown = "✅ LoRA generated locally. Sign in to upload it to your Hugging Face account."

    # Upload to the signed-in user's own account if available
    if profile is not None and oauth_token is not None and getattr(oauth_token, "token", None):
        try:
            username = getattr(profile, "username", None) or ""
            if not username:
                raise ValueError("Could not read username from OAuth profile.")

            api = HfApi(token=oauth_token.token)

            # Create / reuse a user repo (model repo recommended for LoRAs)
            # Change name if you want:
            repo_id = f"{username}/qwen-image-loras"
            api.create_repo(repo_id=repo_id, repo_type="model", exist_ok=True)

            api.upload_file(
                path_or_fileobj=lora_path,
                path_in_repo=f"loras/{lora_name}",
                repo_id=repo_id,
                repo_type="model",
                commit_message=f"Add LoRA: {lora_name}",
            )

            hub_url = f"https://huggingface.co/{repo_id}/blob/main/loras/{lora_name}"
            hub_markdown = f"✅ **Uploaded to your account:** {hub_url}"

        except Exception as e:
            print(f"Error uploading to user repo: {e}")
            hub_markdown = f"⚠️ Upload failed (still saved locally): `{str(e)}`"

    # Enable image generation button regardless
    return (
        lora_name,
        gr.update(value=hub_markdown),
        gr.update(interactive=True),
        gr.update(interactive=bool(hub_url), link=hub_url),
    )


@spaces.GPU
def generate_image(
    lora_name,
    prompt,
    negative_prompt="blurry ugly bad",
    width=1024,
    height=1024,
    seed=42,
    randomize_seed=True,
    guidance_scale=3.5,
    num_inference_steps=8,
    progress=gr.Progress(track_tqdm=True),
):
    if not lora_name:
        return None, seed

    lora_path = f"loras/{lora_name}"

    pipe_imagen.clear_lora()
    pipe_imagen.load_lora(pipe_imagen.dit, lora_path)

    if randomize_seed:
        seed = random.randint(0, MAX_SEED)

    # generator = torch.Generator().manual_seed(seed)

    output_image = pipe_imagen(
        prompt=prompt,
        negative_prompt=negative_prompt,
        num_inference_steps=num_inference_steps,
        width=width,
        height=height,
        # generator=generator,
        # true_cfg_scale=guidance_scale,
        # guidance_scale=1.0  # Use a fixed default for distilled guidance
    )

    return output_image, seed


# Enhanced Apple-style CSS - more minimalist and clean
css = """
/* Pure Apple Design System */
.gradio-container {
    font-family: -apple-system, BlinkMacSystemFont, "SF Pro Display", "SF Pro Text", "Helvetica Neue", Helvetica, Arial, sans-serif !important;
    background: #ffffff !important;
    color: #1d1d1f !important;
    line-height: 1.47059 !important;
    font-weight: 400 !important;
    letter-spacing: -.022em !important;
}

#col-container {
    margin: 0 auto;
    max-width: 980px;
    padding: 40px 20px;
}

/* Ultra-minimalist header */
.gradio-container .gr-block-header {
    background: transparent !important;
    border: none !important;
    padding: 0 !important;
    margin-bottom: 60px !important;
    box-shadow: none !important;
}

.gradio-container h1 {
    font-weight: 600 !important;
    font-size: 3rem !important;
    color: #1d1d1f !important;
    text-align: center !important;
    margin-bottom: 16px !important;
    letter-spacing: -.003em !important;
}

.gradio-container .subtitle {
    font-size: 1.25rem !important;
    font-weight: 400 !important;
    color: #6e6e73 !important;
    text-align: center !important;
    margin-bottom: 8px !important;
    line-height: 1.4 !important;
}

/* Clean card sections */
.section-card {
    background: #f2f2f7 !important;
    border-radius: 18px !important;
    padding: 32px !important;
    margin-bottom: 32px !important;
    border: none !important;
    box-shadow: none !important;
}

/* Apple-style buttons */
.gradio-container .gr-button {
    background: #007aff !important;
    border: none !important;
    border-radius: 8px !important;
    color: white !important;
    font-weight: 500 !important;
    font-size: 17px !important;
    padding: 16px 32px !important;
    min-height: 44px !important;
    transition: all 0.15s ease !important;
    box-shadow: none !important;
    letter-spacing: -.022em !important;
}

.gradio-container .gr-button:hover {
    background: #0051d5 !important;
    transform: none !important;
    box-shadow: none !important;
}

.gradio-container .gr-button:active {
    background: #004bb8 !important;
    transform: scale(0.98) !important;
}

/* Clean input fields */
.gradio-container .gr-textbox,
.gradio-container .gr-slider {
    background: #ffffff !important;
    border: 1px solid #d2d2d7 !important;
    border-radius: 10px !important;
    padding: 12px 16px !important;
    font-size: 17px !important;
    color: #1d1d1f !important;
    transition: all 0.15s ease !important;
    min-height: 44px !important;
}

.gradio-container .gr-textbox:focus,
.gradio-container .gr-slider:focus {
    border-color: #007aff !important;
    box-shadow: 0 0 0 3px rgba(0, 122, 255, 0.1) !important;
    outline: none !important;
}

/* Gallery styling */
.gradio-container .gr-gallery {
    border-radius: 12px !important;
    border: 1px solid #d2d2d7 !important;
    background: #ffffff !important;
    overflow: hidden !important;
}

/* Image output */
.gradio-container .gr-image {
    border-radius: 12px !important;
    border: 1px solid #d2d2d7 !important;
    background: #ffffff !important;
    overflow: hidden !important;
}

/* Accordion - Apple style */
.gradio-container .gr-accordion {
    background: #f2f2f7 !important;
    border: none !important;
    border-radius: 12px !important;
    padding: 0 !important;
    margin-top: 24px !important;
}

.gradio-container .gr-accordion .gr-accordion-button {
    background: transparent !important;
    border: none !important;
    padding: 16px !important;
    font-weight: 500 !important;
    color: #1d1d1f !important;
}

/* Download button */
.gradio-container .gr-download-button {
    background: #34c759 !important;
    border: none !important;
    border-radius: 8px !important;
    color: white !important;
    font-weight: 500 !important;
    font-size: 17px !important;
    padding: 16px 32px !important;
    min-height: 44px !important;
}

.gradio-container .gr-download-button:hover {
    background: #30a14a !important;
}

/* Examples section */
.gradio-container .gr-examples {
    background: #f2f2f7 !important;
    border-radius: 18px !important;
    padding: 24px !important;
    border: none !important;
}

/* Mobile responsiveness */
@media (max-width: 768px) {
    #col-container { padding: 20px 16px !important; max-width: 100% !important; }
    .gradio-container h1 { font-size: 2rem !important; margin-bottom: 12px !important; }
    .gradio-container .subtitle { font-size: 1.1rem !important; }
    .section-card { padding: 24px !important; margin-bottom: 24px !important; }
    .gradio-container .gr-button { padding: 14px 28px !important; font-size: 16px !important; }
    .gradio-container .gr-gallery { height: 200px !important; columns: 2 !important; }
    .gradio-container .gr-row { flex-direction: column !important; gap: 20px !important; }
}

@media (max-width: 480px) {
    .gradio-container h1 { font-size: 1.75rem !important; }
    .section-card { padding: 20px !important; }
    .gradio-container .gr-gallery { height: 180px !important; columns: 1 !important; }
}

/* Hide gradio header/footer */
.gradio-container .gr-footer,
.gradio-container .gr-header {
    display: none !important;
}
"""


# Load examples
with open("examples/0_examples.json", "r") as file:
    examples = json.load(file)
print(examples)


with gr.Blocks() as demo:
    with gr.Column(elem_id="col-container"):
        # Header
        gr.HTML(
            """
            <div style="text-align: center; max-width: 700px; margin: 0 auto;">
                <h1>Qwen Image to LoRA</h1>
                <p class="subtitle">Generate custom LoRA models from your images</p>
                <p style="font-size: 14px; color: #86868b; margin-top: 16px;">
                    Demo by <a href="https://aisudo.com/" target="_blank" style="color: #007aff; text-decoration: none;">AiSudo</a> •
                    <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: #007aff; text-decoration: none;">Built with anycoder</a>
                </p>
            </div>
            """
        )

        # ✅ Hugging Face Login Button
        with gr.Row():
            with gr.Column(scale=1):
                login_btn = gr.LoginButton(
                    value="Sign in with Hugging Face",
                    logout_value="Logout ({})",
                    variant="huggingface",
                    size="lg",
                )
            with gr.Column(scale=3):
                whoami = gr.Markdown(value="", elem_id="whoami")

        with gr.Row():
            with gr.Column(elem_classes=["section-card"]):
                input_images = gr.Gallery(
                    label="Input Images",
                    file_types=["image"],
                    show_label=True,
                    columns=2,
                    object_fit="cover",
                    height=250,
                )
                lora_button = gr.Button("Generate LoRA", size="lg")

            with gr.Column(elem_classes=["section-card"]):
                lora_name = gr.Textbox(
                    label="Generated LoRA",
                    lines=2,
                    interactive=False,
                    placeholder="Your LoRA will appear here...",
                )
                hub_link = gr.Markdown(value="", label="Hub Link")

                # This becomes clickable only after upload
                lora_download = gr.Button(
                    value="View on Hub",
                    interactive=False,
                    size="lg",
                    link="",
                )

        with gr.Column(elem_classes=["section-card"]) as imagen_container:
            gr.Markdown("### Generate Images")
            with gr.Row():
                with gr.Column():
                    prompt = gr.Textbox(
                        label="Prompt",
                        lines=2,
                        placeholder="Describe what you want to generate...",
                        value="a person in a fishing boat.",
                    )

                    imagen_button = gr.Button("Generate Image", interactive=False, size="lg")

                    with gr.Accordion("Settings", open=False):
                        negative_prompt = gr.Textbox(
                            label="Negative Prompt",
                            lines=1,
                            placeholder="What to avoid...",
                            value="blurry, low quality",
                        )
                        num_inference_steps = gr.Slider(
                            label="Steps",
                            minimum=1,
                            maximum=50,
                            step=1,
                            value=25,
                        )
                        guidance_scale = gr.Slider(
                            label="Guidance Scale",
                            minimum=1.0,
                            maximum=10.0,
                            step=0.1,
                            value=3.5,
                        )
                        with gr.Row():
                            width = gr.Slider(
                                label="Width",
                                minimum=512,
                                maximum=1280,
                                step=32,
                                value=768,
                            )
                            height = gr.Slider(
                                label="Height",
                                minimum=512,
                                maximum=1280,
                                step=32,
                                value=1024,
                            )
                        seed = gr.Slider(
                            label="Seed",
                            minimum=0,
                            maximum=MAX_SEED,
                            step=1,
                            value=42,
                        )
                        randomize_seed = gr.Checkbox(label="Randomize Seed", value=False)

                with gr.Column():
                    output_image = gr.Image(label="Generated Image", height=350)

        gr.Examples(examples=examples, inputs=[input_images], label="Examples")
        gr.Markdown(read_file("static/footer.md"))

    # Login click shows the user
    login_btn.click(fn=show_user, inputs=[login_btn], outputs=[whoami], api_visibility="public")

    # Generate LoRA (auto-uploads to user account if signed in)
    lora_button.click(
        fn=generate_lora,
        inputs=[input_images],
        outputs=[lora_name, hub_link, imagen_button, lora_download],
        api_visibility="public",
    )

    # Generate Image
    imagen_button.click(
        fn=generate_image,
        inputs=[
            lora_name,
            prompt,
            negative_prompt,
            width,
            height,
            seed,
            randomize_seed,
            guidance_scale,
            num_inference_steps,
        ],
        outputs=[output_image, seed],
        api_visibility="public",
    )


if __name__ == "__main__":
    demo.launch(
        css=css,  # Gradio 6: pass css here
        mcp_server=True,
        theme=gr.themes.Base(
            primary_hue="blue",
            secondary_hue="gray",
            neutral_hue="gray",
            font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui", "sans-serif"],
            font_mono=[gr.themes.GoogleFont("JetBrains Mono"), "ui-monospace", "Consolas", "monospace"],
        ),
        footer_links=[{"label": "Built with anycoder", "url": "https://huggingface.co/spaces/akhaliq/anycoder"}],
    )