# Source: Hugging Face Space "EarthnDusk/SDXL_To_Diffusers" (status: Running; file size: 11,739 bytes)

import os
import gradio as gr
import torch
from diffusers import StableDiffusionXLPipeline, UNet2DConditionModel, AutoencoderKL
from transformers import CLIPTextModel, CLIPTextModelWithProjection, CLIPTextConfig
from safetensors.torch import load_file
from collections import OrderedDict
import requests
from urllib.parse import urlparse, unquote
from pathlib import Path
import hashlib
from huggingface_hub import login, HfApi, hf_hub_download
from huggingface_hub.utils import validate_repo_id, HFValidationError
from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
from huggingface_hub.utils import HfHubHTTPError

# ---------------------- UTILITY FUNCTIONS ----------------------
# Helpers for fetching checkpoints, creating Hub repositories, converting weights, and uploading.

def download_model(model_path_or_url):
    """Resolve a checkpoint reference to a local file path, downloading if needed.

    Accepted forms, tried in this order:

    1. ``http://`` / ``https://`` URL - streamed into ``<HF cache>/downloads``.
    2. Path of an existing local file - returned as given (whitespace-trimmed).
    3. ``namespace/repo/path/to/file`` - that file is fetched from the Hub.
    4. Bare repo ID (``namespace/repo`` or ``repo``) - fetched only when the
       repo root holds exactly one ``.safetensors``/``.ckpt`` file.

    The local-file check runs before any Hub lookup because a plain file name
    such as ``model.safetensors`` is also a syntactically valid repo ID.

    Args:
        model_path_or_url: URL, local path, or Hugging Face reference.

    Returns:
        Path of the checkpoint file on the local filesystem.

    Raises:
        ValueError: If the input is empty, matches none of the accepted forms,
            or the download fails (the underlying error is chained).
    """
    source = (model_path_or_url or "").strip()
    if not source:
        raise ValueError(
            "Error downloading or accessing model: no model path, URL, or repo ID given"
        )

    try:
        if source.startswith(("http://", "https://")):
            return _download_url(source)
        if os.path.isfile(source):
            return source
        return _download_from_hub(source)
    except Exception as e:
        # Callers rely on a single exception type for every failure mode.
        raise ValueError(f"Error downloading or accessing model: {e}") from e


def _download_url(url):
    """Stream *url* into the Hub cache's ``downloads`` folder and return the path."""
    filename = os.path.basename(unquote(urlparse(url).path))
    if filename in ("", ".", ".."):
        # No usable name in the URL path: derive a stable one from the URL.
        filename = hashlib.sha256(url.encode()).hexdigest()

    cache_dir = os.path.join(HUGGINGFACE_HUB_CACHE, "downloads")
    os.makedirs(cache_dir, exist_ok=True)
    local_path = os.path.join(cache_dir, filename)

    # The context manager releases the connection even if writing fails; the
    # timeout keeps a stalled server from hanging the app indefinitely.
    with requests.get(url, stream=True, timeout=60) as response:
        response.raise_for_status()
        with open(local_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=1 << 20):
                if chunk:
                    f.write(chunk)
    return local_path


def _download_from_hub(reference):
    """Fetch a checkpoint named by a Hub reference and return its local path."""
    parts = reference.split("/")
    if len(parts) >= 3:
        # namespace/repo/path/to/file: the first two segments form the repo ID.
        repo_id = "/".join(parts[:2])
        filename = "/".join(parts[2:])
    else:
        repo_id = reference
        filename = None

    try:
        validate_repo_id(repo_id)
    except HFValidationError:
        raise ValueError(f"Invalid model path or URL: {reference}") from None

    if filename is None:
        # hf_hub_download always needs a file name, so look for a single
        # checkpoint in the repo root instead of guessing.
        candidates = [
            name
            for name in HfApi().list_repo_files(repo_id)
            if "/" not in name and name.endswith((".safetensors", ".ckpt"))
        ]
        if len(candidates) != 1:
            raise ValueError(
                f"Repo '{repo_id}' has {len(candidates)} checkpoint files in its root; "
                "specify one as 'namespace/repo/file.safetensors'."
            )
        filename = candidates[0]

    return hf_hub_download(repo_id=repo_id, filename=filename)


def create_model_repo(api, user, orgs_name, model_name, make_private=False):
    """Create the target model repository on the Hub and return its repo ID.

    Args:
        api: ``HfApi``-like client exposing ``create_repo``.
        user: Mapping with a ``"name"`` key; used as the namespace when no
            organization is given.
        orgs_name: Organization namespace. ``None``, empty, or whitespace-only
            values fall back to ``user["name"]``.
        model_name: Repository name; surrounding whitespace is removed.
        make_private: Whether the repository is created as private.

    Returns:
        The ``namespace/name`` repo ID.

    Raises:
        HfHubHTTPError: For any Hub failure other than "repo already exists"
            (for example an invalid token or missing write permission).
    """
    namespace = (orgs_name or "").strip() or user["name"]
    repo_id = f"{namespace}/{model_name.strip()}"
    try:
        api.create_repo(repo_id=repo_id, repo_type="model", private=make_private)
    except HfHubHTTPError as err:
        # Only HTTP 409 (Conflict) means the repo is already there; anything
        # else is a real failure and must not be reported as "already exists".
        status = getattr(getattr(err, "response", None), "status_code", None)
        if status != 409:
            raise
        print(f"Model repo '{repo_id}' already exists.")
    else:
        print(f"Model repo '{repo_id}' created.")
    return repo_id
def load_sdxl_checkpoint(checkpoint_path):
    """Load a single-file checkpoint and split it into per-component state dicts.

    Keys are routed by their leading prefix, the prefix is removed, and each
    tensor is cast to ``torch.float16``. Keys matching no prefix are dropped.

    Args:
        checkpoint_path: Path to a ``.safetensors`` or ``.ckpt`` file
            (extension matched case-insensitively).

    Returns:
        Tuple ``(text_encoder1_state, text_encoder2_state, vae_state,
        unet_state)`` of ``OrderedDict`` objects.

    Raises:
        ValueError: If the file extension is neither ``.safetensors`` nor ``.ckpt``.
    """
    lowered_path = checkpoint_path.lower()
    if lowered_path.endswith(".safetensors"):
        state_dict = load_file(checkpoint_path, device="cpu")
    elif lowered_path.endswith(".ckpt"):
        # NOTE(security): torch.load unpickles the file; only load .ckpt files
        # from sources you trust.
        checkpoint = torch.load(checkpoint_path, map_location="cpu")
        # Some checkpoints wrap the weights under "state_dict", others store
        # them at the top level; accept both.
        if isinstance(checkpoint, dict) and "state_dict" in checkpoint:
            state_dict = checkpoint["state_dict"]
        else:
            state_dict = checkpoint
    else:
        raise ValueError("Unsupported checkpoint format. Must be .safetensors or .ckpt")

    text_encoder1_state = OrderedDict()
    text_encoder2_state = OrderedDict()
    vae_state = OrderedDict()
    unet_state = OrderedDict()

    # NOTE(review): stock SDXL single-file checkpoints usually keep their text
    # encoders under "conditioner.embedders.*" - confirm that the
    # "condition_model.*" prefixes match the checkpoints this tool targets.
    routes = (
        ("first_stage_model.", vae_state),
        ("condition_model.model.text_encoder.", text_encoder1_state),
        ("condition_model.model.text_encoder_2.", text_encoder2_state),
        ("model.diffusion_model.", unet_state),
    )

    for key, value in state_dict.items():
        for prefix, bucket in routes:
            if key.startswith(prefix):
                # Slice off only the leading prefix; str.replace would also
                # remove later occurrences inside the key.
                bucket[key[len(prefix):]] = value.to(torch.float16)
                break

    return text_encoder1_state, text_encoder2_state, vae_state, unet_state



def build_diffusers_model(text_encoder1_state, text_encoder2_state, vae_state, unet_state, reference_model_path=None):
    """Instantiate the four SDXL components and load the given weights into them.

    Component architectures come from the configs of ``reference_model_path``;
    weights are loaded with ``strict=False`` and any key mismatch is printed
    so that a silently unloaded component does not go unnoticed.

    Args:
        text_encoder1_state: State dict for the first text encoder.
        text_encoder2_state: State dict for the second text encoder.
        vae_state: State dict for the VAE.
        unet_state: State dict for the UNet.
        reference_model_path: Diffusers repo ID or local folder providing the
            component configs; blank/``None`` selects the SDXL base model.

    Returns:
        Tuple ``(text_encoder1, text_encoder2, vae, unet)`` in float16 on CPU.
    """
    if not reference_model_path:
        reference_model_path = "stabilityai/stable-diffusion-xl-base-1.0"

    config_text_encoder1 = CLIPTextConfig.from_pretrained(reference_model_path, subfolder="text_encoder")
    config_text_encoder2 = CLIPTextConfig.from_pretrained(reference_model_path, subfolder="text_encoder_2")
    # load_config reads only the config file, so the reference weights are not
    # downloaded and instantiated just to discover the architecture.
    config_vae = AutoencoderKL.load_config(reference_model_path, subfolder="vae")
    config_unet = UNet2DConditionModel.load_config(reference_model_path, subfolder="unet")

    text_encoder1 = CLIPTextModel(config_text_encoder1)
    text_encoder2 = CLIPTextModelWithProjection(config_text_encoder2)
    # Diffusers models are built from a config via from_config(); their
    # constructors take the individual hyper-parameters, not a `config` kwarg.
    vae = AutoencoderKL.from_config(config_vae)
    unet = UNet2DConditionModel.from_config(config_unet)

    components = (
        ("text_encoder", text_encoder1, text_encoder1_state),
        ("text_encoder_2", text_encoder2, text_encoder2_state),
        ("vae", vae, vae_state),
        ("unet", unet, unet_state),
    )
    for name, module, state in components:
        report = module.load_state_dict(state, strict=False)
        if report.missing_keys or report.unexpected_keys:
            # strict=False never raises, so surface mismatches explicitly.
            print(
                f"Warning: {name}: {len(report.missing_keys)} missing and "
                f"{len(report.unexpected_keys)} unexpected keys while loading weights."
            )
        module.to(torch.float16).to("cpu")

    return text_encoder1, text_encoder2, vae, unet

def convert_and_save_sdxl_to_diffusers(checkpoint_path_or_url, output_path, reference_model_path):
    """Convert a single-file SDXL checkpoint and save it in Diffusers format.

    Args:
        checkpoint_path_or_url: Local path, URL, or Hub reference of the
            checkpoint, resolved through ``download_model``.
        output_path: Folder the Diffusers pipeline is written to.
        reference_model_path: Diffusers repo ID or folder supplying configs,
            tokenizers, and scheduler; blank/``None`` selects the SDXL base
            model.
    """
    # The UI documents this field as optional, so an empty value must resolve
    # to the default here as well: it is also passed to from_pretrained below.
    if not reference_model_path or not reference_model_path.strip():
        reference_model_path = "stabilityai/stable-diffusion-xl-base-1.0"
    else:
        reference_model_path = reference_model_path.strip()

    checkpoint_path = download_model(checkpoint_path_or_url)
    text_encoder1_state, text_encoder2_state, vae_state, unet_state = load_sdxl_checkpoint(checkpoint_path)
    text_encoder1, text_encoder2, vae, unet = build_diffusers_model(
        text_encoder1_state, text_encoder2_state, vae_state, unet_state, reference_model_path
    )

    # The converted components replace the reference weights; everything else
    # (tokenizers, scheduler) is taken from the reference model.
    pipeline = StableDiffusionXLPipeline.from_pretrained(
        reference_model_path,
        text_encoder=text_encoder1,
        text_encoder_2=text_encoder2,
        vae=vae,
        unet=unet,
        torch_dtype=torch.float16,
    )
    pipeline.to("cpu")
    pipeline.save_pretrained(output_path)
    print(f"Model saved as Diffusers format: {output_path}")

def upload_to_huggingface(model_path, hf_token, orgs_name, model_name, make_private):
    """Upload a converted model folder to the Hugging Face Hub.

    The token is used only by the per-call API client. It is deliberately not
    passed to ``login``, which would persist one visitor's token on the host
    where later requests could reuse it.

    Args:
        model_path: Local folder to upload.
        hf_token: Hugging Face token with write access.
        orgs_name: Optional organization namespace; blank uses the token owner.
        model_name: Name of the target repository.
        make_private: Whether to create the repository as private.

    Raises:
        ValueError: If the token or the model name is missing.
    """
    if not hf_token or not hf_token.strip():
        raise ValueError("A Hugging Face write token is required for upload.")
    if not model_name or not model_name.strip():
        raise ValueError("A model name is required for upload.")

    token = hf_token.strip()
    api = HfApi(token=token)
    user = api.whoami(token=token)
    model_repo = create_model_repo(api, user, orgs_name, model_name, make_private)
    api.upload_folder(folder_path=model_path, repo_id=model_repo)
    print(f"Model uploaded to: https://huggingface.co/{model_repo}")

# ---------------------- MAIN FUNCTION (with Debugging Prints) ----------------------

def main(
    model_to_load,
    reference_model,
    output_path,
    hf_token,
    orgs_name,
    model_name,
    make_private,
):
    """Convert an SDXL checkpoint to Diffusers format and upload it to the Hub.

    This is the Gradio click handler: it never raises, and instead returns a
    status message that is shown to the user.

    Args:
        model_to_load: Checkpoint path, URL, or Hub reference.
        reference_model: Reference Diffusers model (may be blank).
        output_path: Folder the converted pipeline is written to.
        hf_token: Hugging Face write token; never echoed to the logs.
        orgs_name: Optional organization namespace.
        model_name: Target repository name.
        make_private: Whether the repository is created as private.

    Returns:
        A success message, or ``"An error occurred: ..."`` on failure.
    """

    print("---- Main Function Called ----")  # Debug Print
    print(f"  model_to_load: {model_to_load}")  # Debug Print
    print(f"  reference_model: {reference_model}")  # Debug Print
    print(f"  output_path: {output_path}")  # Debug Print
    # Only report whether a token was supplied; the secret itself must not
    # end up in the application logs.
    print(f"  hf_token: {'<provided>' if hf_token else '<missing>'}")  # Debug Print
    print(f"  orgs_name: {orgs_name}")  # Debug Print
    print(f"  model_name: {model_name}")  # Debug Print
    print(f"  make_private: {make_private}")  # Debug Print

    try:
        convert_and_save_sdxl_to_diffusers(
            model_to_load, output_path, reference_model
        )
        upload_to_huggingface(
            output_path, hf_token, orgs_name, model_name, make_private
        )
        result = "Conversion and upload completed successfully!"
        print(f"---- Main Function Successful: {result} ----")  # Debug Print
        return result
    except Exception as e:
        # Top-level UI boundary: turn any failure into a user-visible message.
        error_message = f"An error occurred: {e}"
        print(f"---- Main Function Error: {error_message} ----")  # Debug Print
        return error_message

# ---------------------- GRADIO INTERFACE ----------------------

# Custom CSS handed to gr.Blocks. The selectors target the element ids
# "main-container" and "convert-button".
# NOTE(review): no component in this section sets a matching elem_id, so these
# rules appear to have no effect as written - confirm.
css = """
#main-container {
    display: flex;
    flex-direction: column;
    font-family: 'Arial', sans-serif;
    font-size: 16px;
    color: #333;
}
#convert-button {
    margin-top: 1em;
}
"""

# Page layout: an introductory Markdown block, then a row with the input form
# in the left column and the result panel in the right column.
with gr.Blocks(css=css) as demo:
    gr.Markdown(
        """
    # 🎨 SDXL Model Converter
    Convert SDXL checkpoints to Diffusers format (FP16, CPU-only).

    ### 📥 Input Sources Supported:
    - Local model files (.safetensors, .ckpt)
    - Direct URLs to model files
    - Hugging Face model repositories (e.g., 'my-org/my-model' or 'my-org/my-model/file.safetensors')

    ### ℹ️ Important Notes:
    - This tool runs on **CPU**, conversion might be slower than on GPU.
    - For Hugging Face uploads, you need a **WRITE** token (not a read token).
    - Get your HF token here: [https://huggingface.co/settings/tokens](https://huggingface.co/settings/tokens)

    ### 💾 Memory Usage:
    - This space is configured for **FP16** precision to reduce memory usage.
    - Close other applications during conversion.
    - For large models, ensure you have at least 16GB of RAM.

    ### 💻 Source Code:
    - [GitHub Repository](https://github.com/Ktiseos-Nyx/Gradio-SDXL-Diffusers)

    ### 🙏 Support:
    - If you're interested in funding more projects: [Ko-fi](https://ko-fi.com/duskfallcrew)
    """
    )

    with gr.Row():
        # Left column: one input component per parameter of main().
        with gr.Column():
            model_to_load = gr.Textbox(
                label="SDXL Checkpoint (Path, URL, or HF Repo)",
                placeholder="Path, URL, or Hugging Face Repo ID (e.g., my-org/my-model or my-org/my-model/file.safetensors)",
            )
            reference_model = gr.Textbox(
                label="Reference Diffusers Model (Optional)",
                placeholder="e.g., stabilityai/stable-diffusion-xl-base-1.0 (Leave blank for default)",
            )
            output_path = gr.Textbox(label="Output Path (Diffusers Format)", value="output")
            # type="password" masks the token in the browser.
            hf_token = gr.Textbox(label="Hugging Face Token", placeholder="Your Hugging Face write token", type="password")
            orgs_name = gr.Textbox(label="Organization Name (Optional)", placeholder="Your organization name")
            model_name = gr.Textbox(label="Model Name", placeholder="The name of your model on Hugging Face")
            make_private = gr.Checkbox(label="Make Repository Private", value=False)
            convert_button = gr.Button("Convert and Upload")

        # Right column: shows the status string returned by main().
        with gr.Column(variant="panel"):
            output = gr.Markdown(container=False)

    # Wire the button to main(). The click handler is registered inside the
    # Blocks context, and the inputs are listed in the same order as main()'s
    # parameters.
    convert_button.click(
        fn=main,
        inputs=[
            model_to_load,
            reference_model,
            output_path,
            hf_token,
            orgs_name,
            model_name,
            make_private,
        ],
        outputs=output,
    )

# Start the Gradio server; this runs whenever the module is executed or imported.
demo.launch()