Spaces:

Leteint
/

img2img

Sleeping

File size: 7,029 Bytes

5a12518
 
8ce6ac8
 
eacdd18
 
f335e05
8ce6ac8
f335e05
5a12518
f335e05
47c5516
 
 
 
8ce6ac8
47c5516
5a12518
f335e05
 
 
 
 
85ecbd8
64fa7d3
 
9f0ba2e
85ecbd8
eacdd18
 
8ce6ac8
f335e05
eacdd18
 
f335e05
2fd0b9a
f335e05
 
 
85ecbd8
f335e05
 
 
 
85ecbd8
f335e05
 
 
 
 
85ecbd8
f335e05
85ecbd8
 
f335e05
 
 
 
85ecbd8
f335e05
 
 
 
 
 
 
 
 
 
47c5516
f335e05
eacdd18
 
47c5516
f335e05
47c5516
eacdd18
 
 
f335e05
 
 
47c5516
 
f335e05
 
 
 
 
47c5516
f335e05
47c5516
2fd0b9a
 
 
 
 
47c5516
85ecbd8
47c5516
 
 
 
 
 
85ecbd8
47c5516
 
 
 
 
 
 
f335e05
 
 
85ecbd8
f335e05
 
eacdd18
85ecbd8
eacdd18
 
 
 
 
85ecbd8
eacdd18
 
 
 
 
 
 
 
85ecbd8
 
eacdd18
85ecbd8
f335e05
b03f1b8
eacdd18
 
 
 
 
 
 
f335e05
 
 
eacdd18
 
a1145cd
47c5516
 
 
 
 
d96b332
2fd0b9a
6b374e0
2fd0b9a
85ecbd8
c789538
 
 
 
 
47c5516
 
 
c789538
47c5516
 
 
 
c789538
 
 
47c5516
 
c789538
 
df225fa
6b374e0
 
c789538
 
 
9fd576e
 
d425872
9fd576e
 
1558644
2fd0b9a
f335e05
 
 
 
5a12518
85ecbd8
eacdd18
5a12518
 
2fd0b9a
eacdd18
5a12518
 
 
47c5516
85ecbd8
 
 
47c5516
5a12518
eacdd18
5a12518
 
47c5516
a1145cd
 
47c5516
5a12518
eacdd18
47c5516
6b374e0
 
 
eacdd18
5a12518
eacdd18
5a12518
 
 
eacdd18
 
6b374e0
eacdd18
 
5a12518

import spaces  # Doit être importé AVANT torch / diffusers sur ZeroGPU

import gradio as gr
import torch
import numpy as np
from PIL import Image
import cv2

from huggingface_hub import hf_hub_download

from diffusers.models import ControlNetModel
from pipeline_stable_diffusion_xl_instantid import (
    StableDiffusionXLInstantIDPipeline,
    draw_kps,
)

from insightface.app import FaceAnalysis


# ---------------------------
# Config globale
# ---------------------------

# ⚠️ Put YOUR diffusers-format ArtFusion repo here (after conversion + push_to_hub).
BASE_MODEL_ID = "Leteint/artfusionXLReal_v16Lightning"
# Alternative base models, kept for reference:
#BASE_MODEL_ID = "Niggendar/autismmixSDXL_autismmixPony"
#BASE_MODEL_ID = "Meina/MeinaPastel_V7"

# Probe CUDA once; both the device string and the dtype follow from it:
# half precision on CUDA, full precision otherwise.
_HAS_CUDA = torch.cuda.is_available()
DEVICE = "cuda" if _HAS_CUDA else "cpu"
DTYPE = torch.float16 if _HAS_CUDA else torch.float32

# Local folder that receives the downloaded InstantID files.
CHECKPOINT_DIR = "./checkpoints"


# ---------------------------
# Téléchargement des poids InstantID
# ---------------------------

def download_checkpoints():
    """Download the InstantID files used by this app into ``CHECKPOINT_DIR``.

    Three files are fetched from the ``InstantX/InstantID`` hub repository,
    in this order: the ControlNet config, the ControlNet weights, and the
    IP-Adapter weights. Every call passes ``local_dir_use_symlinks=False``.
    """
    wanted_files = (
        # ControlNet InstantID (the ControlNetModel folder)
        "ControlNetModel/config.json",
        "ControlNetModel/diffusion_pytorch_model.safetensors",
        # IP-Adapter InstantID
        "ip-adapter.bin",
    )
    for remote_name in wanted_files:
        hf_hub_download(
            repo_id="InstantX/InstantID",
            filename=remote_name,
            local_dir=CHECKPOINT_DIR,
            local_dir_use_symlinks=False,
        )


# Fetch the InstantID files at import time, before the loaders further down
# in this file read them from disk.
download_checkpoints()

# Local locations of the downloaded ControlNet folder and IP-Adapter weights
# (same relative names as the files requested by download_checkpoints()).
CONTROLNET_PATH = f"{CHECKPOINT_DIR}/ControlNetModel"
IP_ADAPTER_PATH = f"{CHECKPOINT_DIR}/ip-adapter.bin"


# ---------------------------
# InsightFace (ID de visage)
# ---------------------------

def setup_face_analyzer():
    """Create and prepare the InsightFace ``buffalo_l`` analyzer, then return it.

    ``ctx_id=-1`` is used so that, per the original author's note, face
    analysis stays on the CPU and avoids GPU trouble under ZeroGPU.
    """
    analyzer = FaceAnalysis(name="buffalo_l")
    analyzer.prepare(ctx_id=-1)
    return analyzer


# Module-level analyzer, built once at import time and reused by get_face_info().
face_app = setup_face_analyzer()


def get_face_info(image: Image.Image):
    """Return ``(embedding, keypoints)`` for the largest face in *image*.

    The image is converted to RGB, then to BGR channel order, and handed to
    the module-level ``face_app`` for detection. When several faces are
    found, the one with the largest bounding-box area is used.

    Returns:
        A tuple ``(embedding, keypoints)``: the embedding as a float32 numpy
        array and the keypoints as a numpy array.

    Raises:
        RuntimeError: if no face is detected, or if the selected face carries
            no embedding or no keypoints.
    """

    def _bbox_area(candidate):
        # bbox is indexed as (x_min, y_min, x_max, y_max).
        box = candidate.bbox
        return (box[2] - box[0]) * (box[3] - box[1])

    rgb_pixels = np.array(image.convert("RGB"))
    detections = face_app.get(cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR))
    if len(detections) < 1:
        raise RuntimeError("Aucun visage détecté sur l'image.")

    # Keep only the largest face; on ties the earliest detection wins.
    largest = max(detections, key=_bbox_area)
    dict_like = isinstance(largest, dict)

    # Embedding: attribute first, mapping entry as a fallback.
    # NOTE(review): the normalized embedding is preferred here and the raw
    # "embedding" entry is only a fallback — confirm which one the InstantID
    # adapter expects.
    embedding = getattr(largest, "normed_embedding", None)
    if embedding is None and dict_like:
        embedding = largest.get("embedding", None)
    if embedding is None:
        raise RuntimeError("Impossible de récupérer l'embedding du visage.")

    # Keypoints: same attribute-then-mapping lookup.
    keypoints = getattr(largest, "kps", None)
    if keypoints is None and dict_like:
        keypoints = largest.get("kps", None)
    if keypoints is None:
        raise RuntimeError("Impossible de récupérer les keypoints du visage.")

    return np.array(embedding, dtype=np.float32), np.array(keypoints)


# ---------------------------
# Chargement du pipeline InstantID SDXL + ArtFusion
# ---------------------------

def load_pipeline():
    """Build and return the InstantID SDXL pipeline on top of ``BASE_MODEL_ID``.

    Steps, in order: load the InstantID ControlNet from ``CONTROLNET_PATH``,
    create the pipeline from ``BASE_MODEL_ID`` with that ControlNet (both in
    ``DTYPE``), move it to ``DEVICE``, then load the InstantID IP-Adapter from
    ``IP_ADAPTER_PATH`` and set its scale to 0.6.
    """
    instantid_controlnet = ControlNetModel.from_pretrained(
        CONTROLNET_PATH,
        torch_dtype=DTYPE,
    )
    pipeline = StableDiffusionXLInstantIDPipeline.from_pretrained(
        BASE_MODEL_ID,
        controlnet=instantid_controlnet,
        torch_dtype=DTYPE,
    )

    # DEVICE is either "cuda" or "cpu", so it can be passed straight through.
    pipeline.to(DEVICE)

    # The IP-Adapter is attached after the pipeline has been moved.
    pipeline.load_ip_adapter_instantid(IP_ADAPTER_PATH)
    pipeline.set_ip_adapter_scale(0.6)

    return pipeline


# Module-level pipeline, built once at import time and reused by generate().
pipe = load_pipeline()


# ---------------------------
# Fonction de génération (ZeroGPU)
# ---------------------------

@spaces.GPU
def generate(face_image, prompt, negative_prompt="", steps=30, guidance_scale=5, height=1024, width=768):
    """Generate an image whose face follows the reference in *face_image*.

    Args:
        face_image: image containing the reference face (the UI supplies a
            PIL image); ``None`` when nothing was uploaded.
        prompt: description of the body, outfit, setting and style.
        negative_prompt: text passed to the pipeline as ``negative_prompt``.
        steps: number of inference steps (cast to ``int``).
        guidance_scale: guidance scale (cast to ``float``).
        height: output height passed to the pipeline (cast to ``int``).
        width: output width passed to the pipeline (cast to ``int``).

    Returns:
        The first image of the pipeline output (``out.images[0]``).

    Raises:
        gr.Error: if no face image was provided, or if any step of the
            generation fails. In the latter case the original exception is
            chained as the cause and its traceback is printed.
    """
    # Validate before entering the try block: this is a user-input problem,
    # so it must reach the UI as-is instead of being caught, logged with a
    # traceback and re-wrapped by the generic handler below.
    if face_image is None:
        raise gr.Error("Merci de fournir une image de visage.")

    try:
        # Make sure the pipeline sits on the selected device for this call.
        pipe.to(DEVICE)

        # 1) Face embedding + keypoints; typically (512,) and (5, 2) according
        #    to the original author's note — not enforced here.
        face_emb, face_kps = get_face_info(face_image)
        face_emb_batch = face_emb[None]  # add a leading batch axis

        # 2) Keypoint image used as the ControlNet condition.
        kps_image = draw_kps(face_image, face_kps)

        # 3) InstantID SDXL pipeline call; the ControlNet scale is fixed at 0.4.
        out = pipe(
            prompt=prompt,
            negative_prompt=negative_prompt,
            image=kps_image,
            image_embeds=face_emb_batch,
            num_inference_steps=int(steps),
            guidance_scale=float(guidance_scale),
            controlnet_conditioning_scale=0.4,
            height=int(height),
            width=int(width),
        )

        return out.images[0]

    except gr.Error:
        # Already a user-facing error: propagate it untouched.
        raise
    except Exception as e:
        import traceback

        # Keep the full traceback in the logs; the UI only shows the message.
        traceback.print_exc()
        # Fall back to the exception type when the message is empty so the
        # UI never shows a blank error.
        raise gr.Error(str(e) or type(e).__name__) from e


# ---------------------------
# UI Gradio
# ---------------------------

# Default texts shown in the two prompt boxes, named so the layout below
# stays compact.
_DEFAULT_PROMPT = (
    "photorealistic full body portrait, white European man, realistic skin texture, "
    "firefighter uniform with detailed fabric, realistic studio lighting, 35mm DSLR, "
    "sharp focus on face, clean background, high resolution"
)
_DEFAULT_NEGATIVE = (
    "cartoon, anime, painting, illustration, lowres, blurry, deformed, bad anatomy, "
    "extra limbs, waxy skin, oversharpen, text, watermark"
)

with gr.Blocks() as demo:
    gr.Markdown("## InstantID + ArtFusion XL Real (SDXL) – Visage → corps généré (ZeroGPU)")

    with gr.Row():
        # First column: every input control plus the trigger button.
        with gr.Column():
            face_img = gr.Image(type="pil", label="Image visage (référence ID)")

            prompt = gr.Textbox(label="Prompt", lines=3, value=_DEFAULT_PROMPT)
            neg_prompt = gr.Textbox(label="Negative", value=_DEFAULT_NEGATIVE)

            steps = gr.Slider(minimum=5, maximum=60, value=30, step=1, label="Steps")
            guidance = gr.Slider(minimum=1.0, maximum=10.0, value=5.0, step=0.5, label="Guidance scale")
            height = gr.Slider(minimum=640, maximum=1536, value=1024, step=64, label="Height")
            width = gr.Slider(minimum=640, maximum=1024, value=768, step=64, label="Width")

            btn = gr.Button("Generate")

        # Second column: the generated image.
        with gr.Column():
            output = gr.Image(label="Result")

    # Inputs are listed in the positional order of generate()'s parameters.
    btn.click(
        fn=generate,
        inputs=[face_img, prompt, neg_prompt, steps, guidance, height, width],
        outputs=output,
    )

demo.launch()