File size: 7,029 Bytes
5a12518
 
8ce6ac8
 
eacdd18
 
f335e05
8ce6ac8
f335e05
5a12518
f335e05
47c5516
 
 
 
8ce6ac8
47c5516
5a12518
f335e05
 
 
 
 
85ecbd8
64fa7d3
 
9f0ba2e
85ecbd8
eacdd18
 
8ce6ac8
f335e05
eacdd18
 
f335e05
2fd0b9a
f335e05
 
 
85ecbd8
f335e05
 
 
 
85ecbd8
f335e05
 
 
 
 
85ecbd8
f335e05
85ecbd8
 
f335e05
 
 
 
85ecbd8
f335e05
 
 
 
 
 
 
 
 
 
47c5516
f335e05
eacdd18
 
47c5516
f335e05
47c5516
eacdd18
 
 
f335e05
 
 
47c5516
 
f335e05
 
 
 
 
47c5516
f335e05
47c5516
2fd0b9a
 
 
 
 
47c5516
85ecbd8
47c5516
 
 
 
 
 
85ecbd8
47c5516
 
 
 
 
 
 
f335e05
 
 
85ecbd8
f335e05
 
eacdd18
85ecbd8
eacdd18
 
 
 
 
85ecbd8
eacdd18
 
 
 
 
 
 
 
85ecbd8
 
eacdd18
85ecbd8
f335e05
b03f1b8
eacdd18
 
 
 
 
 
 
f335e05
 
 
eacdd18
 
a1145cd
47c5516
 
 
 
 
d96b332
2fd0b9a
6b374e0
2fd0b9a
85ecbd8
c789538
 
 
 
 
47c5516
 
 
c789538
47c5516
 
 
 
c789538
 
 
47c5516
 
c789538
 
df225fa
6b374e0
 
c789538
 
 
9fd576e
 
d425872
9fd576e
 
1558644
2fd0b9a
f335e05
 
 
 
5a12518
85ecbd8
eacdd18
5a12518
 
2fd0b9a
eacdd18
5a12518
 
 
47c5516
85ecbd8
 
 
47c5516
5a12518
eacdd18
5a12518
 
47c5516
a1145cd
 
47c5516
5a12518
eacdd18
47c5516
6b374e0
 
 
eacdd18
5a12518
eacdd18
5a12518
 
 
eacdd18
 
6b374e0
eacdd18
 
5a12518
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
import spaces  # Doit être importé AVANT torch / diffusers sur ZeroGPU

import gradio as gr
import torch
import numpy as np
from PIL import Image
import cv2

from huggingface_hub import hf_hub_download

from diffusers.models import ControlNetModel
from pipeline_stable_diffusion_xl_instantid import (
    StableDiffusionXLInstantIDPipeline,
    draw_kps,
)

from insightface.app import FaceAnalysis


# ---------------------------
# Global configuration
# ---------------------------

# NOTE: put YOUR ArtFusion diffusers repo here (after conversion + push_to_hub)
BASE_MODEL_ID = "Leteint/artfusionXLReal_v16Lightning"
#BASE_MODEL_ID = "Niggendar/autismmixSDXL_autismmixPony"
#BASE_MODEL_ID = "Meina/MeinaPastel_V7"

# Use CUDA with half precision when a GPU is visible to torch; otherwise fall
# back to the CPU with full float32 precision.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DTYPE = torch.float16 if DEVICE == "cuda" else torch.float32

# Local directory that receives the downloaded InstantID weights.
CHECKPOINT_DIR = "./checkpoints"


# ---------------------------
# Téléchargement des poids InstantID
# ---------------------------

def download_checkpoints():
    """Download the InstantID weights from the Hub into CHECKPOINT_DIR.

    Fetches, in order, the two files of the ``ControlNetModel`` folder (its
    config and its safetensors weights) followed by the IP-Adapter weights,
    all from the ``InstantX/InstantID`` repository.
    """
    wanted_files = (
        # InstantID ControlNet (the complete ControlNetModel folder)
        "ControlNetModel/config.json",
        "ControlNetModel/diffusion_pytorch_model.safetensors",
        # InstantID IP-Adapter
        "ip-adapter.bin",
    )

    for remote_name in wanted_files:
        hf_hub_download(
            repo_id="InstantX/InstantID",
            filename=remote_name,
            local_dir=CHECKPOINT_DIR,
            local_dir_use_symlinks=False,
        )


# Fetch the InstantID weights at import time so the paths below exist before
# any model is loaded from them.
download_checkpoints()

# Local locations of the downloaded ControlNet folder and IP-Adapter weights.
CONTROLNET_PATH = f"{CHECKPOINT_DIR}/ControlNetModel"
IP_ADAPTER_PATH = f"{CHECKPOINT_DIR}/ip-adapter.bin"


# ---------------------------
# InsightFace (ID de visage)
# ---------------------------

def setup_face_analyzer():
    """Build the InsightFace ``buffalo_l`` analyzer, prepared with ``ctx_id=-1``.

    The analyzer is kept on the CPU to stay clear of GPU trouble under ZeroGPU.

    Returns:
        The prepared ``FaceAnalysis`` instance.
    """
    analyzer = FaceAnalysis(name="buffalo_l")
    analyzer.prepare(ctx_id=-1)
    return analyzer


# Module-level analyzer, built once at import time and used by get_face_info().
face_app = setup_face_analyzer()


def get_face_info(image: Image.Image):
    """Return ``(embedding, keypoints)`` for the largest face found in *image*.

    The image is converted to RGB, then to BGR, before being handed to the
    module-level ``face_app`` analyzer. When several faces are detected, the
    one with the largest bounding-box area is used.

    Args:
        image: PIL image to analyse.

    Returns:
        A tuple ``(embedding, keypoints)`` of NumPy arrays; the embedding is
        cast to ``float32``.

    Raises:
        RuntimeError: if no face is detected, or if the selected face exposes
            neither an embedding nor keypoints.
    """

    def _bbox_area(candidate):
        # bbox is indexed as (x1, y1, x2, y2): width * height.
        box = candidate.bbox
        return (box[2] - box[0]) * (box[3] - box[1])

    def _lookup(obj, attr_name, dict_key):
        # Try attribute access first, then fall back to dict-style access.
        found = getattr(obj, attr_name, None)
        if found is None and isinstance(obj, dict):
            found = obj.get(dict_key, None)
        return found

    rgb_pixels = np.array(image.convert("RGB"))
    bgr_pixels = cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)

    detections = face_app.get(bgr_pixels)
    if len(detections) == 0:
        raise RuntimeError("Aucun visage détecté sur l'image.")

    # Keep only the biggest face (the first one wins on equal areas).
    largest = max(detections, key=_bbox_area)

    embedding = _lookup(largest, "normed_embedding", "embedding")
    if embedding is None:
        raise RuntimeError("Impossible de récupérer l'embedding du visage.")

    keypoints = _lookup(largest, "kps", "kps")
    if keypoints is None:
        raise RuntimeError("Impossible de récupérer les keypoints du visage.")

    return np.array(embedding, dtype=np.float32), np.array(keypoints)


# ---------------------------
# Chargement du pipeline InstantID SDXL + ArtFusion
# ---------------------------

def load_pipeline():
    """Assemble the InstantID SDXL pipeline on top of BASE_MODEL_ID.

    Loads the ControlNet from CONTROLNET_PATH, builds the pipeline around it,
    moves the pipeline to DEVICE, then loads the InstantID IP-Adapter weights
    from IP_ADAPTER_PATH and sets the adapter scale to 0.6.

    Returns:
        The configured ``StableDiffusionXLInstantIDPipeline``.
    """
    # InstantID ControlNet
    instantid_controlnet = ControlNetModel.from_pretrained(CONTROLNET_PATH, torch_dtype=DTYPE)

    # InstantID SDXL pipeline using the ArtFusion checkpoint as its base model
    pipeline = StableDiffusionXLInstantIDPipeline.from_pretrained(
        BASE_MODEL_ID,
        controlnet=instantid_controlnet,
        torch_dtype=DTYPE,
    )

    # DEVICE is either "cuda" or "cpu", so it can be passed to .to() directly.
    pipeline.to(DEVICE)

    # InstantID IP-Adapter
    pipeline.load_ip_adapter_instantid(IP_ADAPTER_PATH)
    pipeline.set_ip_adapter_scale(0.6)

    return pipeline


# Module-level pipeline, built once at import time and used by generate().
pipe = load_pipeline()


# ---------------------------
# Fonction de génération (ZeroGPU)
# ---------------------------

@spaces.GPU
def generate(face_image, prompt, negative_prompt="", steps=30, guidance_scale=5, height=1024, width=768):
    """Generate an image whose face identity follows *face_image*.

    Args:
        face_image: PIL image containing the reference face.
        prompt: description of the body, outfit, scene and style.
        negative_prompt: text describing what to avoid in the result.
        steps: number of inference steps (cast to ``int``).
        guidance_scale: guidance scale (cast to ``float``).
        height: output height in pixels (cast to ``int``).
        width: output width in pixels (cast to ``int``).

    Returns:
        The first image produced by the pipeline.

    Raises:
        gr.Error: if no face image is supplied, or if face analysis or the
            pipeline call fails (the original exception is chained as cause).
    """
    # Validate before entering the try block: this user-facing error must reach
    # Gradio as-is rather than being caught, logged and re-wrapped below.
    if face_image is None:
        raise gr.Error("Merci de fournir une image de visage.")

    try:
        # Make sure the pipeline sits on the expected device
        # (DEVICE is either "cuda" or "cpu").
        pipe.to(DEVICE)

        # 1) Face embedding + keypoints; typically (512,) and (5, 2) according
        #    to the original author's note.
        face_emb, face_kps = get_face_info(face_image)
        face_emb_batch = face_emb[None]  # add a leading batch axis

        # 2) Keypoint image used as the ControlNet condition
        kps_image = draw_kps(face_image, face_kps)

        # 3) InstantID SDXL pipeline call
        out = pipe(
            prompt=prompt,
            negative_prompt=negative_prompt,
            image=kps_image,
            image_embeds=face_emb_batch,
            num_inference_steps=int(steps),
            guidance_scale=float(guidance_scale),
            controlnet_conditioning_scale=0.4,
            height=int(height),
            width=int(width),
        )

        return out.images[0]

    except gr.Error:
        # Already a user-facing error: propagate it unchanged.
        raise
    except Exception as e:
        import traceback

        # Log the full traceback server-side, then surface the message in the
        # UI while keeping the original exception as the cause.
        traceback.print_exc()
        raise gr.Error(str(e)) from e


# ---------------------------
# UI Gradio
# ---------------------------

# Two-column layout: inputs on the left, generated image on the right.
with gr.Blocks() as demo:
    gr.Markdown("## InstantID + ArtFusion XL Real (SDXL) – Visage → corps généré (ZeroGPU)")

    with gr.Row():
        # Left column: reference face, prompts and sampling controls.
        with gr.Column():
            face_img = gr.Image(label="Image visage (référence ID)", type="pil")

            prompt = gr.Textbox(
                value=(
                    "photorealistic full body portrait, white European man, realistic skin texture, "
                    "firefighter uniform with detailed fabric, realistic studio lighting, 35mm DSLR, "
                    "sharp focus on face, clean background, high resolution"
                ),
                lines=3,
                label="Prompt",
            )

            neg_prompt = gr.Textbox(
                value=(
                    "cartoon, anime, painting, illustration, lowres, blurry, deformed, bad anatomy, "
                    "extra limbs, waxy skin, oversharpen, text, watermark"
                ),
                label="Negative",
            )

            steps = gr.Slider(minimum=5, maximum=60, value=30, step=1, label="Steps")
            guidance = gr.Slider(minimum=1.0, maximum=10.0, value=5.0, step=0.5, label="Guidance scale")
            height = gr.Slider(minimum=640, maximum=1536, value=1024, step=64, label="Height")
            width = gr.Slider(minimum=640, maximum=1024, value=768, step=64, label="Width")

            btn = gr.Button("Generate")

        # Right column: the generated image.
        with gr.Column():
            output = gr.Image(label="Result")

    # Wire the button to generate(); the input order matches its parameters.
    btn.click(
        fn=generate,
        inputs=[face_img, prompt, neg_prompt, steps, guidance, height, width],
        outputs=output,
    )

demo.launch()