Update app.py
Browse files
app.py
CHANGED
|
@@ -9,9 +9,12 @@ import cv2
|
|
| 9 |
from huggingface_hub import hf_hub_download
|
| 10 |
|
| 11 |
from diffusers.models import ControlNetModel
|
| 12 |
-
from
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
-
from
|
| 15 |
|
| 16 |
|
| 17 |
# ---------------------------
|
|
@@ -54,33 +57,49 @@ IP_ADAPTER_PATH = f"{CHECKPOINT_DIR}/ip-adapter.bin"
|
|
| 54 |
|
| 55 |
|
| 56 |
# ---------------------------
|
| 57 |
-
# InsightFace (
|
| 58 |
# ---------------------------
|
| 59 |
|
| 60 |
def setup_face_analyzer():
|
|
|
|
| 61 |
app = FaceAnalysis(name="buffalo_l")
|
| 62 |
-
app.prepare(ctx_id=-1)
|
| 63 |
return app
|
| 64 |
|
| 65 |
|
| 66 |
face_app = setup_face_analyzer()
|
| 67 |
|
| 68 |
|
| 69 |
-
def
|
|
|
|
| 70 |
img = np.array(image.convert("RGB"))
|
| 71 |
img_bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
|
| 72 |
|
| 73 |
faces = face_app.get(img_bgr)
|
| 74 |
if len(faces) == 0:
|
| 75 |
-
raise RuntimeError("Aucun visage détecté sur l'image
|
| 76 |
|
|
|
|
| 77 |
face = sorted(
|
| 78 |
faces,
|
| 79 |
key=lambda f: (f.bbox[2] - f.bbox[0]) * (f.bbox[3] - f.bbox[1]),
|
| 80 |
reverse=True,
|
| 81 |
)[0]
|
| 82 |
-
|
| 83 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
|
| 85 |
|
| 86 |
# ---------------------------
|
|
@@ -116,7 +135,12 @@ pipe = load_pipeline()
|
|
| 116 |
# ---------------------------
|
| 117 |
|
| 118 |
@spaces.GPU
|
| 119 |
-
def generate(face_image, prompt, negative_prompt="", steps=
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
try:
|
| 121 |
if face_image is None:
|
| 122 |
raise gr.Error("Merci de fournir une image de visage.")
|
|
@@ -127,17 +151,23 @@ def generate(face_image, prompt, negative_prompt="", steps=20, guidance_scale=5.
|
|
| 127 |
else:
|
| 128 |
pipe.to("cpu")
|
| 129 |
|
| 130 |
-
# 1) Embedding
|
| 131 |
-
face_emb =
|
| 132 |
-
face_emb_batch = face_emb[None]
|
| 133 |
|
| 134 |
-
# 2)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
out = pipe(
|
| 136 |
prompt=prompt,
|
| 137 |
negative_prompt=negative_prompt,
|
| 138 |
-
|
|
|
|
| 139 |
num_inference_steps=int(steps),
|
| 140 |
guidance_scale=float(guidance_scale),
|
|
|
|
| 141 |
height=int(height),
|
| 142 |
width=int(width),
|
| 143 |
)
|
|
@@ -155,7 +185,7 @@ def generate(face_image, prompt, negative_prompt="", steps=20, guidance_scale=5.
|
|
| 155 |
# ---------------------------
|
| 156 |
|
| 157 |
with gr.Blocks() as demo:
|
| 158 |
-
gr.Markdown("## InstantID + SDXL (ZeroGPU) –
|
| 159 |
|
| 160 |
with gr.Row():
|
| 161 |
with gr.Column():
|
|
@@ -164,15 +194,21 @@ with gr.Blocks() as demo:
|
|
| 164 |
prompt = gr.Textbox(
|
| 165 |
label="Prompt",
|
| 166 |
lines=3,
|
| 167 |
-
value=
|
|
|
|
|
|
|
|
|
|
| 168 |
)
|
| 169 |
|
| 170 |
neg_prompt = gr.Textbox(
|
| 171 |
label="Negative",
|
| 172 |
-
value=
|
|
|
|
|
|
|
|
|
|
| 173 |
)
|
| 174 |
|
| 175 |
-
steps = gr.Slider(5,
|
| 176 |
guidance = gr.Slider(1.0, 10.0, 5.0, step=0.5, label="Guidance scale")
|
| 177 |
height = gr.Slider(640, 1536, 1024, step=64, label="Height")
|
| 178 |
width = gr.Slider(640, 1024, 768, step=64, label="Width")
|
|
@@ -189,4 +225,3 @@ with gr.Blocks() as demo:
|
|
| 189 |
)
|
| 190 |
|
| 191 |
demo.launch()
|
| 192 |
-
|
|
|
|
| 9 |
from huggingface_hub import hf_hub_download
|
| 10 |
|
| 11 |
from diffusers.models import ControlNetModel
|
| 12 |
+
from pipeline_stable_diffusion_xl_instantid import (
|
| 13 |
+
StableDiffusionXLInstantIDPipeline,
|
| 14 |
+
draw_kps,
|
| 15 |
+
)
|
| 16 |
|
| 17 |
+
from insightface.app import FaceAnalysis
|
| 18 |
|
| 19 |
|
| 20 |
# ---------------------------
|
|
|
|
| 57 |
|
| 58 |
|
| 59 |
# ---------------------------
|
| 60 |
+
# InsightFace (ID de visage)
|
| 61 |
# ---------------------------
|
| 62 |
|
| 63 |
def setup_face_analyzer():
    """Create and prepare the InsightFace analyzer used for face lookup.

    Loads the ``buffalo_l`` model pack and prepares it with ``ctx_id=-1``.

    Returns:
        The prepared ``FaceAnalysis`` instance.
    """
    # Run on CPU on purpose, to avoid GPU trouble under ZeroGPU.
    analyzer = FaceAnalysis(name="buffalo_l")
    analyzer.prepare(ctx_id=-1)
    return analyzer
|
| 68 |
|
| 69 |
|
| 70 |
face_app = setup_face_analyzer()
|
| 71 |
|
| 72 |
|
| 73 |
+
def get_face_info(image: Image.Image):
    """Return ``(embedding, keypoints)`` for the largest face in ``image``.

    The image is converted to RGB, then to BGR, before being handed to the
    module-level ``face_app`` analyzer. Among the detected faces, the one
    whose bounding box covers the largest area is selected.

    Returns:
        A tuple ``(emb, kps)``: the embedding as a ``float32`` NumPy array
        and the keypoints as a NumPy array.

    Raises:
        RuntimeError: if no face is detected, or if the embedding or the
            keypoints cannot be read from the selected face.
    """
    rgb = np.array(image.convert("RGB"))
    detections = face_app.get(cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR))
    if len(detections) == 0:
        raise RuntimeError("Aucun visage détecté sur l'image.")

    def _bbox_area(candidate):
        box = candidate.bbox
        return (box[2] - box[0]) * (box[3] - box[1])

    # Keep only the largest face; on equal areas the earliest detection wins.
    largest = max(detections, key=_bbox_area)

    def _read(attr_name, dict_key):
        # Depending on the insightface version, a face exposes its fields as
        # attributes or as dict entries; try the attribute first.
        value = getattr(largest, attr_name, None)
        if value is None and isinstance(largest, dict):
            value = largest.get(dict_key, None)
        return value

    # NOTE(review): the attribute path yields the normalized embedding while
    # the dict fallback yields the raw one; the upstream InstantID example
    # appears to pass the raw `embedding` -- confirm which one is intended.
    emb = _read("normed_embedding", "embedding")
    if emb is None:
        raise RuntimeError("Impossible de récupérer l'embedding du visage.")

    kps = _read("kps", "kps")
    if kps is None:
        raise RuntimeError("Impossible de récupérer les keypoints du visage.")

    return np.array(emb, dtype=np.float32), np.array(kps)
|
| 103 |
|
| 104 |
|
| 105 |
# ---------------------------
|
|
|
|
| 135 |
# ---------------------------
|
| 136 |
|
| 137 |
@spaces.GPU
|
| 138 |
+
def generate(face_image, prompt, negative_prompt="", steps=30, guidance_scale=5.0, height=1024, width=768):
|
| 139 |
+
"""
|
| 140 |
+
face_image : image contenant le visage de référence
|
| 141 |
+
prompt : description du corps, tenue, décor, style
|
| 142 |
+
"""
|
| 143 |
+
|
| 144 |
try:
|
| 145 |
if face_image is None:
|
| 146 |
raise gr.Error("Merci de fournir une image de visage.")
|
|
|
|
| 151 |
else:
|
| 152 |
pipe.to("cpu")
|
| 153 |
|
| 154 |
+
# 1) Embedding + keypoints du visage
|
| 155 |
+
face_emb, face_kps = get_face_info(face_image) # (512,), (5,2) typiquement
|
| 156 |
+
face_emb_batch = face_emb[None] # (1,512)
|
| 157 |
|
| 158 |
+
# 2) Génération de l'image de keypoints (condition ControlNet)
|
| 159 |
+
kps_image = draw_kps(face_image, face_kps) # PIL.Image, comme dans l'exemple officiel
|
| 160 |
+
|
| 161 |
+
# 3) Appel du pipeline InstantID SDXL
|
| 162 |
+
# Note : image_embeds = face_emb, image = kps_image (comme dans l'exemple)
|
| 163 |
out = pipe(
|
| 164 |
prompt=prompt,
|
| 165 |
negative_prompt=negative_prompt,
|
| 166 |
+
image=kps_image,
|
| 167 |
+
image_embeds=face_emb_batch,
|
| 168 |
num_inference_steps=int(steps),
|
| 169 |
guidance_scale=float(guidance_scale),
|
| 170 |
+
controlnet_conditioning_scale=0.8,
|
| 171 |
height=int(height),
|
| 172 |
width=int(width),
|
| 173 |
)
|
|
|
|
| 185 |
# ---------------------------
|
| 186 |
|
| 187 |
with gr.Blocks() as demo:
|
| 188 |
+
gr.Markdown("## InstantID + SDXL (ZeroGPU) – 1 image visage → corps généré")
|
| 189 |
|
| 190 |
with gr.Row():
|
| 191 |
with gr.Column():
|
|
|
|
| 194 |
prompt = gr.Textbox(
|
| 195 |
label="Prompt",
|
| 196 |
lines=3,
|
| 197 |
+
value=(
|
| 198 |
+
"photorealistic full body firefighter uniform, helmet, reflective stripes, "
|
| 199 |
+
"standing in front of a fire truck at night, cinematic lighting, same identity"
|
| 200 |
+
),
|
| 201 |
)
|
| 202 |
|
| 203 |
neg_prompt = gr.Textbox(
|
| 204 |
label="Negative",
|
| 205 |
+
value=(
|
| 206 |
+
"lowres, deformed, extra limbs, bad anatomy, text, watermark, "
|
| 207 |
+
"blurry, cartoon, drawing, illustration"
|
| 208 |
+
),
|
| 209 |
)
|
| 210 |
|
| 211 |
+
steps = gr.Slider(5, 60, 30, step=1, label="Steps")
|
| 212 |
guidance = gr.Slider(1.0, 10.0, 5.0, step=0.5, label="Guidance scale")
|
| 213 |
height = gr.Slider(640, 1536, 1024, step=64, label="Height")
|
| 214 |
width = gr.Slider(640, 1024, 768, step=64, label="Width")
|
|
|
|
| 225 |
)
|
| 226 |
|
| 227 |
demo.launch()
|
|
|