Spaces:

Leteint
/

img2img

Sleeping

App Files Files Community

Leteint committed on Jan 7

Commit

47c5516

verified ·

1 Parent(s): 6b374e0

Update app.py

Browse files

Files changed (1) hide show

app.py +54 -19

app.py CHANGED Viewed

@@ -9,9 +9,12 @@ import cv2
 from huggingface_hub import hf_hub_download
 from diffusers.models import ControlNetModel
-from insightface.app import FaceAnalysis
-from pipeline_stable_diffusion_xl_instantid import StableDiffusionXLInstantIDPipeline
 # ---------------------------
@@ -54,33 +57,49 @@ IP_ADAPTER_PATH = f"{CHECKPOINT_DIR}/ip-adapter.bin"
 # ---------------------------
-# InsightFace (CPU pour éviter les galères GPU dans ZeroGPU)
 # ---------------------------
 def setup_face_analyzer():
     app = FaceAnalysis(name="buffalo_l")
-    app.prepare(ctx_id=-1)  # CPU
     return app
 face_app = setup_face_analyzer()
-def extract_face_emb(image: Image.Image):
     img = np.array(image.convert("RGB"))
     img_bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
     faces = face_app.get(img_bgr)
     if len(faces) == 0:
-        raise RuntimeError("Aucun visage détecté sur l'image de référence.")
     face = sorted(
         faces,
         key=lambda f: (f.bbox[2] - f.bbox[0]) * (f.bbox[3] - f.bbox[1]),
         reverse=True,
     )[0]
-    face_emb = face.normed_embedding
-    return np.array(face_emb, dtype=np.float32)
 # ---------------------------
@@ -116,7 +135,12 @@ pipe = load_pipeline()
 # ---------------------------
 @spaces.GPU
-def generate(face_image, prompt, negative_prompt="", steps=20, guidance_scale=5.0, height=1024, width=768):
     try:
         if face_image is None:
             raise gr.Error("Merci de fournir une image de visage.")
@@ -127,17 +151,23 @@ def generate(face_image, prompt, negative_prompt="", steps=20, guidance_scale=5.
         else:
             pipe.to("cpu")
-        # 1) Embedding de visage (ID) depuis face_image
-        face_emb = extract_face_emb(face_image)   # (512,)
-        face_emb_batch = face_emb[None]          # (1, 512)
-        # 2) txt2img SDXL + InstantID (pas d'image de corps)
         out = pipe(
             prompt=prompt,
             negative_prompt=negative_prompt,
-            face_embeds=face_emb_batch,
             num_inference_steps=int(steps),
             guidance_scale=float(guidance_scale),
             height=int(height),
             width=int(width),
         )
@@ -155,7 +185,7 @@ def generate(face_image, prompt, negative_prompt="", steps=20, guidance_scale=5.
 # ---------------------------
 with gr.Blocks() as demo:
-    gr.Markdown("## InstantID + SDXL (ZeroGPU) – Visage → Corps généré")
     with gr.Row():
         with gr.Column():
@@ -164,15 +194,21 @@ with gr.Blocks() as demo:
             prompt = gr.Textbox(
                 label="Prompt",
                 lines=3,
-                value="photorealistic full body firefighter uniform, detailed, same identity, studio lighting",
             )
             neg_prompt = gr.Textbox(
                 label="Negative",
-                value="lowres, deformed, extra limbs, bad anatomy, text, watermark",
             )
-            steps = gr.Slider(5, 50, 25, step=1, label="Steps")
             guidance = gr.Slider(1.0, 10.0, 5.0, step=0.5, label="Guidance scale")
             height = gr.Slider(640, 1536, 1024, step=64, label="Height")
             width = gr.Slider(640, 1024, 768, step=64, label="Width")
@@ -189,4 +225,3 @@ with gr.Blocks() as demo:
     )
 demo.launch()

 from huggingface_hub import hf_hub_download
 from diffusers.models import ControlNetModel
+from pipeline_stable_diffusion_xl_instantid import (
+    StableDiffusionXLInstantIDPipeline,
+    draw_kps,
+)
+from insightface.app import FaceAnalysis
 # ---------------------------
 # ---------------------------
+# InsightFace (ID de visage)
 # ---------------------------
 def setup_face_analyzer():
+    # CPU pour éviter les embrouilles GPU dans ZeroGPU
     app = FaceAnalysis(name="buffalo_l")
+    app.prepare(ctx_id=-1)
     return app
 face_app = setup_face_analyzer()
+def get_face_info(image: Image.Image):
+    """Retourne (embedding, keypoints) pour le plus grand visage de l'image."""
     img = np.array(image.convert("RGB"))
     img_bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
     faces = face_app.get(img_bgr)
     if len(faces) == 0:
+        raise RuntimeError("Aucun visage détecté sur l'image.")
+    # On prend le plus grand visage
     face = sorted(
         faces,
         key=lambda f: (f.bbox[2] - f.bbox[0]) * (f.bbox[3] - f.bbox[1]),
         reverse=True,
     )[0]
+    # Selon la version d'insightface, accès par attribut ou dict
+    emb = getattr(face, "normed_embedding", None)
+    if emb is None and isinstance(face, dict):
+        emb = face.get("embedding", None)
+    if emb is None:
+        raise RuntimeError("Impossible de récupérer l'embedding du visage.")
+    kps = getattr(face, "kps", None)
+    if kps is None and isinstance(face, dict):
+        kps = face.get("kps", None)
+    if kps is None:
+        raise RuntimeError("Impossible de récupérer les keypoints du visage.")
+    return np.array(emb, dtype=np.float32), np.array(kps)
 # ---------------------------
 # ---------------------------
 @spaces.GPU
+def generate(face_image, prompt, negative_prompt="", steps=30, guidance_scale=5.0, height=1024, width=768):
+    """
+    face_image : image contenant le visage de référence
+    prompt : description du corps, tenue, décor, style
+    """
     try:
         if face_image is None:
             raise gr.Error("Merci de fournir une image de visage.")
         else:
             pipe.to("cpu")
+        # 1) Embedding + keypoints du visage
+        face_emb, face_kps = get_face_info(face_image)   # (512,), (5,2) typiquement
+        face_emb_batch = face_emb[None]                  # (1,512)
+        # 2) Génération de l'image de keypoints (condition ControlNet)
+        kps_image = draw_kps(face_image, face_kps)       # PIL.Image, comme dans l'exemple officiel
+        # 3) Appel du pipeline InstantID SDXL
+        # Note : image_embeds = face_emb, image = kps_image (comme dans l'exemple)
         out = pipe(
             prompt=prompt,
             negative_prompt=negative_prompt,
+            image=kps_image,
+            image_embeds=face_emb_batch,
             num_inference_steps=int(steps),
             guidance_scale=float(guidance_scale),
+            controlnet_conditioning_scale=0.8,
             height=int(height),
             width=int(width),
         )
 # ---------------------------
 with gr.Blocks() as demo:
+    gr.Markdown("## InstantID + SDXL (ZeroGPU) – 1 image visage → corps généré")
     with gr.Row():
         with gr.Column():
             prompt = gr.Textbox(
                 label="Prompt",
                 lines=3,
+                value=(
+                    "photorealistic full body firefighter uniform, helmet, reflective stripes, "
+                    "standing in front of a fire truck at night, cinematic lighting, same identity"
+                ),
             )
             neg_prompt = gr.Textbox(
                 label="Negative",
+                value=(
+                    "lowres, deformed, extra limbs, bad anatomy, text, watermark, "
+                    "blurry, cartoon, drawing, illustration"
+                ),
             )
+            steps = gr.Slider(5, 60, 30, step=1, label="Steps")
             guidance = gr.Slider(1.0, 10.0, 5.0, step=0.5, label="Guidance scale")
             height = gr.Slider(640, 1536, 1024, step=64, label="Height")
             width = gr.Slider(640, 1024, 768, step=64, label="Width")
     )
 demo.launch()