Spaces:

Leteint
/

img2img

Sleeping

App Files Files Community

Leteint committed on Jan 7

Commit

2fd0b9a

verified ·

1 Parent(s): 71113c4

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -42

app.py CHANGED Viewed

@@ -11,9 +11,6 @@ from huggingface_hub import hf_hub_download
 from diffusers.models import ControlNetModel
 from insightface.app import FaceAnalysis
-# IMPORTANT :
-# Assure-toi d'avoir `pipeline_stable_diffusion_xl_instantid.py`
-# dans le même dossier, contenant la classe StableDiffusionXLInstantIDPipeline
 from pipeline_stable_diffusion_xl_instantid import StableDiffusionXLInstantIDPipeline
@@ -29,30 +26,24 @@ CHECKPOINT_DIR = "./checkpoints"
 # ---------------------------
-# Téléchargement des poids InstantID depuis le Hub
 # ---------------------------
 def download_checkpoints():
-    # ControlNet (IdentityNet)
     hf_hub_download(
         repo_id="InstantX/InstantID",
         filename="ControlNetModel/config.json",
         local_dir=CHECKPOINT_DIR,
-        local_dir_use_symlinks=False,
     )
     hf_hub_download(
         repo_id="InstantX/InstantID",
         filename="ControlNetModel/diffusion_pytorch_model.safetensors",
         local_dir=CHECKPOINT_DIR,
-        local_dir_use_symlinks=False,
     )
-    # IP-Adapter / InstantID adapter
     hf_hub_download(
         repo_id="InstantX/InstantID",
         filename="ip-adapter.bin",
         local_dir=CHECKPOINT_DIR,
-        local_dir_use_symlinks=False,
     )
@@ -63,12 +54,12 @@ IP_ADAPTER_PATH = f"{CHECKPOINT_DIR}/ip-adapter.bin"
 # ---------------------------
-# InsightFace pour l'ID de visage
 # ---------------------------
 def setup_face_analyzer():
     app = FaceAnalysis(name="buffalo_l")
-    app.prepare(ctx_id=0 if DEVICE == "cuda" else -1)
     return app
@@ -76,16 +67,18 @@ face_app = setup_face_analyzer()
 def extract_face_emb(image: Image.Image):
-    # Insightface attend du BGR numpy
     img = np.array(image.convert("RGB"))
     img_bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
     faces = face_app.get(img_bgr)
     if len(faces) == 0:
-        raise RuntimeError("Aucun visage détecté sur l'image d'entrée.")
-    # On prend le plus grand visage
-    face = sorted(faces, key=lambda f: (f.bbox[2] - f.bbox[0]) * (f.bbox[3] - f.bbox[1]), reverse=True)[0]
     face_emb = face.normed_embedding
     return np.array(face_emb, dtype=np.float32)
@@ -95,13 +88,11 @@ def extract_face_emb(image: Image.Image):
 # ---------------------------
 def load_pipeline():
-    # 1) ControlNet
     controlnet = ControlNetModel.from_pretrained(
         CONTROLNET_PATH,
         torch_dtype=DTYPE,
     )
-    # 2) Pipeline InstantID (custom) basé sur SDXL
     pipe = StableDiffusionXLInstantIDPipeline.from_pretrained(
         BASE_MODEL_ID,
         controlnet=controlnet,
@@ -111,10 +102,7 @@ def load_pipeline():
     if DEVICE == "cuda":
         pipe.to("cuda")
-    # 3) Charger l'adapter InstantID (IP-Adapter-like)
     pipe.load_ip_adapter_instantid(IP_ADAPTER_PATH)
-    # Réglage de l'influence de l'adapter (ID strength)
     pipe.set_ip_adapter_scale(0.8)
     return pipe
@@ -128,39 +116,40 @@ pipe = load_pipeline()
 # ---------------------------
 @spaces.GPU
-def generate(image, prompt, negative_prompt="", steps=20, strength=0.45, guidance_scale=1.0):
     try:
         # Device
         if DEVICE == "cuda":
             pipe.to("cuda")
         else:
             pipe.to("cpu")
-        # 1) Embedding de visage via insightface
-        face_emb = extract_face_emb(image)          # shape (512,)
-        face_emb_batch = face_emb[None]            # shape (1, 512)
-        # 2) Embeddings d'image pour InstantID
-        # Dans ton pipeline (paste.txt), tu as une méthode encode_image(...)
-        # qui renvoie (image_embeds, negative_image_embeds).
-        image_embeds, negative_image_embeds = pipe.encode_image(image)
-        # 3) Appel du pipeline InstantID SDXL
         out = pipe(
             prompt=prompt,
             negative_prompt=negative_prompt,
-            image=image,                       # img2img input
-            image_embeds=image_embeds,         # embeddings image positifs
-            negative_image_embeds=negative_image_embeds,  # embeddings image négatifs
-            face_embeds=face_emb_batch,        # embedding identité
             num_inference_steps=int(steps),
             strength=float(strength),
             guidance_scale=float(guidance_scale),
         )
         return out.images[0]
     except Exception as e:
@@ -168,16 +157,18 @@ def generate(image, prompt, negative_prompt="", steps=20, strength=0.45, guidanc
         traceback.print_exc()
         raise gr.Error(str(e))
 # ---------------------------
 # UI Gradio
 # ---------------------------
 with gr.Blocks() as demo:
-    gr.Markdown("## Face Swap (InstantID + SDXL, ZeroGPU)")
     with gr.Row():
         with gr.Column():
-            input_img = gr.Image(type="pil", label="Upload Face Photo")
             prompt = gr.Textbox(
                 label="Prompt",
@@ -191,7 +182,7 @@ with gr.Blocks() as demo:
             )
             steps = gr.Slider(5, 40, 20, step=1, label="Steps")
-            strength = gr.Slider(0.2, 0.9, 0.45, step=0.05, label="Strength (img2img / modification)")
             guidance = gr.Slider(0.0, 3.0, 1.0, step=0.1, label="Guidance scale (InstantID, rester bas)")
             btn = gr.Button("Generate")
@@ -201,7 +192,7 @@ with gr.Blocks() as demo:
     btn.click(
         generate,
-        [input_img, prompt, neg_prompt, steps, strength, guidance],
         output,
     )

 from diffusers.models import ControlNetModel
 from insightface.app import FaceAnalysis
 from pipeline_stable_diffusion_xl_instantid import StableDiffusionXLInstantIDPipeline
 # ---------------------------
+# Téléchargement des poids InstantID
 # ---------------------------
 def download_checkpoints():
     hf_hub_download(
         repo_id="InstantX/InstantID",
         filename="ControlNetModel/config.json",
         local_dir=CHECKPOINT_DIR,
     )
     hf_hub_download(
         repo_id="InstantX/InstantID",
         filename="ControlNetModel/diffusion_pytorch_model.safetensors",
         local_dir=CHECKPOINT_DIR,
     )
     hf_hub_download(
         repo_id="InstantX/InstantID",
         filename="ip-adapter.bin",
         local_dir=CHECKPOINT_DIR,
     )
 # ---------------------------
+# InsightFace (CPU pour éviter les galères GPU dans ZeroGPU)
 # ---------------------------
 def setup_face_analyzer():
     app = FaceAnalysis(name="buffalo_l")
+    app.prepare(ctx_id=-1)  # CPU
     return app
 def extract_face_emb(image: Image.Image):
     img = np.array(image.convert("RGB"))
     img_bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
     faces = face_app.get(img_bgr)
     if len(faces) == 0:
+        raise RuntimeError("Aucun visage détecté sur l'image de référence.")
+    face = sorted(
+        faces,
+        key=lambda f: (f.bbox[2] - f.bbox[0]) * (f.bbox[3] - f.bbox[1]),
+        reverse=True,
+    )[0]
     face_emb = face.normed_embedding
     return np.array(face_emb, dtype=np.float32)
 # ---------------------------
 def load_pipeline():
     controlnet = ControlNetModel.from_pretrained(
         CONTROLNET_PATH,
         torch_dtype=DTYPE,
     )
     pipe = StableDiffusionXLInstantIDPipeline.from_pretrained(
         BASE_MODEL_ID,
         controlnet=controlnet,
     if DEVICE == "cuda":
         pipe.to("cuda")
     pipe.load_ip_adapter_instantid(IP_ADAPTER_PATH)
     pipe.set_ip_adapter_scale(0.8)
     return pipe
 # ---------------------------
 @spaces.GPU
+def generate(face_image, body_image, prompt, negative_prompt="", steps=20, strength=0.45, guidance_scale=1.0):
+    """
+    face_image : image pour l'identité (visage)
+    body_image : image pour la pose/corps (img2img de base)
+    """
     try:
+        if face_image is None:
+            raise RuntimeError("Merci de fournir une image de visage.")
+        if body_image is None:
+            raise RuntimeError("Merci de fournir une image de corps/pose.")
         # Device
         if DEVICE == "cuda":
             pipe.to("cuda")
         else:
             pipe.to("cpu")
+        # 1) Embedding de visage (ID) depuis face_image
+        face_emb = extract_face_emb(face_image)   # (512,)
+        face_emb_batch = face_emb[None]          # (1, 512)
+        # 2) Appel du pipeline InstantID SDXL
+        # body_image sert d'input img2img (corps/pose), le prompt décrit la tenue / contexte.
         out = pipe(
             prompt=prompt,
             negative_prompt=negative_prompt,
+            image=body_image,                 # image de base (corps/pose)
+            face_embeds=face_emb_batch,       # identité venant de face_image
             num_inference_steps=int(steps),
             strength=float(strength),
             guidance_scale=float(guidance_scale),
         )
         return out.images[0]
     except Exception as e:
         traceback.print_exc()
         raise gr.Error(str(e))
 # ---------------------------
 # UI Gradio
 # ---------------------------
 with gr.Blocks() as demo:
+    gr.Markdown("## InstantID + SDXL (ZeroGPU) – Visage A sur Corps B")
     with gr.Row():
         with gr.Column():
+            face_img = gr.Image(type="pil", label="Image visage (référence ID)")
+            body_img = gr.Image(type="pil", label="Image corps/pose (img2img)")
             prompt = gr.Textbox(
                 label="Prompt",
             )
             steps = gr.Slider(5, 40, 20, step=1, label="Steps")
+            strength = gr.Slider(0.2, 0.9, 0.45, step=0.05, label="Strength (img2img)")
             guidance = gr.Slider(0.0, 3.0, 1.0, step=0.1, label="Guidance scale (InstantID, rester bas)")
             btn = gr.Button("Generate")
     btn.click(
         generate,
+        [face_img, body_img, prompt, neg_prompt, steps, strength, guidance],
         output,
     )