Nad54 committed on
Commit
719b317
·
verified ·
1 Parent(s): cf0441d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +119 -137
app.py CHANGED
@@ -1,181 +1,163 @@
1
- # app.py — InstantID (custom pipeline) + LoRA One Piece (Text-to-Image)
2
- # Compatible avec diffusers 0.29.x
3
- import os, traceback
4
 
5
- # Évite l'erreur libgomp: donne une valeur sûre si tu veux fixer OMP
6
  os.environ.setdefault("OMP_NUM_THREADS", "4")
7
  os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "1")
8
 
9
  import torch, gradio as gr
10
  from PIL import Image, ImageOps
11
- from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
12
- from safetensors.torch import load_file
13
-
14
- # ============== Config ==============
15
- DEVICE = "cuda"
16
- DTYPE = torch.float16
17
-
18
- # Modèle de base: pipeline InstantID officielle (chargée comme "custom_pipeline")
19
- INSTANTID_REPO = "InstantX/InstantID"
20
- CUSTOM_PIPE = "instantid"
21
-
22
- # Ton LoRA de style One Piece (fichier local)
23
- LORA_PATH = "./wanostyle_2_offset.safetensors"
24
- LORA_NAME = "wanostyle" # nom d'adapter utilisé dans set_adapters
25
 
26
- # ============== Utils ===============
27
- def preflight():
28
- s = [f"torch: {torch.__version__}", f"cuda: {torch.cuda.is_available()}"]
29
- if torch.cuda.is_available():
30
- s += [f"gpu: {torch.cuda.get_device_name(0)}", f"cap: {torch.cuda.get_device_capability(0)}"]
31
- return "\n".join(s)
32
 
33
- def is_lora_file(path: str) -> bool:
34
- try:
35
- sd = load_file(path)
36
- return any("lora_down.weight" in k for k in sd.keys())
37
- except Exception:
38
- return False
39
 
40
- print("=== PREFLIGHT ===")
41
- print(preflight())
 
42
 
43
- # ============ Load pipeline =========
44
- pipe = None
45
  load_logs = []
46
  try:
47
- load_logs.append("Chargement InstantID (custom pipeline)…")
48
- pipe = DiffusionPipeline.from_pretrained(
49
- INSTANTID_REPO,
50
- custom_pipeline=CUSTOM_PIPE, # << clé: charge la pipeline InstantID
51
- torch_dtype=DTYPE,
52
- use_safetensors=True,
53
- safety_checker=None, # remets-le si Space public strict
54
- )
55
- # planificateur stable
56
- if hasattr(pipe, "scheduler"):
57
- pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
58
-
59
- # Optimisations VRAM usuelles
60
- if hasattr(pipe, "enable_attention_slicing"): pipe.enable_attention_slicing()
61
- if hasattr(pipe, "enable_vae_slicing"): pipe.enable_vae_slicing()
62
- if hasattr(pipe, "enable_vae_tiling"): pipe.enable_vae_tiling()
63
-
64
- # Charger ton LoRA de style One Piece
65
- if os.path.exists(LORA_PATH) and is_lora_file(LORA_PATH):
66
- pipe.load_lora_weights(LORA_PATH, adapter_name=LORA_NAME, use_safetensors=True)
67
- load_logs.append("✅ LoRA (One Piece) chargé.")
68
- else:
69
- load_logs.append("⚠️ LoRA introuvable ou non standard — vérifie le .safetensors.")
70
-
71
- pipe.to(DEVICE)
72
- load_logs.append("✅ Pipeline InstantID prête.")
73
-
74
  except Exception:
75
  load_logs += ["❌ ERREUR au chargement:", traceback.format_exc()]
 
76
  print("\n".join(load_logs))
77
 
78
  if pipe is None:
79
- raise RuntimeError("Échec de chargement du pipeline. Voir logs.")
80
-
81
- # ============== Inference ==============
82
- def generate(
83
- ref_face, # photo utilisateur (obligatoire)
84
- prompt,
85
- negative_prompt,
86
- id_strength=0.85, # force d'identité InstantID (0.7–0.95)
87
- lora_scale=1.05, # force du style One Piece
88
- cfg=7.0,
89
- steps=30,
90
- width=640,
91
- height=768,
92
- seed=-1,
93
- ):
94
- run_logs = []
 
 
 
 
95
  try:
96
- if ref_face is None:
97
- return None, "Merci d'ajouter ta photo (portrait/visage).", "\n".join(load_logs)
98
-
99
- run_logs.append(preflight())
100
-
101
- # Seed
102
- gen = None if seed is None or int(seed) < 0 else torch.Generator(DEVICE).manual_seed(int(seed))
103
-
104
- # Préparer visage (carré conseillé pour l'embedding)
105
- face = ImageOps.exif_transpose(ref_face).convert("RGB")
106
- min_side = min(face.size)
107
- x = (face.width - min_side)//2; y = (face.height - min_side)//2
108
- face_sq = face.crop((x, y, x+min_side, y+min_side)).resize((512, 512), Image.Resampling.LANCZOS)
109
-
110
- # Appliquer LoRA (intensité via set_adapters)
111
- ca_kwargs = None
112
- if LORA_NAME in getattr(pipe, "loaded_adapters", [LORA_NAME]):
113
- try:
114
- pipe.set_adapters([LORA_NAME], adapter_weights=[float(lora_scale)])
115
- ca_kwargs = {"scale": float(lora_scale)}
116
- run_logs.append(f"✅ LoRA actif (scale={float(lora_scale)})")
117
- except Exception as e:
118
- run_logs.append(f"⚠️ set_adapters erreur: {e}")
119
- else:
120
- run_logs.append("ℹ️ LoRA non chargé.")
121
-
122
- # Appel InstantID — la pipeline attend 'image' = référence visage
123
- # Génération T2I (image neuve)
124
- result = pipe(
125
  prompt=prompt.strip(),
126
  negative_prompt=(negative_prompt or "").strip(),
127
- image=face_sq, # <<< identité
128
- id_strength=float(id_strength), # <<< verrouillage identité
 
 
129
  width=int(width),
130
  height=int(height),
131
- guidance_scale=float(cfg),
132
- num_inference_steps=int(steps),
133
  generator=gen,
134
- cross_attention_kwargs=ca_kwargs, # LoRA intensity
135
- )
136
-
137
- return result.images[0], "", "\n".join(load_logs + run_logs)
138
 
 
139
  except torch.cuda.OutOfMemoryError as oom:
140
- msg = "CUDA OOM: baisse résolution (ex: 576×704), steps (24–28), CFG (6–7)."
141
- return None, f"{msg}\n{oom}", "\n".join(load_logs + run_logs)
142
- except Exception:
143
- return None, "Erreur:\n" + traceback.format_exc(), "\n".join(load_logs + run_logs)
144
 
145
- # ===================== UI =====================
146
  EX_PROMPT = (
147
- "one piece style, Eiichiro Oda style, anime portrait, upper body, pirate outfit, straw hat, "
148
- "cel shading, clean lineart, vibrant colors, expressive eyes, symmetrical face, looking at camera, "
149
- "dynamic lighting, detailed face, simple background, high detail"
150
  )
151
  EX_NEG = (
152
- "worst quality, low quality, bad anatomy, lowres, blurry, noisy, text, watermark, logo, cropped, "
153
- "jpeg artifacts, 3d render, photorealistic, realistic skin, bad proportions, deformed face, distorted eyes, "
154
- "cross-eye, asymmetrical eyes, extra limbs, extra fingers, fused fingers, multiple faces, mutated, signature, username, nsfw"
155
  )
156
 
157
  with gr.Blocks(css="footer{display:none !important}") as demo:
158
- gr.Markdown("# 🏴‍☠️ One Piece — InstantID (custom) + LoRA")
159
 
160
  with gr.Row():
161
  with gr.Column():
162
- ref_face = gr.Image(type="pil", label="Photo visage (référence)", value=None)
163
  prompt = gr.Textbox(label="Prompt", value=EX_PROMPT, lines=3)
164
  negative = gr.Textbox(label="Negative Prompt", value=EX_NEG, lines=3)
165
 
166
- id_strength = gr.Slider(0.5, 1.0, value=0.85, step=0.05, label="Force identité (InstantID)")
167
- lora_scale = gr.Slider(0.0, 1.5, value=1.05, step=0.05, label="Force du style One Piece")
168
- cfg = gr.Slider(1, 12, value=7.0, step=0.5, label="CFG (guidance)")
169
- steps = gr.Slider(10, 60, value=30, step=1, label="Steps")
170
- width = gr.Dropdown(choices=[512, 576, 640, 704, 768], value=640, label="Largeur")
171
- height = gr.Dropdown(choices=[640, 704, 768], value=768, label="Hauteur")
172
- seed = gr.Number(value=-1, label="Seed (-1 aléatoire)")
173
- btn = gr.Button("🎨 Générer", variant="primary")
174
 
175
  with gr.Column():
176
  out_image = gr.Image(label="Résultat", interactive=False)
177
  err_box = gr.Textbox(label="Erreurs", visible=False)
178
- log_box = gr.Textbox(label="Logs", value="\n".join(load_logs), lines=10)
179
 
180
  def wrap(*args):
181
  img, err, logs = generate(*args)
@@ -183,7 +165,7 @@ with gr.Blocks(css="footer{display:none !important}") as demo:
183
 
184
  btn.click(
185
  wrap,
186
- inputs=[ref_face, prompt, negative, id_strength, lora_scale, cfg, steps, width, height, seed],
187
  outputs=[out_image, err_box, log_box],
188
  )
189
 
 
# app.py — minimal InstantID (SDXL) with a simple Gradio UI.
# Downloads the custom pipeline from the official InstantID Space and runs
# it with IdentityNet only.
import os, traceback, importlib.util

# Work around the libgomp error; must be set before torch is imported.
os.environ.setdefault("OMP_NUM_THREADS", "4")
os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "1")

import torch, gradio as gr
from PIL import Image, ImageOps
from huggingface_hub import hf_hub_download
from diffusers.models import ControlNetModel

# GPU + fp16 when CUDA is available, otherwise CPU + fp32.
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32
16
+
17
# --------- Assets fetched from the official InstantID Space ---------
_INSTANTID_REPO = "InstantX/InstantID"


def _fetch(filename, local_dir):
    # One-liner wrapper: download a single file from the InstantID repo
    # and return its local path.
    return hf_hub_download(repo_id=_INSTANTID_REPO, filename=filename, local_dir=local_dir)


# 1) The custom pipeline source file (.py).
PIPE_FILENAME = "pipeline_stable_diffusion_xl_instantid_full.py"
local_pipeline_path = _fetch(PIPE_FILENAME, "./instantid")

# 2) IdentityNet ControlNet weights + the IP-Adapter checkpoint.
_cn_config_path = _fetch("ControlNetModel/config.json", "./checkpoints")
cn_dir = os.path.dirname(_cn_config_path)  # ./checkpoints/ControlNetModel
_fetch("ControlNetModel/diffusion_pytorch_model.safetensors", "./checkpoints")
ip_adapter_path = _fetch("ip-adapter.bin", "./checkpoints")
 
31
 
32
# --------- Dynamically import the pipeline class from the downloaded file ---------
_spec = importlib.util.spec_from_file_location("instantid_pipeline", local_pipeline_path)
_module = importlib.util.module_from_spec(_spec)
_spec.loader.exec_module(_module)

StableDiffusionXLInstantIDPipeline = _module.StableDiffusionXLInstantIDPipeline
# draw_kps renders the 5 facial keypoints into the control image used by IdentityNet.
draw_kps = _module.draw_kps

# --------- SDXL base model (more stylised than vanilla SDXL for anime) ---------
# Swap in "stabilityai/stable-diffusion-xl-base-1.0" for a neutral look.
BASE_MODEL = "wangqixun/YamerMIX_v8"
42
 
43
# --------- Build the InstantID pipeline (IdentityNet only) ---------
load_logs = []


def _build_pipeline():
    # Load the IdentityNet ControlNet, assemble the SDXL InstantID pipeline,
    # attach the IP-Adapter weights and return the ready-to-use pipe.
    load_logs.append("Chargement ControlNet IdentityNet…")
    identitynet = ControlNetModel.from_pretrained(cn_dir, torch_dtype=dtype)

    load_logs.append(f"Chargement pipeline InstantID (base={BASE_MODEL})…")
    p = StableDiffusionXLInstantIDPipeline.from_pretrained(
        BASE_MODEL,
        controlnet=[identitynet],  # IdentityNet only, no extra ControlNets
        torch_dtype=dtype,
        safety_checker=None,
        feature_extractor=None,
    ).to(device)

    # InstantID adapter weights (ip-adapter.bin).
    p.load_ip_adapter_instantid(ip_adapter_path)
    if device == "cuda":
        p.image_proj_model.to("cuda")
        p.unet.to("cuda")

    load_logs.append("✅ InstantID prêt.")
    return p


try:
    pipe = _build_pipeline()
except Exception:
    load_logs += ["❌ ERREUR au chargement:", traceback.format_exc()]
    pipe = None

print("\n".join(load_logs))

if pipe is None:
    raise RuntimeError("Échec de chargement du pipeline. Voir logs container.")
72
+
73
# --------- Face encoder (InsightFace): landmarks + identity embedding ---------
from insightface.app import FaceAnalysis

# antelopev2 bundles detection + recognition; CPU provider is enough here
# since only the diffusion model needs the GPU.
fa = FaceAnalysis(name="antelopev2", root="./", providers=["CPUExecutionProvider"])
fa.prepare(ctx_id=0, det_size=(640, 640))
77
+
78
def extract_face_info(pil_img: Image.Image):
    """Detect a face in *pil_img* and return ``(face_info, kps_image)``.

    ``face_info`` is the InsightFace record (embedding, bbox, kps, …) of the
    largest detected face; ``kps_image`` is the 5-point landmark drawing used
    as the IdentityNet control image.

    Raises:
        ValueError: when no face is detected.
    """
    import numpy as np, cv2

    # InsightFace expects BGR uint8 arrays.
    img_cv2 = cv2.cvtColor(np.array(pil_img.convert("RGB")), cv2.COLOR_RGB2BGR)
    faces = fa.get(img_cv2)
    if not faces:
        raise ValueError("Aucun visage détecté. Utilise une photo portrait nette.")

    # Bug fix: the comment promised the largest face but the code took
    # faces[-1] (merely the last detection). Select by bounding-box area,
    # as the official InstantID demo does.
    face = max(
        faces,
        key=lambda f: (f["bbox"][2] - f["bbox"][0]) * (f["bbox"][3] - f["bbox"][1]),
    )
    kps_image = draw_kps(pil_img, face["kps"])
    return face, kps_image
88
+
89
# --------- Inference ---------
def generate(face_image, prompt, negative_prompt, identity_strength, adapter_strength, steps, cfg, width, height, seed):
    """Run one InstantID generation.

    Returns a ``(image_or_None, error_message, log_text)`` triple so the UI
    can always display something meaningful instead of crashing.
    """
    try:
        if face_image is None:
            return None, "Merci d'ajouter une photo visage.", "\n".join(load_logs)

        # Negative seed means "random": pass no generator at all.
        gen = None if seed is None or int(seed) < 0 else torch.Generator(device=device).manual_seed(int(seed))

        # Center-square crop + 512x512 resize: more stable detection/embedding.
        face = ImageOps.exif_transpose(face_image).convert("RGB")
        ms = min(face.size)
        x = (face.width - ms) // 2
        y = (face.height - ms) // 2
        face_cropped = face.crop((x, y, x + ms, y + ms)).resize((512, 512), Image.Resampling.LANCZOS)

        # Landmarks (spatial control) + identity embedding from InsightFace.
        face_info, face_kps = extract_face_info(face_cropped)

        pipe.set_ip_adapter_scale(float(adapter_strength))

        images = pipe(
            prompt=prompt.strip(),
            negative_prompt=(negative_prompt or "").strip(),
            # Bug fix: the embedding was computed but never handed to the
            # pipeline, so identity conditioning silently never happened.
            # The InstantID pipeline takes it via `image_embeds`.
            image_embeds=face_info["embedding"],
            image=face_kps,  # landmark control image for IdentityNet
            controlnet_conditioning_scale=float(identity_strength),
            num_inference_steps=int(steps),
            guidance_scale=float(cfg),
            width=int(width),
            height=int(height),
            generator=gen,
        ).images

        return images[0], "", "\n".join(load_logs)
    except torch.cuda.OutOfMemoryError as oom:
        msg = "CUDA OOM: baisse la résolution (ex: 704×896 → 576×704), steps 24–28, CFG 5–7."
        return None, f"{msg}\n{oom}", "\n".join(load_logs)
    except Exception:
        return None, "Erreur:\n" + traceback.format_exc(), "\n".join(load_logs)
127
 
128
# --------- Default example prompts shown in the UI ---------
EX_PROMPT = (
    "one piece style, Eiichiro Oda style, anime portrait, upper body, straw hat, pirate outfit, "
    "clean lineart, cel shading, vibrant colors, expressive eyes, symmetrical face, looking at camera, "
    "dynamic lighting, simple background, high detail"
)
EX_NEG = (
    "low quality, worst quality, lowres, blurry, noisy, watermark, text, logo, jpeg artifacts, "
    "bad anatomy, distorted eyes, cross-eye, asymmetrical eyes, deformed, multiple faces, nsfw"
)
138
 
139
  with gr.Blocks(css="footer{display:none !important}") as demo:
140
+ gr.Markdown("# 🏴‍☠️ One Piece — InstantID (SDXL)")
141
 
142
  with gr.Row():
143
  with gr.Column():
144
+ face_image = gr.Image(type="pil", label="Photo visage", height=360)
145
  prompt = gr.Textbox(label="Prompt", value=EX_PROMPT, lines=3)
146
  negative = gr.Textbox(label="Negative Prompt", value=EX_NEG, lines=3)
147
 
148
+ identity_strength = gr.Slider(0.2, 1.5, value=0.85, step=0.05, label="IdentityNet strength (fidélité)")
149
+ adapter_strength = gr.Slider(0.2, 1.5, value=0.85, step=0.05, label="Adapter strength (détails)")
150
+ steps = gr.Slider(10, 60, value=30, step=1, label="Steps")
151
+ cfg = gr.Slider(0.1, 12.0, value=5.0, step=0.1, label="CFG")
152
+ width = gr.Dropdown(choices=[576, 640, 704, 768, 896], value=704, label="Largeur")
153
+ height = gr.Dropdown(choices=[704, 768, 896, 1024], value=896, label="Hauteur")
154
+ seed = gr.Number(value=-1, label="Seed (-1 aléatoire)")
155
+ btn = gr.Button("🎨 Générer", variant="primary")
156
 
157
  with gr.Column():
158
  out_image = gr.Image(label="Résultat", interactive=False)
159
  err_box = gr.Textbox(label="Erreurs", visible=False)
160
+ log_box = gr.Textbox(label="Logs", value="\n".join(load_logs), lines=8)
161
 
162
  def wrap(*args):
163
  img, err, logs = generate(*args)
 
165
 
166
  btn.click(
167
  wrap,
168
+ inputs=[face_image, prompt, negative, identity_strength, adapter_strength, steps, cfg, width, height, seed],
169
  outputs=[out_image, err_box, log_box],
170
  )
171