"""InstantID + ArtFusion XL Real (SDXL) Gradio demo for Hugging Face ZeroGPU.

Takes a reference face photo, extracts its identity embedding and facial
keypoints with InsightFace, then generates a new image conditioned on that
identity via the InstantID ControlNet + IP-Adapter on top of an SDXL base
model.
"""

import spaces  # NOTE: must be imported BEFORE torch / diffusers on ZeroGPU

import gradio as gr
import torch
import numpy as np
from PIL import Image
import cv2
from huggingface_hub import hf_hub_download
from diffusers.models import ControlNetModel
from pipeline_stable_diffusion_xl_instantid import (
    StableDiffusionXLInstantIDPipeline,
    draw_kps,
)
from insightface.app import FaceAnalysis

# ---------------------------
# Global configuration
# ---------------------------

# Diffusers-format SDXL base model combined with the InstantID ControlNet.
BASE_MODEL_ID = "Leteint/artfusionXLReal_v16Lightning"

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# fp16 on GPU to halve memory; fp32 on CPU where fp16 is poorly supported.
DTYPE = torch.float16 if DEVICE == "cuda" else torch.float32
CHECKPOINT_DIR = "./checkpoints"

# ---------------------------
# InstantID weights download
# ---------------------------

def download_checkpoints() -> None:
    """Fetch the InstantID ControlNet and IP-Adapter weights into CHECKPOINT_DIR."""
    filenames = (
        # ControlNet InstantID (full ControlNetModel folder)
        "ControlNetModel/config.json",
        "ControlNetModel/diffusion_pytorch_model.safetensors",
        # IP-Adapter InstantID
        "ip-adapter.bin",
    )
    for filename in filenames:
        hf_hub_download(
            repo_id="InstantX/InstantID",
            filename=filename,
            local_dir=CHECKPOINT_DIR,
            # NOTE(review): deprecated (and ignored) in recent huggingface_hub
            # releases; kept for compatibility with older versions.
            local_dir_use_symlinks=False,
        )


download_checkpoints()

CONTROLNET_PATH = f"{CHECKPOINT_DIR}/ControlNetModel"
IP_ADAPTER_PATH = f"{CHECKPOINT_DIR}/ip-adapter.bin"

# ---------------------------
# InsightFace (face identity)
# ---------------------------

def setup_face_analyzer() -> FaceAnalysis:
    """Build the InsightFace analyzer on CPU (avoids GPU contention on ZeroGPU)."""
    app = FaceAnalysis(name="buffalo_l")
    app.prepare(ctx_id=-1)  # ctx_id=-1 -> CPU inference
    return app


face_app = setup_face_analyzer()


def get_face_info(image: Image.Image) -> "tuple[np.ndarray, np.ndarray]":
    """Return (embedding, keypoints) for the largest face in the image.

    Args:
        image: reference photo containing at least one face.

    Returns:
        A pair ``(embedding, keypoints)`` — the identity embedding as a
        float32 array and the facial keypoints (typically shape (5, 2)).

    Raises:
        RuntimeError: if no face is detected, or the detector output lacks
            an embedding / keypoints.
    """
    img = np.array(image.convert("RGB"))
    # InsightFace expects BGR (OpenCV convention).
    img_bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    faces = face_app.get(img_bgr)
    if not faces:
        raise RuntimeError("Aucun visage détecté sur l'image.")

    # Keep the face with the largest bounding-box area.
    face = max(
        faces,
        key=lambda f: (f.bbox[2] - f.bbox[0]) * (f.bbox[3] - f.bbox[1]),
    )

    # Identity embedding; fall back to a dict-style result if the detector
    # returned one instead of a Face object.
    emb = getattr(face, "normed_embedding", None)
    if emb is None and isinstance(face, dict):
        emb = face.get("embedding", None)
    if emb is None:
        raise RuntimeError("Impossible de récupérer l'embedding du visage.")

    # Facial keypoints used to draw the ControlNet conditioning image.
    kps = getattr(face, "kps", None)
    if kps is None and isinstance(face, dict):
        kps = face.get("kps", None)
    if kps is None:
        raise RuntimeError("Impossible de récupérer les keypoints du visage.")

    return np.array(emb, dtype=np.float32), np.array(kps)

# ---------------------------
# InstantID SDXL pipeline + ArtFusion base model
# ---------------------------

def load_pipeline() -> StableDiffusionXLInstantIDPipeline:
    """Assemble the InstantID SDXL pipeline (base model + ControlNet + IP-Adapter)."""
    controlnet = ControlNetModel.from_pretrained(
        CONTROLNET_PATH,
        torch_dtype=DTYPE,
    )

    pipe = StableDiffusionXLInstantIDPipeline.from_pretrained(
        BASE_MODEL_ID,
        controlnet=controlnet,
        torch_dtype=DTYPE,
    )
    pipe.to("cuda" if DEVICE == "cuda" else "cpu")

    # InstantID IP-Adapter: injects the face embedding into cross-attention.
    pipe.load_ip_adapter_instantid(IP_ADAPTER_PATH)
    pipe.set_ip_adapter_scale(0.6)
    return pipe


pipe = load_pipeline()

# ---------------------------
# Generation entry point (ZeroGPU)
# ---------------------------

@spaces.GPU
def generate(face_image, prompt, negative_prompt="", steps=30,
             guidance_scale=5, height=1024, width=768):
    """Generate an identity-preserving image from a reference face.

    Args:
        face_image: PIL image containing the reference face.
        prompt: description of body, outfit, scenery, style.
        negative_prompt: things to avoid in the output.
        steps: number of denoising steps.
        guidance_scale: classifier-free guidance strength.
        height, width: output resolution in pixels.

    Returns:
        The generated ``PIL.Image``.

    Raises:
        gr.Error: on missing input or any generation failure (original
            traceback is printed to the logs).
    """
    try:
        if face_image is None:
            raise gr.Error("Merci de fournir une image de visage.")

        # Ensure the pipeline sits on the right device (ZeroGPU may have
        # moved it between calls).
        pipe.to("cuda" if DEVICE == "cuda" else "cpu")

        # 1) Face embedding + keypoints — typically (512,) and (5, 2).
        face_emb, face_kps = get_face_info(face_image)
        face_emb_batch = face_emb[None]  # add batch dim -> (1, 512)

        # 2) Keypoint conditioning image for the ControlNet (PIL.Image,
        #    as in the official InstantID example).
        kps_image = draw_kps(face_image, face_kps)

        # 3) Run the InstantID SDXL pipeline.
        out = pipe(
            prompt=prompt,
            negative_prompt=negative_prompt,
            image=kps_image,
            image_embeds=face_emb_batch,
            num_inference_steps=int(steps),
            guidance_scale=float(guidance_scale),
            controlnet_conditioning_scale=0.4,
            height=int(height),
            width=int(width),
        )
        return out.images[0]

    except Exception as e:
        # Surface the failure in the UI while keeping the traceback in logs.
        import traceback
        traceback.print_exc()
        raise gr.Error(str(e))

# ---------------------------
# Gradio UI
# ---------------------------

with gr.Blocks() as demo:
    gr.Markdown("## InstantID + ArtFusion XL Real (SDXL) – Visage → corps généré (ZeroGPU)")

    with gr.Row():
        with gr.Column():
            face_img = gr.Image(type="pil", label="Image visage (référence ID)")
            prompt = gr.Textbox(
                label="Prompt",
                lines=3,
                value=(
                    "photorealistic full body portrait, white European man, realistic skin texture, "
                    "firefighter uniform with detailed fabric, realistic studio lighting, 35mm DSLR, "
                    "sharp focus on face, clean background, high resolution"
                ),
            )
            neg_prompt = gr.Textbox(
                label="Negative",
                value=(
                    "cartoon, anime, painting, illustration, lowres, blurry, deformed, bad anatomy, "
                    "extra limbs, waxy skin, oversharpen, text, watermark"
                ),
            )
            steps = gr.Slider(5, 60, 30, step=1, label="Steps")
            guidance = gr.Slider(1.0, 10.0, 5.0, step=0.5, label="Guidance scale")
            height = gr.Slider(640, 1536, 1024, step=64, label="Height")
            width = gr.Slider(640, 1024, 768, step=64, label="Width")
            btn = gr.Button("Generate")
        with gr.Column():
            output = gr.Image(label="Result")

    btn.click(
        generate,
        [face_img, prompt, neg_prompt, steps, guidance, height, width],
        output,
    )

demo.launch()