# NOTE: the next four lines are file-viewer page residue, not Python; kept as comments.
# img2img / app.py
# Leteint's picture
# Update app.py
# 64fa7d3 verified
import spaces # Doit être importé AVANT torch / diffusers sur ZeroGPU
import gradio as gr
import torch
import numpy as np
from PIL import Image
import cv2
from huggingface_hub import hf_hub_download
from diffusers.models import ControlNetModel
from pipeline_stable_diffusion_xl_instantid import (
StableDiffusionXLInstantIDPipeline,
draw_kps,
)
from insightface.app import FaceAnalysis
# ---------------------------
# Global configuration
# ---------------------------
# Diffusers-format base model. Put YOUR ArtFusion diffusers repo here
# (after conversion + push_to_hub).
BASE_MODEL_ID = "Leteint/artfusionXLReal_v16Lightning"
# Other base models (disabled):
#BASE_MODEL_ID = "Niggendar/autismmixSDXL_autismmixPony"
#BASE_MODEL_ID = "Meina/MeinaPastel_V7"

# Half precision when CUDA is available, full precision on CPU.
if torch.cuda.is_available():
    DEVICE, DTYPE = "cuda", torch.float16
else:
    DEVICE, DTYPE = "cpu", torch.float32

# Local folder that receives the downloaded InstantID files.
CHECKPOINT_DIR = "./checkpoints"
# ---------------------------
# InstantID weight download
# ---------------------------
def download_checkpoints():
    """Download the InstantID files used by this app into ``CHECKPOINT_DIR``.

    Three files are requested from the ``InstantX/InstantID`` repo, in this
    order: the ControlNet config, the ControlNet weights (both under
    ``ControlNetModel/``) and the IP-Adapter weights (``ip-adapter.bin``).
    """
    instantid_files = (
        # ControlNet InstantID (complete ControlNetModel folder)
        "ControlNetModel/config.json",
        "ControlNetModel/diffusion_pytorch_model.safetensors",
        # IP-Adapter InstantID
        "ip-adapter.bin",
    )
    for remote_name in instantid_files:
        hf_hub_download(
            repo_id="InstantX/InstantID",
            filename=remote_name,
            local_dir=CHECKPOINT_DIR,
            local_dir_use_symlinks=False,
        )
# Fetch the InstantID files at import time, before any model is loaded from them.
download_checkpoints()
# Local paths of the downloaded ControlNet folder and IP-Adapter weights.
CONTROLNET_PATH = f"{CHECKPOINT_DIR}/ControlNetModel"
IP_ADAPTER_PATH = f"{CHECKPOINT_DIR}/ip-adapter.bin"
# ---------------------------
# InsightFace (face identity)
# ---------------------------
def setup_face_analyzer():
    """Build and prepare the InsightFace analyzer (``buffalo_l`` pack) on CPU.

    Returns:
        The prepared ``FaceAnalysis`` instance.
    """
    # Keep InsightFace on CPU to avoid GPU trouble under ZeroGPU. The provider
    # list is pinned explicitly so the ONNX sessions are created CPU-only
    # instead of relying on InsightFace's default provider selection
    # (NOTE(review): defaults presumably include CUDA when onnxruntime-gpu is
    # installed - confirm against the installed insightface version).
    app = FaceAnalysis(name="buffalo_l", providers=["CPUExecutionProvider"])
    # ctx_id=-1 selects the CPU context.
    app.prepare(ctx_id=-1)
    return app
# Shared analyzer instance, created once at import time and used by get_face_info().
face_app = setup_face_analyzer()
def get_face_info(image: Image.Image):
    """Return ``(embedding, keypoints)`` for the largest face in ``image``.

    Args:
        image: PIL image; it is converted to RGB and then to BGR before
            being handed to the module-level ``face_app``.

    Returns:
        A tuple ``(embedding, keypoints)``: the embedding as a float32 NumPy
        array and the keypoints as a NumPy array.

    Raises:
        RuntimeError: if no face is detected, or if the selected face
            provides no embedding or no keypoints.
    """
    rgb_pixels = np.array(image.convert("RGB"))
    detected = face_app.get(cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR))
    if len(detected) == 0:
        raise RuntimeError("Aucun visage détecté sur l'image.")

    def bbox_area(candidate):
        box = candidate.bbox
        return (box[2] - box[0]) * (box[3] - box[1])

    # Keep only the face with the largest bounding box (first one on ties).
    largest = max(detected, key=bbox_area)

    def read_field(attr_name, dict_key):
        # Attribute access first; fall back to dict lookup for dict-like faces.
        value = getattr(largest, attr_name, None)
        if value is None and isinstance(largest, dict):
            value = largest.get(dict_key, None)
        return value

    # NOTE(review): the normalised embedding is preferred here; confirm this
    # is what the InstantID pipeline expects rather than the raw "embedding".
    embedding = read_field("normed_embedding", "embedding")
    if embedding is None:
        raise RuntimeError("Impossible de récupérer l'embedding du visage.")

    keypoints = read_field("kps", "kps")
    if keypoints is None:
        raise RuntimeError("Impossible de récupérer les keypoints du visage.")

    return np.array(embedding, dtype=np.float32), np.array(keypoints)
# ---------------------------
# Loading the InstantID SDXL pipeline + ArtFusion
# ---------------------------
def load_pipeline():
    """Assemble the InstantID SDXL pipeline on top of ``BASE_MODEL_ID``.

    Loads the InstantID ControlNet from ``CONTROLNET_PATH``, builds the
    pipeline with it, moves the pipeline to ``DEVICE``, then loads the
    InstantID IP-Adapter from ``IP_ADAPTER_PATH`` and sets its scale to 0.6.

    Returns:
        The configured ``StableDiffusionXLInstantIDPipeline``.
    """
    # InstantID ControlNet
    instantid_controlnet = ControlNetModel.from_pretrained(
        CONTROLNET_PATH, torch_dtype=DTYPE
    )

    # InstantID SDXL pipeline with ArtFusion as the base model
    sdxl_pipe = StableDiffusionXLInstantIDPipeline.from_pretrained(
        BASE_MODEL_ID,
        controlnet=instantid_controlnet,
        torch_dtype=DTYPE,
    )

    # DEVICE is always either "cuda" or "cpu".
    sdxl_pipe.to(DEVICE)

    # InstantID IP-Adapter
    sdxl_pipe.load_ip_adapter_instantid(IP_ADAPTER_PATH)
    sdxl_pipe.set_ip_adapter_scale(0.6)
    return sdxl_pipe
# Shared pipeline instance, built once at import time and used by generate().
pipe = load_pipeline()
# ---------------------------
# Generation function (ZeroGPU)
# ---------------------------
@spaces.GPU
def generate(face_image, prompt, negative_prompt="", steps=30, guidance_scale=5, height=1024, width=768):
    """Generate an image that keeps the identity of the face in ``face_image``.

    Args:
        face_image: image containing the reference face.
        prompt: description of the body, outfit, setting and style.
        negative_prompt: negative prompt passed to the pipeline.
        steps: number of inference steps (cast to ``int``).
        guidance_scale: guidance scale (cast to ``float``).
        height: output height in pixels (cast to ``int``).
        width: output width in pixels (cast to ``int``).

    Returns:
        The first image of the pipeline output.

    Raises:
        gr.Error: if no face image is provided, or if any step of the
            generation fails (the original exception is chained as cause).
    """
    # Validate before the try-block so this user-facing error is raised
    # as-is instead of being caught, logged with a traceback and re-wrapped.
    if face_image is None:
        raise gr.Error("Merci de fournir une image de visage.")

    try:
        # Make sure the pipeline sits on the expected device
        # (DEVICE is always either "cuda" or "cpu").
        pipe.to(DEVICE)

        # 1) Face embedding + keypoints; per the original author's note these
        #    are typically shaped (512,) and (5, 2).
        face_emb, face_kps = get_face_info(face_image)
        face_emb_batch = face_emb[None]  # add a leading batch axis

        # 2) Keypoint image used as the ControlNet condition (original note:
        #    a PIL image, as in the official example).
        kps_image = draw_kps(face_image, face_kps)

        # 3) Call the InstantID SDXL pipeline
        out = pipe(
            prompt=prompt,
            negative_prompt=negative_prompt,
            image=kps_image,
            image_embeds=face_emb_batch,
            num_inference_steps=int(steps),
            guidance_scale=float(guidance_scale),
            controlnet_conditioning_scale=0.4,
            height=int(height),
            width=int(width),
        )
        return out.images[0]
    except gr.Error:
        # Already a user-facing error: let it through untouched.
        raise
    except Exception as e:
        import traceback

        # Log the full traceback server-side, then surface a readable message
        # in the UI while keeping the original exception as the cause.
        traceback.print_exc()
        # Some exceptions have an empty message; fall back to the type name.
        message = str(e) or type(e).__name__
        raise gr.Error(message) from e
# ---------------------------
# Gradio UI
# ---------------------------
with gr.Blocks() as demo:
    gr.Markdown("## InstantID + ArtFusion XL Real (SDXL) – Visage → corps généré (ZeroGPU)")

    with gr.Row():
        # Input column: reference face, prompts and generation settings.
        with gr.Column():
            face_img = gr.Image(label="Image visage (référence ID)", type="pil")
            prompt = gr.Textbox(
                value=(
                    "photorealistic full body portrait, white European man, realistic skin texture, "
                    "firefighter uniform with detailed fabric, realistic studio lighting, 35mm DSLR, "
                    "sharp focus on face, clean background, high resolution"
                ),
                lines=3,
                label="Prompt",
            )
            neg_prompt = gr.Textbox(
                value=(
                    "cartoon, anime, painting, illustration, lowres, blurry, deformed, bad anatomy, "
                    "extra limbs, waxy skin, oversharpen, text, watermark"
                ),
                label="Negative",
            )
            steps = gr.Slider(minimum=5, maximum=60, value=30, step=1, label="Steps")
            guidance = gr.Slider(minimum=1.0, maximum=10.0, value=5.0, step=0.5, label="Guidance scale")
            height = gr.Slider(minimum=640, maximum=1536, value=1024, step=64, label="Height")
            width = gr.Slider(minimum=640, maximum=1024, value=768, step=64, label="Width")
            btn = gr.Button(value="Generate")

        # Output column: the generated image.
        with gr.Column():
            output = gr.Image(label="Result")

    # The input order matches the positional parameters of generate().
    btn.click(
        fn=generate,
        inputs=[face_img, prompt, neg_prompt, steps, guidance, height, width],
        outputs=output,
    )

demo.launch()