|
|
import spaces |
|
|
|
|
|
import gradio as gr |
|
|
import torch |
|
|
import numpy as np |
|
|
from PIL import Image |
|
|
import cv2 |
|
|
|
|
|
from huggingface_hub import hf_hub_download |
|
|
|
|
|
from diffusers.models import ControlNetModel |
|
|
from pipeline_stable_diffusion_xl_instantid import ( |
|
|
StableDiffusionXLInstantIDPipeline, |
|
|
draw_kps, |
|
|
) |
|
|
|
|
|
from insightface.app import FaceAnalysis |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# SDXL Lightning-style base checkpoint used for generation.
BASE_MODEL_ID = "Leteint/artfusionXLReal_v16Lightning"


# Run on GPU with fp16 when CUDA is available; otherwise CPU with fp32.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"


DTYPE = torch.float16 if DEVICE == "cuda" else torch.float32


# Local directory the InstantID weights are downloaded into.
CHECKPOINT_DIR = "./checkpoints"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def download_checkpoints():
    """Fetch the InstantID ControlNet and IP-Adapter weights into CHECKPOINT_DIR.

    Downloads the three required artifacts from the InstantX/InstantID Hub
    repository as plain local files so the pipeline can load them by path.
    Idempotent: hf_hub_download skips files already present locally.
    """
    # The three artifacts the InstantID pipeline needs.
    filenames = (
        "ControlNetModel/config.json",
        "ControlNetModel/diffusion_pytorch_model.safetensors",
        "ip-adapter.bin",
    )
    for filename in filenames:
        hf_hub_download(
            repo_id="InstantX/InstantID",
            filename=filename,
            local_dir=CHECKPOINT_DIR,
            # NOTE(review): deprecated/ignored in recent huggingface_hub
            # versions; kept to preserve the original call exactly.
            local_dir_use_symlinks=False,
        )
|
|
|
|
|
|
|
|
# Download the InstantID weights at import time so the paths below exist.
download_checkpoints()


# Paths to the downloaded identity ControlNet and IP-Adapter checkpoint.
CONTROLNET_PATH = f"{CHECKPOINT_DIR}/ControlNetModel"


IP_ADAPTER_PATH = f"{CHECKPOINT_DIR}/ip-adapter.bin"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def setup_face_analyzer():
    """Build and prepare an insightface analyzer using the buffalo_l model pack."""
    analyzer = FaceAnalysis(name="buffalo_l")
    # ctx_id=-1 selects CPU execution for detection/recognition.
    analyzer.prepare(ctx_id=-1)
    return analyzer
|
|
|
|
|
|
|
|
# Module-level face analyzer shared by all requests.
face_app = setup_face_analyzer()
|
|
|
|
|
|
|
|
def get_face_info(image: Image.Image):
    """Return (embedding, keypoints) for the largest face in *image*.

    Args:
        image: PIL image expected to contain at least one face.

    Returns:
        Tuple of (float32 identity embedding, keypoints array) for the face
        with the largest bounding-box area.

    Raises:
        RuntimeError: if no face is detected, or the detected face exposes
            no embedding or keypoints.
    """
    rgb = np.array(image.convert("RGB"))
    # insightface expects BGR input (OpenCV convention).
    bgr = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)

    faces = face_app.get(bgr)
    if not faces:
        raise RuntimeError("Aucun visage détecté sur l'image.")

    # Keep the largest face by bounding-box area; max() is O(n) and clearer
    # than sorting the whole list just to take the first element.
    face = max(
        faces,
        key=lambda f: (f.bbox[2] - f.bbox[0]) * (f.bbox[3] - f.bbox[1]),
    )

    def _field(obj, attr_name, dict_key):
        # insightface may return attribute-style Face objects or plain dicts;
        # try the attribute first, then fall back to the dict key.
        value = getattr(obj, attr_name, None)
        if value is None and isinstance(obj, dict):
            value = obj.get(dict_key, None)
        return value

    emb = _field(face, "normed_embedding", "embedding")
    if emb is None:
        raise RuntimeError("Impossible de récupérer l'embedding du visage.")

    kps = _field(face, "kps", "kps")
    if kps is None:
        raise RuntimeError("Impossible de récupérer les keypoints du visage.")

    return np.array(emb, dtype=np.float32), np.array(kps)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def load_pipeline():
    """Build the SDXL InstantID pipeline with its ControlNet and IP-Adapter.

    Loads the identity ControlNet from CONTROLNET_PATH, attaches it to the
    BASE_MODEL_ID SDXL checkpoint, moves the pipeline to DEVICE and wires in
    the InstantID IP-Adapter with a default identity scale of 0.6.
    """
    controlnet = ControlNetModel.from_pretrained(
        CONTROLNET_PATH,
        torch_dtype=DTYPE,
    )

    pipe = StableDiffusionXLInstantIDPipeline.from_pretrained(
        BASE_MODEL_ID,
        controlnet=controlnet,
        torch_dtype=DTYPE,
    )

    # DEVICE is already "cuda" or "cpu" — no need to branch on it.
    pipe.to(DEVICE)

    pipe.load_ip_adapter_instantid(IP_ADAPTER_PATH)
    # Default identity strength used by generate().
    pipe.set_ip_adapter_scale(0.6)

    return pipe
|
|
|
|
|
|
|
|
# Global pipeline instance, built once at startup and shared by all requests.
pipe = load_pipeline()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@spaces.GPU
def generate(face_image, prompt, negative_prompt="", steps=30, guidance_scale=5, height=1024, width=768):
    """Generate an image whose face matches *face_image*, guided by *prompt*.

    Args:
        face_image: reference image containing the identity face.
        prompt: description of body, outfit, scene, style.
        negative_prompt: concepts to steer away from.
        steps: number of diffusion steps.
        guidance_scale: classifier-free guidance strength.
        height: output height in pixels.
        width: output width in pixels.

    Returns:
        The generated PIL image.

    Raises:
        gr.Error: when no face image is provided or generation fails.
    """
    try:
        if face_image is None:
            raise gr.Error("Merci de fournir une image de visage.")

        # Re-assert device placement; on ZeroGPU the GPU is attached per call.
        # DEVICE is already "cuda" or "cpu", so no branch is needed.
        pipe.to(DEVICE)

        face_emb, face_kps = get_face_info(face_image)
        # The pipeline expects a batch dimension on the identity embedding.
        face_emb_batch = face_emb[None]

        # Keypoint map used as the ControlNet conditioning image.
        kps_image = draw_kps(face_image, face_kps)

        out = pipe(
            prompt=prompt,
            negative_prompt=negative_prompt,
            image=kps_image,
            image_embeds=face_emb_batch,
            num_inference_steps=int(steps),
            guidance_scale=float(guidance_scale),
            controlnet_conditioning_scale=0.4,
            height=int(height),
            width=int(width),
        )

        return out.images[0]

    except gr.Error:
        # User-facing errors pass through unchanged instead of being
        # re-wrapped (which would double-wrap the message).
        raise
    except Exception as e:
        import traceback

        traceback.print_exc()
        raise gr.Error(str(e))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
with gr.Blocks() as demo:
    gr.Markdown("## InstantID + ArtFusion XL Real (SDXL) – Visage → corps généré (ZeroGPU)")

    with gr.Row():
        # Left column: reference image, prompts and sampling controls.
        with gr.Column():
            face_img = gr.Image(type="pil", label="Image visage (référence ID)")

            default_prompt = (
                "photorealistic full body portrait, white European man, realistic skin texture, "
                "firefighter uniform with detailed fabric, realistic studio lighting, 35mm DSLR, "
                "sharp focus on face, clean background, high resolution"
            )
            prompt = gr.Textbox(label="Prompt", lines=3, value=default_prompt)

            default_negative = (
                "cartoon, anime, painting, illustration, lowres, blurry, deformed, bad anatomy, "
                "extra limbs, waxy skin, oversharpen, text, watermark"
            )
            neg_prompt = gr.Textbox(label="Negative", value=default_negative)

            # Sampling controls (minimum, maximum, default value).
            steps = gr.Slider(minimum=5, maximum=60, value=30, step=1, label="Steps")
            guidance = gr.Slider(minimum=1.0, maximum=10.0, value=5.0, step=0.5, label="Guidance scale")
            height = gr.Slider(minimum=640, maximum=1536, value=1024, step=64, label="Height")
            width = gr.Slider(minimum=640, maximum=1024, value=768, step=64, label="Width")

            btn = gr.Button("Generate")

        # Right column: generated result.
        with gr.Column():
            output = gr.Image(label="Result")

    btn.click(
        fn=generate,
        inputs=[face_img, prompt, neg_prompt, steps, guidance, height, width],
        outputs=output,
    )


demo.launch()
|
|
|