# controlnet_facefix.py  (from Stable-ControlNet-GPU)
# ControlNet-based automatic face/hand repair for Stable Diffusion outputs:
# OpenPose (incl. face keypoints) + Zoe depth conditioning fed into an
# SD 1.5 ControlNet inpaint pipeline.
import torch
from diffusers import StableDiffusionControlNetInpaintPipeline, ControlNetModel
from controlnet_aux import OpenposeDetector, ZoeDetector
from PIL import Image
# ───── Global models (loaded once at import time, kept resident in VRAM) ─────
print("Lade OpenPose_faceonly + Depth für perfekte Gesichter/Hände...")

# OpenPose preprocessor.
# FIX: `OpenposeDetector.from_pretrained()` takes no `model_name` kwarg —
# face keypoints are a per-call option (`include_face=True`), not a separate
# pretrained variant.
openpose_face = OpenposeDetector.from_pretrained("lllyasviel/ControlNet")

# Zoe depth preprocessor.
# FIX: the ZoeD checkpoint (ZoeD_M12_N.pt) is hosted in lllyasviel/Annotators,
# not in the lllyasviel/ControlNet repo.
depth_processor = ZoeDetector.from_pretrained("lllyasviel/Annotators")

# OpenPose ControlNet (fp16).
# FIX: the control_v11p_sd15_openpose repo has no "faceonly" subfolder — the
# v11p openpose model already understands body/hand/face keypoint maps.
controlnet_face = ControlNetModel.from_pretrained(
    "lllyasviel/control_v11p_sd15_openpose",
    torch_dtype=torch.float16,
).to("cuda")

# Depth ControlNet (fp16).
controlnet_depth = ControlNetModel.from_pretrained(
    "lllyasviel/control_v11f1e_sd15_depth",
    torch_dtype=torch.float16,
).to("cuda")

# Pipeline cache — created lazily on the first `_get_facefix_pipeline` call.
_facefix_pipe = None
def _get_facefix_pipeline(model_id: str):
    """Return the process-wide, lazily created face-fix inpaint pipeline.

    The pipeline is built on the first call and cached in ``_facefix_pipe``.
    NOTE(review): ``model_id`` is only honoured on that first call — a later
    call with a different id silently returns the originally built pipeline;
    confirm all callers pass the same base model.

    Args:
        model_id: HF repo id or local path of the SD 1.5 base checkpoint.

    Returns:
        StableDiffusionControlNetInpaintPipeline wired to the two global
        ControlNets, ordered [openpose, depth].
    """
    global _facefix_pipe
    if _facefix_pipe is None:
        print(f"Lade Face-Fix-Pipeline mit Modell: {model_id}")
        _facefix_pipe = StableDiffusionControlNetInpaintPipeline.from_pretrained(
            model_id,
            controlnet=[controlnet_face, controlnet_depth],
            torch_dtype=torch.float16,
            safety_checker=None,
            requires_safety_checker=False,
        )
        # xformers is an optional dependency — fall back to the default
        # attention implementation instead of crashing when it is absent.
        try:
            _facefix_pipe.enable_xformers_memory_efficient_attention()
        except Exception:
            pass
        # Model CPU offload manages device placement itself; do NOT also call
        # .to("cuda") first — combining both breaks the offload hooks.
        _facefix_pipe.enable_model_cpu_offload()  # saves ~2 GB on 16 GB cards
    return _facefix_pipe
def apply_facefix(image: Image.Image, prompt: str, negative_prompt: str, seed: int, base_model_path: str):
    """Automatic ~20-second refinement pass for faces and hands.

    Re-diffuses the whole image at moderate strength while conditioning on
    the image's own OpenPose (body + hand + face keypoints) and Zoe depth
    maps, so the overall composition is preserved and faces/hands are
    cleaned up.

    Args:
        image: Generated image to repair.
        prompt: Positive text conditioning for the refine pass.
        negative_prompt: Negative text conditioning.
        seed: RNG seed for reproducible output.
        base_model_path: SD 1.5 base checkpoint passed to the pipeline cache.

    Returns:
        PIL.Image.Image: the refined image.
    """
    pipe = _get_facefix_pipeline(base_model_path)

    # Derive the control images from the generated picture itself.
    # FIX: request hand and face keypoints explicitly — both default to off.
    pose_img = openpose_face(image, include_hand=True, include_face=True)
    depth_img = depth_processor(image)

    # FIX: the inpaint pipeline requires `mask_image`. A fully white mask
    # means "re-diffuse everywhere"; `strength` keeps the change gentle.
    mask = Image.new("L", image.size, 255)

    fixed_image = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        image=image,
        mask_image=mask,
        control_image=[pose_img, depth_img],
        controlnet_conditioning_scale=[0.85, 0.60],  # face strong, depth medium
        strength=0.42,
        num_inference_steps=20,
        guidance_scale=7.0,
        generator=torch.Generator("cuda").manual_seed(seed),
    ).images[0]
    return fixed_image