Update controlnet_facefix.py
Browse files- controlnet_facefix.py +161 -55
controlnet_facefix.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
# controlnet_facefix.py -
|
| 2 |
import torch
|
| 3 |
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel
|
| 4 |
from PIL import Image
|
|
@@ -7,24 +7,23 @@ import cv2
|
|
| 7 |
import numpy as np
|
| 8 |
|
| 9 |
print("="*60)
|
| 10 |
-
print("FACE-FIX:
|
| 11 |
print("="*60)
|
| 12 |
|
| 13 |
-
# WICHTIG: Dieselben Modelle wie in controlnet_module.py!
|
| 14 |
_components_loaded = False
|
| 15 |
_controlnet_depth = None
|
| 16 |
_controlnet_pose = None
|
| 17 |
_pipeline = None
|
| 18 |
|
| 19 |
def _initialize_components():
|
| 20 |
-
"""Lade
|
| 21 |
global _components_loaded, _controlnet_depth, _controlnet_pose
|
| 22 |
|
| 23 |
if _components_loaded:
|
| 24 |
return True
|
| 25 |
|
| 26 |
try:
|
| 27 |
-
print("1. Lade ControlNet Depth...")
|
| 28 |
_controlnet_depth = ControlNetModel.from_pretrained(
|
| 29 |
"lllyasviel/sd-controlnet-depth",
|
| 30 |
torch_dtype=torch.float16
|
|
@@ -35,7 +34,7 @@ def _initialize_components():
|
|
| 35 |
return False
|
| 36 |
|
| 37 |
try:
|
| 38 |
-
print("2. Lade ControlNet OpenPose...")
|
| 39 |
_controlnet_pose = ControlNetModel.from_pretrained(
|
| 40 |
"lllyasviel/sd-controlnet-openpose",
|
| 41 |
torch_dtype=torch.float16
|
|
@@ -46,70 +45,133 @@ def _initialize_components():
|
|
| 46 |
return False
|
| 47 |
|
| 48 |
_components_loaded = True
|
| 49 |
-
print("✅
|
| 50 |
return True
|
| 51 |
|
| 52 |
def _extract_depth_map(image):
|
| 53 |
-
"""Depth Map
|
| 54 |
try:
|
| 55 |
img_array = np.array(image.convert("RGB"))
|
|
|
|
|
|
|
| 56 |
gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
|
| 57 |
-
|
| 58 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
return Image.fromarray(depth_rgb)
|
| 60 |
except Exception as e:
|
| 61 |
print(f"Depth Map Fehler: {e}")
|
| 62 |
-
|
|
|
|
| 63 |
|
| 64 |
-
def
|
| 65 |
-
"""
|
| 66 |
try:
|
| 67 |
img_array = np.array(image.convert("RGB"))
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
except Exception as e:
|
| 72 |
-
print(f"Pose
|
| 73 |
-
|
|
|
|
|
|
|
| 74 |
|
| 75 |
def apply_facefix(image: Image.Image, prompt: str, negative_prompt: str, seed: int, model_id: str):
|
| 76 |
-
"""
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
|
| 83 |
start_time = time.time()
|
| 84 |
|
| 85 |
# 1. Komponenten initialisieren
|
| 86 |
if not _initialize_components():
|
| 87 |
-
print("❌
|
| 88 |
return image
|
| 89 |
|
| 90 |
-
# 2. Control
|
| 91 |
-
print("
|
| 92 |
-
|
| 93 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
|
| 95 |
# 3. Pipeline erstellen
|
| 96 |
global _pipeline
|
| 97 |
if _pipeline is None:
|
| 98 |
try:
|
| 99 |
-
print("🔄 Lade
|
| 100 |
_pipeline = StableDiffusionControlNetPipeline.from_pretrained(
|
| 101 |
model_id,
|
| 102 |
-
controlnet=[_controlnet_pose, _controlnet_depth],
|
| 103 |
torch_dtype=torch.float16,
|
| 104 |
safety_checker=None,
|
| 105 |
requires_safety_checker=False,
|
| 106 |
)
|
| 107 |
|
| 108 |
-
# Optimierungen
|
| 109 |
_pipeline.enable_attention_slicing()
|
| 110 |
_pipeline.enable_vae_slicing()
|
| 111 |
|
| 112 |
-
print("✅ Pipeline geladen")
|
| 113 |
except Exception as e:
|
| 114 |
print(f"❌ Pipeline Fehler: {e}")
|
| 115 |
return image
|
|
@@ -120,43 +182,87 @@ def apply_facefix(image: Image.Image, prompt: str, negative_prompt: str, seed: i
|
|
| 120 |
print(f" Device: {device}")
|
| 121 |
pipeline = _pipeline.to(device)
|
| 122 |
|
| 123 |
-
# 5.
|
| 124 |
-
#
|
| 125 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
|
| 127 |
-
# 6. OPTIMIERTE INFERENCE mit minimaler Stärke
|
| 128 |
result = pipeline(
|
| 129 |
-
prompt=
|
| 130 |
-
negative_prompt=
|
| 131 |
-
image=[pose_img, depth_img],
|
| 132 |
-
controlnet_conditioning_scale=[0.
|
| 133 |
-
num_inference_steps=
|
| 134 |
-
guidance_scale=
|
| 135 |
-
generator=torch.Generator(device).manual_seed(seed), #
|
| 136 |
height=512,
|
| 137 |
width=512,
|
| 138 |
).images[0]
|
| 139 |
|
| 140 |
-
# Zurück auf Originalgröße
|
| 141 |
-
if
|
| 142 |
-
result = result.resize(
|
| 143 |
|
| 144 |
duration = time.time() - start_time
|
| 145 |
-
|
| 146 |
-
print(f"
|
| 147 |
-
print(
|
| 148 |
-
print(f"
|
| 149 |
-
print(f"
|
| 150 |
-
print(f"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
|
| 152 |
return result
|
| 153 |
|
| 154 |
except Exception as e:
|
| 155 |
-
print(f"\n
|
| 156 |
import traceback
|
| 157 |
traceback.print_exc()
|
| 158 |
return image
|
| 159 |
|
| 160 |
print("="*60)
|
| 161 |
-
print("FACE-FIX
|
| 162 |
print("="*60)
|
|
|
|
| 1 |
+
# controlnet_facefix.py - NUR QUALITÄTSVERBESSERUNG MIT OPENPOSE + DEPTH
|
| 2 |
import torch
|
| 3 |
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel
|
| 4 |
from PIL import Image
|
|
|
|
| 7 |
import numpy as np
|
| 8 |
|
| 9 |
print("="*60)
|
| 10 |
+
print("FACE-FIX: QUALITÄTSVERBESSERUNG MIT OPENPOSE + DEPTH")
|
| 11 |
print("="*60)
|
| 12 |
|
|
|
|
| 13 |
_components_loaded = False
|
| 14 |
_controlnet_depth = None
|
| 15 |
_controlnet_pose = None
|
| 16 |
_pipeline = None
|
| 17 |
|
| 18 |
def _initialize_components():
|
| 19 |
+
"""Lade OpenPose und Depth ControlNets"""
|
| 20 |
global _components_loaded, _controlnet_depth, _controlnet_pose
|
| 21 |
|
| 22 |
if _components_loaded:
|
| 23 |
return True
|
| 24 |
|
| 25 |
try:
|
| 26 |
+
print("1. Lade ControlNet Depth (für 3D-Struktur)...")
|
| 27 |
_controlnet_depth = ControlNetModel.from_pretrained(
|
| 28 |
"lllyasviel/sd-controlnet-depth",
|
| 29 |
torch_dtype=torch.float16
|
|
|
|
| 34 |
return False
|
| 35 |
|
| 36 |
try:
|
| 37 |
+
print("2. Lade ControlNet OpenPose (für Pose-Erhaltung)...")
|
| 38 |
_controlnet_pose = ControlNetModel.from_pretrained(
|
| 39 |
"lllyasviel/sd-controlnet-openpose",
|
| 40 |
torch_dtype=torch.float16
|
|
|
|
| 45 |
return False
|
| 46 |
|
| 47 |
_components_loaded = True
|
| 48 |
+
print("✅ OPENPOSE + DEPTH GELADEN")
|
| 49 |
return True
|
| 50 |
|
| 51 |
def _extract_depth_map(image):
    """Build a pseudo depth map from the brightness of *image*.

    No depth-estimation model is involved: the map is derived purely from
    the luminance channel (blur -> CLAHE -> invert -> min/max normalize).

    Args:
        image: A PIL image; it is converted to RGB before processing.

    Returns:
        A 3-channel PIL image with the same width and height as the input.
        If any processing step raises, the error is printed and a plain
        grayscale version of the input (converted back to RGB) is returned.
    """
    try:
        rgb_pixels = np.array(image.convert("RGB"))
        luminance = cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2GRAY)

        # Gaussian blur gives a softer map before the contrast step.
        smoothed = cv2.GaussianBlur(luminance, (7, 7), 0)

        # Contrast-limited adaptive histogram equalization on 8x8 tiles.
        equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))

        # Invert the equalized image (author's stated convention:
        # bright = near, dark = far).
        negative = 255 - equalizer.apply(smoothed)

        # Stretch to the full 0..255 range, then expand to three channels.
        stretched = cv2.normalize(negative, None, 0, 255, cv2.NORM_MINMAX)
        return Image.fromarray(
            cv2.cvtColor(stretched.astype(np.uint8), cv2.COLOR_GRAY2RGB)
        )
    except Exception as e:
        print(f"Depth Map Fehler: {e}")
        # Fallback: simple grayscale rendering of the input.
        return image.convert("L").convert("RGB")
|
| 80 |
|
| 81 |
+
def _extract_pose_map(image):
    """Build an edge-based control image from *image*.

    Three Canny passes with increasing thresholds are blended so that
    fine detail contributes most, then the result is expanded to RGB.

    NOTE(review): the output is a Canny edge image, not a keypoint
    skeleton; confirm this is what the consumer of the "pose" map expects.

    Args:
        image: A PIL image; it is converted to RGB before processing.

    Returns:
        A 3-channel PIL image with the same width and height as the input.
        If any processing step raises, the error is printed and a single
        Canny pass (thresholds 50/150) over the RGB array is returned
        instead, converted to RGB.
    """
    try:
        luminance = cv2.cvtColor(
            np.array(image.convert("RGB")), cv2.COLOR_RGB2GRAY
        )

        # Fine facial detail, medium contours, strong edges (low -> high
        # thresholds).
        fine, medium, strong = (
            cv2.Canny(luminance, low, high)
            for low, high in ((20, 60), (40, 100), (80, 160))
        )

        # Two-stage weighted blend; fine detail carries the largest weight.
        blend = cv2.addWeighted(fine, 0.6, medium, 0.3, 0)
        blend = cv2.addWeighted(blend, 0.8, strong, 0.2, 0)

        # NOTE(review): a 1x1 kernel leaves the image unchanged, so this
        # dilation currently has no effect; kept to preserve behavior.
        unit_kernel = np.ones((1, 1), np.uint8)
        thickened = cv2.dilate(blend, unit_kernel, iterations=1)

        return Image.fromarray(cv2.cvtColor(thickened, cv2.COLOR_GRAY2RGB))
    except Exception as e:
        print(f"Pose Map Fehler: {e}")
        # Fallback: one Canny pass directly on the RGB pixel array.
        fallback_edges = cv2.Canny(np.array(image.convert("RGB")), 50, 150)
        return Image.fromarray(fallback_edges).convert("RGB")
|
| 115 |
|
| 116 |
def apply_facefix(image: Image.Image, prompt: str, negative_prompt: str, seed: int, model_id: str):
|
| 117 |
+
"""
|
| 118 |
+
QUALITÄTSVERBESSERUNG MIT MAXIMALER INHALTSERHALTUNG
|
| 119 |
+
|
| 120 |
+
Verwendet:
|
| 121 |
+
1. OpenPose: Behält exakte Pose und Gesichtsstruktur
|
| 122 |
+
2. Depth: Behält 3D-Struktur und räumliche Anordnung
|
| 123 |
+
|
| 124 |
+
Strategie: MAXIMALE ControlNet-Stärke + Qualitäts-prompts
|
| 125 |
+
"""
|
| 126 |
+
print("\n" + "🔧"*50)
|
| 127 |
+
print("FACE-FIX: QUALITÄTSVERBESSERUNG MIT OPENPOSE+DEPTH")
|
| 128 |
+
print(f" Original: {image.size}")
|
| 129 |
+
print(f" Seed: {seed}")
|
| 130 |
+
print("🔧"*50)
|
| 131 |
|
| 132 |
start_time = time.time()
|
| 133 |
|
| 134 |
# 1. Komponenten initialisieren
|
| 135 |
if not _initialize_components():
|
| 136 |
+
print("❌ OpenPose/Depth konnten nicht geladen werden")
|
| 137 |
return image
|
| 138 |
|
| 139 |
+
# 2. Control Maps erstellen
|
| 140 |
+
print("\n📐 Erstelle Control Maps...")
|
| 141 |
+
original_size = image.size
|
| 142 |
+
|
| 143 |
+
# Standardgröße für ControlNet
|
| 144 |
+
control_size = (512, 512)
|
| 145 |
+
resized_image = image.resize(control_size, Image.Resampling.LANCZOS)
|
| 146 |
+
|
| 147 |
+
# Depth Map (für 3D-Struktur)
|
| 148 |
+
depth_img = _extract_depth_map(resized_image)
|
| 149 |
+
|
| 150 |
+
# Pose Map (für Gesichts- und Körperstruktur)
|
| 151 |
+
pose_img = _extract_pose_map(resized_image)
|
| 152 |
+
|
| 153 |
+
# Optional: Debug speichern
|
| 154 |
+
depth_img.save("debug_depth_enhanced.png")
|
| 155 |
+
pose_img.save("debug_pose_enhanced.png")
|
| 156 |
|
| 157 |
# 3. Pipeline erstellen
|
| 158 |
global _pipeline
|
| 159 |
if _pipeline is None:
|
| 160 |
try:
|
| 161 |
+
print("🔄 Lade Pipeline mit OpenPose + Depth...")
|
| 162 |
_pipeline = StableDiffusionControlNetPipeline.from_pretrained(
|
| 163 |
model_id,
|
| 164 |
+
controlnet=[_controlnet_pose, _controlnet_depth], # OpenPose zuerst, dann Depth
|
| 165 |
torch_dtype=torch.float16,
|
| 166 |
safety_checker=None,
|
| 167 |
requires_safety_checker=False,
|
| 168 |
)
|
| 169 |
|
| 170 |
+
# Optimierungen
|
| 171 |
_pipeline.enable_attention_slicing()
|
| 172 |
_pipeline.enable_vae_slicing()
|
| 173 |
|
| 174 |
+
print("✅ Pipeline mit OpenPose+Depth geladen")
|
| 175 |
except Exception as e:
|
| 176 |
print(f"❌ Pipeline Fehler: {e}")
|
| 177 |
return image
|
|
|
|
| 182 |
print(f" Device: {device}")
|
| 183 |
pipeline = _pipeline.to(device)
|
| 184 |
|
| 185 |
+
# 5. PROMPT-STRATEGIE FÜR QUALITÄTSVERBESSERUNG:
|
| 186 |
+
# Original-Prompt + Qualitäts-Keywords, ABER KEINE neuen Inhalte
|
| 187 |
+
|
| 188 |
+
# Basierend auf originalem Prompt, aber fokus auf Qualität
|
| 189 |
+
if "face" in prompt.lower() or "portrait" in prompt.lower():
|
| 190 |
+
quality_prompt = f"{prompt}, professional portrait, sharp focus, detailed skin, perfect face, clear eyes, high resolution, 8k"
|
| 191 |
+
else:
|
| 192 |
+
quality_prompt = f"{prompt}, high quality, sharp focus, detailed, professional photography, no artifacts"
|
| 193 |
+
|
| 194 |
+
# Negative Prompts für Qualitätsverbesserung
|
| 195 |
+
quality_negative = (
|
| 196 |
+
f"{negative_prompt}, "
|
| 197 |
+
"blurry, out of focus, lowres, low quality, jpeg artifacts, "
|
| 198 |
+
"compression artifacts, pixelated, grainy, noisy, "
|
| 199 |
+
"deformed, distorted, bad anatomy, mutation, ugly"
|
| 200 |
+
)
|
| 201 |
+
|
| 202 |
+
# 6. KRITISCHE PARAMETER FÜR INHALTSERHALTUNG:
|
| 203 |
+
# Hohe ControlNet-Stärken für maximale Kontrolle
|
| 204 |
+
# OpenPose: Hoch für Pose-Erhaltung
|
| 205 |
+
# Depth: Hoch für Strukturerhaltung
|
| 206 |
+
|
| 207 |
+
print("\n⚙️ Starte Qualitätsverbesserung mit Parametern:")
|
| 208 |
+
print(f" • OpenPose Strength: 0.95 (sehr hoch für Pose-Erhaltung)")
|
| 209 |
+
print(f" • Depth Strength: 0.85 (hoch für 3D-Struktur)")
|
| 210 |
+
print(f" • Steps: 25")
|
| 211 |
+
print(f" • CFG: 5.0 (niedrig für weniger 'Kreativität')")
|
| 212 |
|
|
|
|
| 213 |
result = pipeline(
|
| 214 |
+
prompt=quality_prompt,
|
| 215 |
+
negative_prompt=quality_negative,
|
| 216 |
+
image=[pose_img, depth_img], # OpenPose zuerst, dann Depth
|
| 217 |
+
controlnet_conditioning_scale=[0.95, 0.85], # SEHR HOHE WERTE
|
| 218 |
+
num_inference_steps=25, # Ausreichend für Qualität
|
| 219 |
+
guidance_scale=5.0, # NIEDRIG für minimale Änderung
|
| 220 |
+
generator=torch.Generator(device).manual_seed(seed), # GLEICHER SEED
|
| 221 |
height=512,
|
| 222 |
width=512,
|
| 223 |
).images[0]
|
| 224 |
|
| 225 |
+
# 7. Zurück auf Originalgröße
|
| 226 |
+
if original_size != (512, 512):
|
| 227 |
+
result = result.resize(original_size, Image.Resampling.LANCZOS)
|
| 228 |
|
| 229 |
duration = time.time() - start_time
|
| 230 |
+
|
| 231 |
+
print(f"\n" + "✅"*50)
|
| 232 |
+
print("✅ QUALITÄTSVERBESSERUNG ABGESCHLOSSEN")
|
| 233 |
+
print(f"✅ Dauer: {duration:.1f}s")
|
| 234 |
+
print(f"✅ Parameter: OpenPose=0.95, Depth=0.85")
|
| 235 |
+
print(f"✅ Gleicher Seed: {seed}")
|
| 236 |
+
print(f"✅ Größe: {original_size} → {result.size}")
|
| 237 |
+
print("✅"*50)
|
| 238 |
+
|
| 239 |
+
# Optional: Vergleich erstellen
|
| 240 |
+
try:
|
| 241 |
+
comparison = Image.new('RGB', (original_size[0] * 2, original_size[1]))
|
| 242 |
+
comparison.paste(image, (0, 0))
|
| 243 |
+
comparison.paste(result, (original_size[0], 0))
|
| 244 |
+
|
| 245 |
+
# Füge Beschriftung hinzu
|
| 246 |
+
from PIL import ImageDraw, ImageFont
|
| 247 |
+
draw = ImageDraw.Draw(comparison)
|
| 248 |
+
|
| 249 |
+
# Einfache Beschriftung
|
| 250 |
+
draw.text((10, 10), "Vorher", fill=(255, 255, 255))
|
| 251 |
+
draw.text((original_size[0] + 10, 10), "Nachher", fill=(255, 255, 255))
|
| 252 |
+
|
| 253 |
+
comparison.save("quality_improvement_comparison.png")
|
| 254 |
+
print(f"📊 Vergleich gespeichert: quality_improvement_comparison.png")
|
| 255 |
+
except Exception as e:
|
| 256 |
+
print(f"⚠️ Konnte Vergleich nicht speichern: {e}")
|
| 257 |
|
| 258 |
return result
|
| 259 |
|
| 260 |
except Exception as e:
|
| 261 |
+
print(f"\n❌ FEHLER: {e}")
|
| 262 |
import traceback
|
| 263 |
traceback.print_exc()
|
| 264 |
return image
|
| 265 |
|
| 266 |
print("="*60)
|
| 267 |
+
print("FACE-FIX BEREIT (OpenPose + Depth)")
|
| 268 |
print("="*60)
|