Update controlnet_module.py

controlnet_module.py (+237 -95)
CHANGED
@@ -7,11 +7,10 @@ import cv2
 import numpy as np
 import gradio as gr
 import torch.nn.functional as F
-# IMPORTANT: import the new SAM2 classes from Transformers
 from transformers import Sam2Model, Sam2Processor
+from scipy import ndimage
 
-
-
+# === CONTROLNET PROGRESS CALLBACK (for the Gradio UI) ===
 class ControlNetProgressCallback:
     def __init__(self, progress, total_steps):
         self.progress = progress
@@ -36,7 +35,6 @@ class ControlNetProcessor:
         self.pose_detector = None
         self.midas_model = None
         self.midas_transform = None
-        # Rename the variables for the new API
         self.sam_processor = None
         self.sam_model = None
         self.sam_initialized = False
@@ -47,53 +45,45 @@ class ControlNetProcessor:
             return True
 
         try:
-            print("
-
-            #
+            print("#" * 80)
+            print("# 🔄 LOADING SAM 2 (Segment Anything Model 2)")
+            print("#" * 80)
             model_id = "facebook/sam2-hiera-tiny"
-
-
-
-            # Load processor and model with the new API
+
+            print(f"📥 Model ID: {model_id}")
+            print(f"📥 Loading processor...")
             self.sam_processor = Sam2Processor.from_pretrained(model_id)
+            print(f"📥 Loading model...")
             self.sam_model = Sam2Model.from_pretrained(model_id, torch_dtype=torch.float32).to(self.device)
-
-            self.sam_model.eval()  # put the model into evaluation mode
-
+            self.sam_model.eval()
 
             self.sam_initialized = True
             print("✅ SAM 2 loaded successfully (via Transformers)")
             return True
 
         except Exception as e:
-            print(f"❌
+            print(f"❌ ERROR loading SAM 2: {str(e)[:200]}")
             self.sam_initialized = True
             return False
 
     def _validate_bbox(self, image, bbox_coords):
         """Validates and corrects the bbox coordinates"""
         width, height = image.size
 
-        # Extract the coordinates - supports both formats
         if isinstance(bbox_coords, (list, tuple)) and len(bbox_coords) == 4:
             x1, y1, x2, y2 = bbox_coords
         else:
-            # In case the coordinates are passed individually
            x1, y1, x2, y2 = bbox_coords
 
-        # Make sure that x1 <= x2 and y1 <= y2
         x1, x2 = min(x1, x2), max(x1, x2)
         y1, y2 = min(y1, y2), max(y1, y2)
 
-        # Clamp to the image bounds
         x1 = max(0, min(x1, width - 1))
         y1 = max(0, min(y1, height - 1))
         x2 = max(0, min(x2, width - 1))
         y2 = max(0, min(y2, height - 1))
 
-        # Make sure the bbox is valid
         if x2 - x1 < 10 or y2 - y1 < 10:
-            # Fall back to a sensible size
             size = min(width, height) * 0.3
             x1 = max(0, width/2 - size/2)
             y1 = max(0, height/2 - size/2)
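Note: for quick experiments, the box-prompted SAM 2 call wired up above can be reproduced standalone. This is a minimal sketch that reuses only the calls appearing in this diff; the image path and box coordinates are made-up placeholders.

import numpy as np
import torch
from PIL import Image
from transformers import Sam2Model, Sam2Processor

device = "cuda" if torch.cuda.is_available() else "cpu"
model_id = "facebook/sam2-hiera-tiny"
processor = Sam2Processor.from_pretrained(model_id)
model = Sam2Model.from_pretrained(model_id, torch_dtype=torch.float32).to(device)
model.eval()

image = Image.open("person.jpg").convert("RGB")   # placeholder input image
input_boxes = [[[100, 50, 400, 600]]]             # one box, triply nested as in the code above

inputs = processor(np.array(image), input_boxes=input_boxes, return_tensors="pt").to(device)
with torch.no_grad():
    outputs = model(**inputs)

# pred_masks holds low-resolution mask logits; create_sam_mask below shows
# the interpolate/sigmoid/threshold post-processing.
print(outputs.pred_masks.shape)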
@@ -106,7 +96,6 @@ class ControlNetProcessor:
         """Smooths the mask for better transitions"""
         try:
             if blur_radius > 0:
-                # Use median blur for better edge preservation than Gaussian
                 mask_array = cv2.medianBlur(mask_array, blur_radius*2+1)
             return mask_array
         except Exception as e:
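A side note on the `blur_radius*2+1` above: cv2.medianBlur only accepts an odd aperture size greater than 1, so the radius is mapped to the next odd kernel size. A tiny sketch of the mapping:

# radius 1 -> ksize 3, radius 2 -> ksize 5, radius 3 -> ksize 7 (always odd)
for blur_radius in (1, 2, 3):
    print(blur_radius, "->", blur_radius * 2 + 1)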
@@ -115,127 +104,250 @@ class ControlNetProcessor:
 
     def create_sam_mask(self, image, bbox_coords, mode):
         """
-        Creates a precise mask with SAM 2
+        Creates a precise mask with SAM 2 plus post-processing
         Returns a PIL image in L mode (0=black=keep, 255=white=change)
         """
         try:
+            print("#" * 80)
+            print("# 🎯 STARTING SAM 2 SEGMENTATION")
+            print("#" * 80)
+            print(f"📐 Input image size: {image.size}")
+            print(f"🎛️ Selected mode: {mode}")
+
             # 1. Load SAM2 (if not already done)
             if not self.sam_initialized:
+                print("📥 SAM 2 not loaded yet, starting lazy loading...")
                 self._lazy_load_sam()
 
             if self.sam_model is None or self.sam_processor is None:
                 print("⚠️ SAM 2 model not available, using fallback")
                 return self._create_rectangular_mask(image, bbox_coords, mode)
+            else:
+                print("✅ SAM 2 model is loaded and ready")
 
             # 2. Validate the bbox
             x1, y1, x2, y2 = self._validate_bbox(image, bbox_coords)
-
-
-
+            print("-" * 60)
+            print(f"📦 BOUNDING BOX DETAILS:")
+            print(f"   Original coordinates: {bbox_coords}")
+            print(f"   Validated coordinates: [{x1}, {y1}, {x2}, {y2}]")
+            print(f"   Bbox dimensions: {x2-x1}px × {y2-y1}px")
+
+            # 3. Prepare the input for SAM2
+            print("-" * 60)
+            print("🖼️ IMAGE PREPARATION FOR SAM 2")
             image_np = np.array(image.convert("RGB"))
-
-            # 3. Prepare the input for SAM2
-            # Build the bbox in [x_min, y_min, x_max, y_max] format
-            # Triply nested format: [[[x1, y1, x2, y2]]]
             input_boxes = [[[x1, y1, x2, y2]]]
-
-
+            print(f"   Converting image to NumPy array: {image_np.shape}")
+            print(f"   Building input boxes: {input_boxes}")
+
+            print("   Processing image with the SAM 2 processor...")
             inputs = self.sam_processor(
                 image_np,
                 input_boxes=input_boxes,
                 return_tensors="pt"
             ).to(self.device)
+            print(f"✅ Processor output: {len(inputs)} elements")
 
-            # 4. Prediction
-            print(
+            # 4. SAM2 prediction
+            print("-" * 60)
+            print("🧠 SAM 2 INFERENCE (prediction)")
             with torch.no_grad():
+                print("   Running prediction...")
                 outputs = self.sam_model(**inputs)
-
-
-
-
-            print(f"
-
-            # 5. Select the mask (first mask of the first batch dimension)
-            single_mask = outputs.pred_masks[:, :, 0, :, :]  # Shape: [1, 1, 256, 256]
-
-            print(f"🔍 Single mask shape: {single_mask.shape}")
-            print(f"🔍 Single mask dimensions: {single_mask.dim()}")
-
-            # 6. CRITICAL FIX: scale directly instead of post_process_masks
-            import torch.nn.functional as F
+            print(f"✅ Prediction finished")
+
+            # 5. Extract the mask and scale it to the original size
+            single_mask = outputs.pred_masks[:, :, 0, :, :]
+            print(f"   Raw mask shape before interpolation: {single_mask.shape}")
 
-            # Scale the 256x256 raw mask directly to the target size (image.height, image.width)
             final_mask = F.interpolate(
                 single_mask,
                 size=(image.height, image.width),
                 mode='bilinear',
                 align_corners=False
             ).squeeze()
-
-            print(f"🔍 Final mask shape after interpolation: {final_mask.shape}")
+            print(f"   Mask after interpolation: {final_mask.shape}")
 
-            #
+            # 6. Convert to NumPy and apply the threshold
             mask_np = final_mask.sigmoid().cpu().numpy()
+            print(f"   After sigmoid and CPU: {mask_np.shape}, value range: [{mask_np.min():.3f}, {mask_np.max():.3f}]")
+
             mask_array = (mask_np > 0.5).astype(np.uint8) * 255
+            print(f"   After threshold (0.5): {mask_array.shape}, unique values: {np.unique(mask_array)}")
 
-            #
-
-
-
-
-
-
-            #
+            # 7. BUILD BOTH MASKS (before post-processing)
+            original_mask_array = mask_array.copy()  # person white (255), background black (0)
+            inverted_mask_array = 255 - mask_array   # person black (0), background white (255)
+
+            print("-" * 60)
+            print(f"🔧 STARTING POST-PROCESSING FOR MODE: {mode}")
+            print(f"   Original mask (person white): {original_mask_array.shape}")
+            print(f"   Inverted mask (person black): {inverted_mask_array.shape}")
+
+            # 8. MODE-SPECIFIC POST-PROCESSING
             if mode == "environment_change":
-
-
-
-
-
-
+                print("🌳 MODE: CHANGE ENVIRONMENT")
+                # Work on the INVERTED mask (person black, background white)
+                mask_array = inverted_mask_array.copy()
+                print("   Working on the inverted mask (person black, background white)")
+
+                # Find the white components (background)
+                labeled_array, num_features = ndimage.label(mask_array)
+                print(f"   White components found (background): {num_features}")
+
+                # Only if there are several white components (e.g. background split by the person)
+                if num_features > 1:
+                    # Find all white components
+                    sizes = ndimage.sum(mask_array, labeled_array, range(1, num_features + 1))
+                    print(f"   Sizes of the white components: {sizes}")
+
+                    # Merge all white components (background parts)
+                    for i in range(1, num_features + 1):
+                        mask_array = np.where(labeled_array == i, 255, mask_array)
+                    print(f"   ✅ Merging {num_features} background components")
+
+                # Morphological operations for a clean environment
+                kernel = np.ones((5,5), np.uint8)
+                print(f"   Applying MORPH_CLOSE (5x5 kernel) to fill black holes...")
+                mask_array = cv2.morphologyEx(mask_array, cv2.MORPH_CLOSE, kernel)
+                print(f"   Applying MORPH_OPEN (5x5 kernel) to remove small white islands...")
+                mask_array = cv2.morphologyEx(mask_array, cv2.MORPH_OPEN, kernel)
+
+                # Expand the environment for better person protection (2 pixels)
+                print(f"   Applying DILATE (2x2 kernel) to protect the person...")
+                mask_array = cv2.dilate(mask_array, np.ones((2,2), np.uint8), iterations=1)
+
+                # Slight blur for more natural transitions
+                print(f"   Applying GaussianBlur (3x3 kernel) for smooth transitions...")
+                mask_array = cv2.GaussianBlur(mask_array, (3, 3), 0)
+
+                print("   ✅ Environment mode: person protected, background optimized")
+
+            elif mode == "focus_change":
+                print("🎯 MODE: CHANGE FOCUS")
+                # Work on the ORIGINAL mask (person white, background black)
+                mask_array = original_mask_array.copy()
+                print("   Working on the original mask (person white, background black)")
+
+                # Keep the largest white component (person)
+                labeled_array, num_features = ndimage.label(mask_array)
+                print(f"   White components found (person): {num_features}")
+
+                if num_features > 1:
+                    sizes = ndimage.sum(mask_array, labeled_array, range(1, num_features + 1))
+                    print(f"   Sizes of the white components: {sizes}")
+                    largest_component = np.argmax(sizes) + 1
+                    mask_array = np.where(labeled_array == largest_component, mask_array, 0)
+                    print(f"   ✅ Keeping the largest person component (of {num_features} components)")
+
+                # Slightly expand the mask for better coverage
+                kernel = np.ones((3,3), np.uint8)
+                print(f"   Applying DILATE (3x3 kernel) for better coverage...")
+                mask_array = cv2.dilate(mask_array, kernel, iterations=1)
+
+                # Morphological smoothing
+                print(f"   Applying MORPH_CLOSE (3x3 kernel) for smooth edges...")
+                mask_array = cv2.morphologyEx(mask_array, cv2.MORPH_CLOSE, kernel)
+
+                print("   ✅ Focus mode: person changed, background protected")
+
+            elif mode == "face_only_change":
+                print("👤 MODE: CHANGE FACE ONLY")
+                # Work on the ORIGINAL mask (person white, background black)
+                mask_array = original_mask_array.copy()
+                print("   Working on the original mask (person white, background black)")
+
+                # Keep the largest white component (person)
+                labeled_array, num_features = ndimage.label(mask_array)
+                print(f"   White components found (person): {num_features}")
+
+                if num_features > 1:
+                    sizes = ndimage.sum(mask_array, labeled_array, range(1, num_features + 1))
+                    print(f"   Sizes of the white components: {sizes}")
+                    largest_component = np.argmax(sizes) + 1
+                    mask_array = np.where(labeled_array == largest_component, mask_array, 0)
+                    print(f"   ✅ Keeping the largest person component (of {num_features} components)")
+
+                # Strong erosion for a precise face
+                kernel = np.ones((3,3), np.uint8)
+                print(f"   Applying ERODE (3x3 kernel, 2 iterations) for a precise face...")
+                mask_array = cv2.erode(mask_array, kernel, iterations=2)
+
+                # Additional precision erosion
+                print(f"   Applying additional ERODE (2x2 kernel, 1 iteration)...")
+                mask_array = cv2.erode(mask_array, np.ones((2,2), np.uint8), iterations=1)
+
+                # Gentle edge smoothing
+                print(f"   Applying GaussianBlur (3x3 kernel) for smooth edges...")
+                mask_array = cv2.GaussianBlur(mask_array, (3, 3), 0)
+
+                print("   ✅ Face mode: only the face is changed")
+
+            # 9. Quality control and statistics
+            white_pixels = np.sum(mask_array > 127)
+            total_pixels = mask_array.size
+            white_ratio = white_pixels / total_pixels * 100
+            black_pixels = total_pixels - white_pixels
+            black_ratio = 100 - white_ratio
+
+            print("-" * 60)
+            print("📊 MASK STATISTICS (POST-PROCESSED)")
+            print(f"   White pixels (area to change): {white_pixels:,} ({white_ratio:.1f}%)")
+            print(f"   Black pixels (area to keep): {black_pixels:,} ({black_ratio:.1f}%)")
+            print(f"   Total pixels: {total_pixels:,}")
 
-
+            # 10. Back to a PIL image
+            mask = Image.fromarray(mask_array).convert("L")
+
+            print("#" * 80)
+            print(f"✅ SAM 2 SEGMENTATION FINISHED")
+            print(f"📐 Final mask size: {mask.size}")
+            print("#" * 80)
             return mask
-
+
         except Exception as e:
-            print(
-            print(
-            print(
-            print(f"
+            print("❌" * 40)
+            print("❌ ERROR IN SAM 2 SEGMENTATION")
+            print("❌" * 40)
+            print(f"Error: {str(e)[:200]}")
             import traceback
             traceback.print_exc()
             print("ℹ️ Falling back to a rectangular mask")
             return self._create_rectangular_mask(image, bbox_coords, mode)
-
-
+
     def _create_rectangular_mask(self, image, bbox_coords, mode):
         """Fallback: creates a rectangular mask"""
+        print("#" * 80)
+        print("# ⚠️ FALLBACK: CREATING A RECTANGULAR MASK")
+        print("#" * 80)
+
         from PIL import ImageDraw
 
         mask = Image.new("L", image.size, 0)
+        print(f"📐 Creating empty mask: {mask.size}")
 
         if bbox_coords and all(coord is not None for coord in bbox_coords):
             x1, y1, x2, y2 = self._validate_bbox(image, bbox_coords)
             draw = ImageDraw.Draw(mask)
 
             if mode == "environment_change":
-                # MODE 1: change everything except the box
                 draw.rectangle([0, 0, image.size[0], image.size[1]], fill=255)
                 draw.rectangle([x1, y1, x2, y2], fill=0)
-                print("
+                print(f"   Mode: change environment - bbox protected: [{x1}, {y1}, {x2}, {y2}]")
             else:
-                # MODES 2 & 3: change only the box
                 draw.rectangle([x1, y1, x2, y2], fill=255)
-                print("
+                print(f"   Mode: change focus/face - bbox changed: [{x1}, {y1}, {x2}, {y2}]")
 
+        print("✅ Rectangular mask created")
         return mask
 
     def load_pose_detector(self):
         """Loads only the pose detector"""
         if self.pose_detector is None:
-            print("
+            print("#" * 80)
+            print("# 📥 LOADING POSE DETECTOR")
+            print("#" * 80)
             try:
                 self.pose_detector = OpenposeDetector.from_pretrained("lllyasviel/ControlNet")
                 print("✅ Pose detector loaded")
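The mode-specific post-processing added above repeats one pattern: label the white regions with scipy.ndimage, keep or merge components, then clean up with OpenCV morphology. A condensed standalone sketch of the focus_change branch (the function name and the synthetic test mask are illustrative only):

import cv2
import numpy as np
from scipy import ndimage

def keep_largest_component(mask_array):
    """Keep only the largest white component, then dilate and close - the focus_change recipe."""
    labeled_array, num_features = ndimage.label(mask_array)
    if num_features > 1:
        sizes = ndimage.sum(mask_array, labeled_array, range(1, num_features + 1))
        largest_component = np.argmax(sizes) + 1
        mask_array = np.where(labeled_array == largest_component, mask_array, 0).astype(np.uint8)
    kernel = np.ones((3, 3), np.uint8)
    mask_array = cv2.dilate(mask_array, kernel, iterations=1)
    return cv2.morphologyEx(mask_array, cv2.MORPH_CLOSE, kernel)

# Tiny smoke test: two blobs, only the bigger one survives.
test = np.zeros((64, 64), np.uint8)
test[5:15, 5:15] = 255    # small blob
test[30:60, 30:60] = 255  # large blob
print(np.unique(keep_largest_component(test)))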
@@ -246,7 +358,9 @@ class ControlNetProcessor:
     def load_midas_model(self):
         """Loads the MiDaS model for depth maps"""
         if self.midas_model is None:
-            print("
+            print("#" * 80)
+            print("# 📥 LOADING MIDAS MODEL FOR DEPTH MAPS")
+            print("#" * 80)
             try:
                 import torchvision.transforms as T
 
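The body of load_midas_model is mostly outside this hunk; the `import torchvision.transforms as T` suggests a hand-built preprocessing pipeline. A plausible minimal version, assuming a 256x256 input and ImageNet normalization (both assumptions, not shown in this diff):

import torchvision.transforms as T

# Assumed preprocessing: resize + tensor + ImageNet normalization.
# The exact sizes/values used by load_midas_model are not visible in this hunk.
midas_transform = T.Compose([
    T.Resize((256, 256)),
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
# Used above as: self.midas_transform(image).unsqueeze(0).to(self.device)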
@@ -275,6 +389,9 @@ class ControlNetProcessor:
 
     def extract_pose_simple(self, image):
         """Simple pose extraction without complex dependencies"""
+        print("#" * 80)
+        print("# ⚠️ CREATING SIMPLE POSE MAP (FALLBACK)")
+        print("#" * 80)
         try:
             img_array = np.array(image.convert("RGB"))
             edges = cv2.Canny(img_array, 100, 200)
@@ -287,12 +404,18 @@ class ControlNetProcessor:
 
     def extract_pose(self, image):
         """Extracts a pose map from the image, with fallback"""
+        print("#" * 80)
+        print("# 🕺 CREATING POSE MAP")
+        print("#" * 80)
         try:
             detector = self.load_pose_detector()
             if detector is None:
+                print("⚠️ No pose detector available, using fallback")
                 return self.extract_pose_simple(image)
 
+            print("   Extracting pose with OpenPose...")
             pose_image = detector(image, hand_and_face=True)
+            print("✅ Pose map created successfully")
             return pose_image
         except Exception as e:
             print(f"Error during pose extraction: {e}")
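The OpenposeDetector used above typically comes from the controlnet_aux package (the import itself is outside this diff). A standalone sketch with a placeholder image:

from PIL import Image
from controlnet_aux import OpenposeDetector  # assumed import source

detector = OpenposeDetector.from_pretrained("lllyasviel/ControlNet")
image = Image.open("person.jpg")             # placeholder input
pose_map = detector(image, hand_and_face=True)
pose_map.save("pose_map.png")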
@@ -300,6 +423,9 @@ class ControlNetProcessor:
 
     def extract_canny_edges(self, image):
         """Extracts Canny edges for environment preservation"""
+        print("#" * 80)
+        print("# 🎨 CREATING CANNY EDGE MAP")
+        print("#" * 80)
         try:
             img_array = np.array(image.convert("RGB"))
 
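The rest of extract_canny_edges is elided in this hunk. A typical completion, reusing the 100/200 thresholds from extract_pose_simple above and stacking the edges to the three channels ControlNet expects (the thresholds and helper name are illustrative, not taken from this diff):

import cv2
import numpy as np
from PIL import Image

def canny_to_control_image(image):
    img_array = np.array(image.convert("RGB"))
    edges = cv2.Canny(img_array, 100, 200)          # thresholds as in extract_pose_simple
    edges_rgb = np.stack([edges] * 3, axis=-1)      # ControlNet conditioning is 3-channel
    return Image.fromarray(edges_rgb)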
@@ -319,6 +445,9 @@ class ControlNetProcessor:
         """
         Extracts a depth map with MiDaS (falls back to a filter)
         """
+        print("#" * 80)
+        print("# 🏔️ CREATING DEPTH MAP")
+        print("#" * 80)
         try:
             midas = self.load_midas_model()
             if midas is not None:
@@ -329,6 +458,7 @@ class ControlNetProcessor:
                 img_transformed = self.midas_transform(image).unsqueeze(0).to(self.device)
 
                 with torch.no_grad():
+                    print("   Running MiDaS inference...")
                     prediction = midas(img_transformed)
                     prediction = torch.nn.functional.interpolate(
                         prediction.unsqueeze(1),
@@ -339,6 +469,7 @@ class ControlNetProcessor:
 
                 depth_np = prediction.cpu().numpy()
                 depth_min, depth_max = depth_np.min(), depth_np.max()
+                print(f"   Depth values: min={depth_min:.3f}, max={depth_max:.3f}")
 
                 if depth_max > depth_min:
                     depth_np = (depth_np - depth_min) / (depth_max - depth_min)
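After the [0, 1] normalization above, the method presumably converts the array back into an image; the tail is not shown in this hunk. A typical version of that step, under those assumptions:

import numpy as np
from PIL import Image

def depth_to_image(depth_np):
    # Assumed tail of extract_depth_map: scale to uint8 and stack to 3 channels.
    depth_uint8 = (depth_np.squeeze() * 255).astype(np.uint8)
    return Image.fromarray(np.stack([depth_uint8] * 3, axis=-1))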
@@ -372,22 +503,33 @@ class ControlNetProcessor:
         """
         CREATES ONLY CONDITIONING MAPS, does NOT generate an image.
         """
-        print("
+        print("#" * 80)
+        print("# 🎯 STARTING CONTROLNET CONDITIONING MAP CREATION")
+        print("#" * 80)
+        print(f"📐 Input image size: {image.size}")
+        print(f"🎛️ Mode: {'Depth + Canny' if keep_environment else 'OpenPose + Canny'}")
 
         if keep_environment:
             print("   Mode: Depth + Canny")
-
-
-
-
+            print("   Step 1/2: extracting depth map...")
+            depth_map = self.extract_depth_map(image)
+            print("   Step 2/2: extracting Canny edges...")
+            canny_map = self.extract_canny_edges(image)
+            conditioning_images = [depth_map, canny_map]
         else:
             print("   Mode: OpenPose + Canny")
-
-
-
-
-
-
+            print("   Step 1/2: extracting pose map...")
+            pose_map = self.extract_pose(image)
+            print("   Step 2/2: extracting Canny edges...")
+            canny_map = self.extract_canny_edges(image)
+            conditioning_images = [pose_map, canny_map]
+
+        print("-" * 60)
+        print(f"✅ {len(conditioning_images)} CONDITIONING MAPS CREATED")
+        for i, img in enumerate(conditioning_images):
+            print(f"   Map {i+1}: {img.size}, mode: {img.mode}")
+        print("#" * 80)
+
         return conditioning_images
 
 
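Taken together, a typical call sequence against this module could look as follows. The constructor signature and the name of the conditioning-map method are assumptions for illustration; only create_sam_mask, the mode strings, and the keep_environment flag appear verbatim in the diff:

from PIL import Image
from controlnet_module import ControlNetProcessor

processor = ControlNetProcessor()          # assumed no-arg constructor
image = Image.open("person.jpg")           # placeholder input
bbox = (120, 60, 420, 580)

# Inpainting mask: white = change, black = keep (see the create_sam_mask docstring).
mask = processor.create_sam_mask(image, bbox, "environment_change")
mask.save("mask.png")

# Conditioning maps: Depth + Canny when keeping the environment, else OpenPose + Canny.
# Method name assumed from the final hunk, which only shows its body.
maps = processor.create_conditioning_maps(image, keep_environment=True)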