Spaces:

Astridkraft
/

Stable-ControlNet-GPU

Paused

App Files Files Community

Astridkraft committed on Dec 29, 2025

Commit

4e659cc

verified ·

1 Parent(s): 735c1b0

Update controlnet_module.py

Browse files

Files changed (1) hide show

controlnet_module.py +247 -129

controlnet_module.py CHANGED Viewed

@@ -127,65 +127,116 @@ class ControlNetProcessor:
             # 2. Validiere BBox
             x1, y1, x2, y2 = self._validate_bbox(image, bbox_coords)
             # ============================================================
             # SPEZIALBEHANDLUNG NUR FÜR face_only_change
             # ============================================================
             if mode == "face_only_change":
                 print("-" * 60)
-                print("👤 SPEZIALMODUS: NUR GESICHT - ERSTELLE FOKUSIERTEN AUSSCHNITT")
                 print("-" * 60)
-                # Originalbild und Koordinaten sichern
                 original_image = image
-                original_bbox = (x1, y1, x2, y2)
-                # Puffer um die BBox berechnen (20% der BBox-Größe, mindestens 50px)
-                padding_x = max(50, int((x2 - x1) * 0.2))
-                padding_y = max(50, int((y2 - y1) * 0.2))
-                # Ausschnitt-Koordinaten berechnen (innerhalb der Bildgrenzen)
-                crop_x1 = max(0, x1 - padding_x)
-                crop_y1 = max(0, y1 - padding_y)
-                crop_x2 = min(image.width, x2 + padding_x)
-                crop_y2 = min(image.height, y2 + padding_y)
-                print(f"   📐 Original-BBox: [{x1}, {y1}, {x2}, {y2}]")
-                print(f"   📏 Original-BBox Größe: {x2-x1} × {y2-y1} px")
-                print(f"   🔲 Ausschnitt-Bereich: [{crop_x1}, {crop_y1}, {crop_x2}, {crop_y2}]")
-                print(f"   📏 Ausschnitt-Größe: {crop_x2-crop_x1} × {crop_y2-crop_y1} px")
-                print(f"   📊 Puffer: {padding_x} × {padding_y} px")
                 # Bild ausschneiden
-                cropped_image = image.crop((crop_x1, crop_y1, crop_x2, crop_y2))
-                print(f"   ✅ Ausschnitt erstellt: {cropped_image.size}")
                 # ============================================================
-                # NEU: KONTRASTVERSTÄRKUNG FÜR BESSERE SAM-ERKENNUNG
                 # ============================================================
-                print("   🔍 Wende Kontrastverstärkung an für bessere Segmentierung...")
-                contrast_enhancer = ImageEnhance.Contrast(cropped_image)
-                cropped_image = contrast_enhancer.enhance(1.5)  # 50% mehr Kontrast
-                print("   ✅ Kontrast um 50% erhöht")
-                # BBox-Koordinaten relativ zum Ausschnitt neu berechnen
                 rel_x1 = x1 - crop_x1
                 rel_y1 = y1 - crop_y1
                 rel_x2 = x2 - crop_x1
                 rel_y2 = y2 - crop_y1
-                print(f"   🎯 Relative BBox im Ausschnitt: [{rel_x1}, {rel_y1}, {rel_x2}, {rel_y2}]")
                 print(f"   📏 Relative BBox Größe: {rel_x2-rel_x1} × {rel_y2-rel_y1} px")
-                # Für SAM: Verwende Ausschnitt und relative Koordinaten
-                image = cropped_image
                 x1, y1, x2, y2 = rel_x1, rel_y1, rel_x2, rel_y2
-                print("   🔄 SAM wird auf kontrastverstärktem Ausschnitt (nicht Vollbild) ausgeführt")
             # ============================================================
             # GEMEINSAME SAM-LOGIK FÜR ALLE MODI
-            # (arbeitet auf `image` - bei face_only_change ist das der Ausschnitt)
             # ============================================================
             print("-" * 60)
             print(f"📦 BOUNDING BOX DETAILS FÜR SAM:")
@@ -201,6 +252,11 @@ class ControlNetProcessor:
             print(f"   Konvertiere Bild zu NumPy Array: {image_np.shape}")
             print(f"   Erstelle Input Boxes: {input_boxes}")
             print("   Verarbeite Bild mit SAM 2 Processor...")
             inputs = self.sam_processor(
                 image_np,
@@ -216,40 +272,123 @@ class ControlNetProcessor:
                 print("   Führe Vorhersage durch...")
                 outputs = self.sam_model(**inputs)
                 print(f"✅ Vorhersage abgeschlossen")
             # 5. Maske extrahieren und auf Originalgröße skalieren
-            single_mask = outputs.pred_masks[:, :, 0, :, :]
-            print(f"   Rohmaske Shape vor Interpolation: {single_mask.shape}")
-            final_mask = F.interpolate(
-                single_mask,
-                size=(image.height, image.width),
-                mode='bilinear',
-                align_corners=False
-            ).squeeze()
-            print(f"   Maske nach Interpolation: {final_mask.shape}")
-            # 6. In NumPy konvertieren und Schwellenwert anwenden
-            mask_np = final_mask.sigmoid().cpu().numpy()
-            print(f"   Nach Sigmoid und CPU: {mask_np.shape}, Wertebereich: [{mask_np.min():.3f}, {mask_np.max():.3f}]")
             # ============================================================
-            # KRITISCH: DYNAMISCHER THRESHOLD FÜR UNSICHERE SAM-VORHERSAGEN
             # ============================================================
             max_val = mask_np.max()
-            print(f"   🔍 Maximaler SAM-Konfidenzwert: {max_val:.3f}")
-            # NEUE LOGIK: Unterscheidung basierend auf SAM-Konfidenz
             if max_val < 0.6:
-                # Fall: SAM ist unsicher (wie in Ihrem Log: max_val=0.505)
-                # Verwende festen, niedrigen Threshold
-                dynamic_threshold = 0.2  # Sehr niedrig für unsichere Vorhersagen
                 print(f"   ⚠️  SAM ist unsicher (max_val={max_val:.3f} < 0.6)")
                 print(f"   🎯 Verwende festen niedrigen Threshold: {dynamic_threshold:.3f}")
             else:
-                # Fall: SAM ist sicher
-                # Verwende prozentualen Threshold basierend auf Maximum
-                dynamic_threshold = max_val * 0.95
                 print(f"   ✅ SAM ist sicher (max_val={max_val:.3f} >= 0.6)")
                 print(f"   🎯 Dynamischer Threshold: {dynamic_threshold:.3f} (80% von Maximum)")
@@ -258,71 +397,76 @@ class ControlNetProcessor:
             print(f"   Nach Threshold ({dynamic_threshold:.3f}): {mask_array.shape}, Unique Werte: {unique_vals}")
             # ============================================================
-            # NEU: VORFILTERUNG FÜR KLEINE KOMPONENTEN (RAUSCHEN ENTFERNEN)
             # ============================================================
-            if np.sum(mask_array > 0) > 0:  # Nur wenn weiße Pixel existieren
                 labeled_array, num_features = ndimage.label(mask_array)
                 if num_features > 1:
-                    print(f"   🧹 Vorfilterung: Gefundene Komponenten vor Filterung: {num_features}")
                     sizes = ndimage.sum(mask_array, labeled_array, range(1, num_features + 1))
-                    min_size = 1000  # Minimale Größe für eine sinnvolle Komponente
-                    # Zähle, wie viele Komponenten die Mindestgröße erreichen
-                    valid_components = 0
                     for i in range(1, num_features + 1):
-                        if sizes[i-1] >= min_size:
-                            valid_components += 1
-                        else:
-                            # Entferne kleine Komponenten (Rauschen)
                             mask_array = np.where(labeled_array == i, 0, mask_array)
-                    print(f"   ✅ Entferne kleine Komponenten (<{min_size}px): {num_features} → {valid_components} Komponenten")
-            # 7. BEIDE MASKEN ERSTELLEN (vor Nachbearbeitung)
             original_mask_array = mask_array.copy()        # Person weiß (255), Hintergrund schwarz (0)
             inverted_mask_array = 255 - mask_array         # Person schwarz (0), Hintergrund weiß (255)
             print("-" * 60)
-            print(f"🔧 STARTE NACHBEARBEITUNG FÜR MODUS: {mode}")
             print(f"   Original-Maske (Person weiß): {original_mask_array.shape}")
             print(f"   Invertierte Maske (Person schwarz): {inverted_mask_array.shape}")
-            # 8. MODUS-SPEZIFISCHE NACHBEARBEITUNG
             if mode == "environment_change":
                 print("🌳 MODUS: UMWELT ÄNDERN")
                 # Arbeite auf der INVERTIERTEN Maske (Person schwarz, Hintergrund weiß)
                 mask_array = inverted_mask_array.copy()
                 print("   Arbeite auf invertierter Maske (Person schwarz, Hintergrund weiß)")
-                # Größte weiße Komponente finden (Hintergrund)
-                labeled_array, num_features = ndimage.label(mask_array)
-                print(f"   Gefundene weiße Komponenten (Hintergrund): {num_features}")
-                # Nur wenn wir mehrere weiße Komponenten haben (z.B. Hintergrund durch Person geteilt)
-                if num_features > 1:
-                    # Finde alle weißen Komponenten
-                    sizes = ndimage.sum(mask_array, labeled_array, range(1, num_features + 1))
-                    print(f"   Größen der weißen Komponenten: {sizes}")
-                    # Verbinde alle weißen Komponenten (Hintergrundteile)
-                    for i in range(1, num_features + 1):
-                        mask_array = np.where(labeled_array == i, 255, mask_array)
-                    print(f"   ✅ Verbinde {num_features} Hintergrund-Komponenten")
                 # Morphologische Operationen für saubere Umgebung
                 kernel = np.ones((5,5), np.uint8)
-                print(f"   Wende MORPH_CLOSE an (Kernel 5x5) um schwarze Löcher zu füllen...")
                 mask_array = cv2.morphologyEx(mask_array, cv2.MORPH_CLOSE, kernel)
-                print(f"   Wende MORPH_OPEN an (Kernel 5x5) um kleine weiße Inseln zu entfernen...")
                 mask_array = cv2.morphologyEx(mask_array, cv2.MORPH_OPEN, kernel)
-                # Umgebung erweitern für besseren Personenschutz (2 Pixel)
-                print(f"   Wende DILATE an (Kernel 2x2) für Personenschutz...")
                 mask_array = cv2.dilate(mask_array, np.ones((2,2), np.uint8), iterations=1)
-                # Leichte Unschärfe für natürlichere Übergänge
-                print(f"   Wende GaussianBlur an (Kernel 3x3) für glatte Übergänge...")
                 mask_array = cv2.GaussianBlur(mask_array, (3, 3), 0)
                 print("   ✅ Umwelt-Modus: Person geschützt, Hintergrund optimiert")
@@ -335,70 +479,37 @@ class ControlNetProcessor:
                 # Größte weiße Komponente behalten (Person)
                 labeled_array, num_features = ndimage.label(mask_array)
-                print(f"   Gefundene weiße Komponenten (Person): {num_features}")
                 if num_features > 1:
                     sizes = ndimage.sum(mask_array, labeled_array, range(1, num_features + 1))
-                    print(f"   Größen der weißen Komponenten: {sizes}")
                     largest_component = np.argmax(sizes) + 1
                     mask_array = np.where(labeled_array == largest_component, mask_array, 0)
-                    print(f"   ✅ Behalte größte Person-Komponente ({num_features} Komponenten)")
                 # Maske leicht erweitern für bessere Abdeckung
                 kernel = np.ones((3,3), np.uint8)
-                print(f"   Wende DILATE an (Kernel 3x3) für bessere Abdeckung...")
                 mask_array = cv2.dilate(mask_array, kernel, iterations=1)
-                # Morphologische Glättung
-                print(f"   Wende MORPH_CLOSE an (Kernel 3x3) für glatte Kanten...")
                 mask_array = cv2.morphologyEx(mask_array, cv2.MORPH_CLOSE, kernel)
                 print("   ✅ Focus-Modus: Person verändert, Hintergrund geschützt")
             elif mode == "face_only_change":
-                print("👤 MODUS: NUR GESICHT ÄNDERN (AUF AUSSCHNITT)")
                 # Arbeite auf der ORIGINAL-Maske (Person weiß, Hintergrund schwarz)
                 mask_array = original_mask_array.copy()
                 print("   Arbeite auf originaler Maske (Person weiß, Hintergrund schwarz)")
-                # Größte weiße Komponente behalten (Person)
-                labeled_array, num_features = ndimage.label(mask_array)
-                print(f"   Gefundene weiße Komponenten auf AUSSCHNITT: {num_features}")
-                if num_features > 0:
-                    sizes = ndimage.sum(mask_array, labeled_array, range(1, num_features + 1))
-                    print(f"   Größen der weißen Komponenten auf AUSSCHNITT: {sizes[:10]}...")  # Nur erste 10 anzeigen
-                    if num_features > 1:
-                        # WICHTIG: Für Gesicht nehmen wir die GRÖSSTE Komponente im AUSSCHNITT
-                        # (Im Ausschnitt sollte das das Gesicht sein, nicht der Hintergrund)
-                        largest_component = np.argmax(sizes) + 1
-                        mask_array = np.where(labeled_array == largest_component, mask_array, 0)
-                        print(f"   ✅ Behalte größte Komponente im Ausschnitt ({num_features} Komponenten)")
-                        print(f"   📊 Größe der behaltenen Komponente: {sizes[largest_component-1]:,} Pixel")
-                    else:
-                        print(f"   ℹ️ Nur eine Komponente gefunden, behalte diese")
                 # Starke Erosion für präzises Gesicht
                 kernel = np.ones((3,3), np.uint8)
-                print(f"   Wende ERODE an (Kernel 3x3, 2 Iterationen) für präzises Gesicht...")
                 mask_array = cv2.erode(mask_array, kernel, iterations=2)
-                # Zusätzliche Präzisions-Erosion
-                print(f"   Wende zusätzliche ERODE an (Kernel 2x2, 1 Iteration)...")
                 mask_array = cv2.erode(mask_array, np.ones((2,2), np.uint8), iterations=1)
-                # Sanfte Glättung der Kanten
-                print(f"   Wende GaussianBlur an (Kernel 3x3) für glatte Kanten...")
                 mask_array = cv2.GaussianBlur(mask_array, (3, 3), 0)
-                print("   ✅ Gesichts-Modus: Nachbearbeitung auf Ausschnitt abgeschlossen")
                 # ============================================================
-                # SPEZIALSCHRITT NUR FÜR face_only_change: MASKE ZURÜCKSKALIEREN
                 # ============================================================
                 print("-" * 60)
-                print("🔄 SPEZIALSCHRITT: MASKE VOM AUSSCHNITT ZURÜCK AUF ORIGINALGRÖSSE")
                 # Temporäre Maske aus dem Array erstellen
                 temp_mask = Image.fromarray(mask_array).convert("L")
@@ -428,10 +539,16 @@ class ControlNetProcessor:
             black_ratio = 100 - white_ratio
             print("-" * 60)
-            print("📊 MASKEN-STATISTIK (NACHBEARBEITET)")
             print(f"   Weiße Pixel (Veränderungsbereich): {white_pixels:,} ({white_ratio:.1f}%)")
             print(f"   Schwarze Pixel (Erhaltungsbereich): {black_pixels:,} ({black_ratio:.1f}%)")
             print(f"   Gesamtpixel: {total_pixels:,}")
             # 10. Zurück zu PIL Image
             mask = Image.fromarray(mask_array).convert("L")
@@ -440,6 +557,7 @@ class ControlNetProcessor:
             print(f"✅ SAM 2 SEGMENTIERUNG ABGESCHLOSSEN")
             print(f"📐 Finale Maskengröße: {mask.size}")
             print(f"🎛️  Verwendeter Modus: {mode}")
             print("#" * 80)
             return mask

             # 2. Validiere BBox
             x1, y1, x2, y2 = self._validate_bbox(image, bbox_coords)
+            original_bbox = (x1, y1, x2, y2)
+            original_bbox_size = (x2 - x1, y2 - y1)
+            print(f"📏 Original-BBox Größe: {original_bbox_size[0]} × {original_bbox_size[1]} px")
             # ============================================================
             # SPEZIALBEHANDLUNG NUR FÜR face_only_change
             # ============================================================
             if mode == "face_only_change":
                 print("-" * 60)
+                print("👤 SPEZIALMODUS: NUR GESICHT - EMPFOHLENER WORKFLOW")
                 print("-" * 60)
+                # ============================================================
+                # SCHRITT 1: Originalbild sichern
+                # ============================================================
                 original_image = image
+                print(f"💾 Originalbild gesichert: {original_image.size}")
+                # ============================================================
+                # SCHRITT 2: Crop = BBox × 2.0 (einmal, sauber, quadratisch)
+                # ============================================================
+                print("✂️ SCHRITT 2: ERSTELLE QUADRATISCHEN AUSSCHNITT (BBox × 2.0)")
+                # BBox-Zentrum berechnen
+                bbox_center_x = (x1 + x2) // 2
+                bbox_center_y = (y1 + y2) // 2
+                print(f"   📍 BBox-Zentrum: ({bbox_center_x}, {bbox_center_y})")
+                # Größte Dimension der BBox finden
+                bbox_width = x2 - x1
+                bbox_height = y2 - y1
+                bbox_max_dim = max(bbox_width, bbox_height)
+                print(f"   📏 BBox Dimensionen: {bbox_width} × {bbox_height} px")
+                print(f"   📐 Maximale BBox-Dimension: {bbox_max_dim} px")
+                # Crop-Größe berechnen (BBox × 2.0)
+                crop_size = int(bbox_max_dim * 2.0)
+                print(f"   🎯 Ziel-Crop-Größe: {crop_size} × {crop_size} px (BBox × 2.0)")
+                # Crop-Koordinaten berechnen (zentriert um BBox)
+                crop_x1 = bbox_center_x - crop_size // 2
+                crop_y1 = bbox_center_y - crop_size // 2
+                crop_x2 = crop_x1 + crop_size
+                crop_y2 = crop_y1 + crop_size
+                # Sicherstellen, dass Crop innerhalb der Bildgrenzen bleibt
+                crop_x1 = max(0, crop_x1)
+                crop_y1 = max(0, crop_y1)
+                crop_x2 = min(original_image.width, crop_x2)
+                crop_y2 = min(original_image.height, crop_y2)
+                # Falls Crop zu klein ist, anpassen
+                actual_crop_width = crop_x2 - crop_x1
+                actual_crop_height = crop_y2 - crop_y1
+                if actual_crop_width < crop_size or actual_crop_height < crop_size:
+                    # An Kanten anpassen
+                    if crop_x1 == 0:
+                        crop_x2 = min(original_image.width, crop_size)
+                    elif crop_x2 == original_image.width:
+                        crop_x1 = max(0, original_image.width - crop_size)
+                    if crop_y1 == 0:
+                        crop_y2 = min(original_image.height, crop_size)
+                    elif crop_y2 == original_image.height:
+                        crop_y1 = max(0, original_image.height - crop_size)
+                print(f"   🔲 Crop-Bereich: [{crop_x1}, {crop_y1}, {crop_x2}, {crop_y2}]")
+                print(f"   📏 Tatsächliche Crop-Größe: {crop_x2-crop_x1} × {crop_y2-crop_y1} px")
                 # Bild ausschneiden
+                cropped_image = original_image.crop((crop_x1, crop_y1, crop_x2, crop_y2))
+                print(f"   ✅ Quadratischer Ausschnitt erstellt: {cropped_image.size}")
                 # ============================================================
+                # SCHRITT 3: BBox-Koordinaten im Crop-Koordinatensystem berechnen
                 # ============================================================
+                print("📐 SCHRITT 3: BBox-KOORDINATEN TRANSFORMIEREN")
                 rel_x1 = x1 - crop_x1
                 rel_y1 = y1 - crop_y1
                 rel_x2 = x2 - crop_x1
                 rel_y2 = y2 - crop_y1
+                # Sicherstellen, dass BBox innerhalb des Crops liegt
+                rel_x1 = max(0, rel_x1)
+                rel_y1 = max(0, rel_y1)
+                rel_x2 = min(cropped_image.width, rel_x2)
+                rel_y2 = min(cropped_image.height, rel_y2)
+                print(f"   🎯 Relative BBox im Crop: [{rel_x1}, {rel_y1}, {rel_x2}, {rel_y2}]")
                 print(f"   📏 Relative BBox Größe: {rel_x2-rel_x1} × {rel_y2-rel_y1} px")
+                # ============================================================
+                # SCHRITT 4: Bildkontrast verstärken für bessere Segmentierung
+                # ============================================================
+                print("🔍 SCHRITT 4: KONTRASTVERSTÄRKUNG FÜR SAM")
+                contrast_enhancer = ImageEnhance.Contrast(cropped_image)
+                enhanced_cropped_image = contrast_enhancer.enhance(1.5)  # 50% mehr Kontrast
+                print(f"   ✅ Kontrast um 50% erhöht")
+                # Für SAM: Verwende kontrastverstärkten Ausschnitt und relative Koordinaten
+                image = enhanced_cropped_image
                 x1, y1, x2, y2 = rel_x1, rel_y1, rel_x2, rel_y2
+                print("   🔄 SAM wird auf kontrastverstärktem Ausschnitt ausgeführt")
+                print(f"   📊 SAM-Eingabegröße: {image.size}")
             # ============================================================
             # GEMEINSAME SAM-LOGIK FÜR ALLE MODI
+            # (arbeitet auf `image` - bei face_only_change ist das der Crop)
             # ============================================================
             print("-" * 60)
             print(f"📦 BOUNDING BOX DETAILS FÜR SAM:")
             print(f"   Konvertiere Bild zu NumPy Array: {image_np.shape}")
             print(f"   Erstelle Input Boxes: {input_boxes}")
+            # ============================================================
+            # SCHRITT 4-5: SAM mit Box-Prompt = ursprüngliche BBox
+            # (im Crop-Koordinatensystem bei face_only_change)
+            # ============================================================
+            print("🎯 SCHRITT 4-5: SAM MIT BOX-PROMPT")
             print("   Verarbeite Bild mit SAM 2 Processor...")
             inputs = self.sam_processor(
                 image_np,
                 print("   Führe Vorhersage durch...")
                 outputs = self.sam_model(**inputs)
                 print(f"✅ Vorhersage abgeschlossen")
+                print(f"   Anzahl der Vorhersagemasken: {outputs.pred_masks.shape[2]}")
             # 5. Maske extrahieren und auf Originalgröße skalieren
+            print("📏 SCHRITT 6: MASKE EXTRAHIEREN UND SKALIEREN")
+            # ============================================================
+            # SCHRITT 6: SAM liefert mehrere Masken
+            # ============================================================
+            num_masks = outputs.pred_masks.shape[2]
+            print(f"   SAM lieferte {num_masks} verschiedene Masken")
+            # Extrahiere alle Masken
+            all_masks = []
+            mask_qualities = []
+            for i in range(num_masks):
+                single_mask = outputs.pred_masks[:, :, i, :, :]
+                resized_mask = F.interpolate(
+                    single_mask,
+                    size=(image.height, image.width),
+                    mode='bilinear',
+                    align_corners=False
+                ).squeeze()
+                mask_np = resized_mask.sigmoid().cpu().numpy()
+                all_masks.append(mask_np)
+                # Basis-Statistiken für jede Maske
+                mask_binary = (mask_np > 0.5).astype(np.uint8)
+                mask_area = np.sum(mask_binary)
+                print(f"   Maske {i+1}: Größe={mask_area:,} Pixel, Max-Konfidenz={mask_np.max():.3f}")
+            # ============================================================
+            # SCHRITT 6: Maskenauswahl per Heuristik
+            # ============================================================
+            print("🤔 SCHRITT 6: MASKENAUSWAHL MIT HEURISTIK")
+            # Erwartete BBox für Heuristik (in Pixel-Koordinaten)
+            bbox_center = ((x1 + x2) // 2, (y1 + y2) // 2)
+            bbox_area = (x2 - x1) * (y2 - y1)
+            print(f"   Erwartetes BBox-Zentrum: {bbox_center}")
+            print(f"   Erwartete BBox-Fläche: {bbox_area:,} Pixel")
+            best_mask_idx = 0
+            best_score = -1
+            for i, mask_np in enumerate(all_masks):
+                # Threshold für binäre Maske
+                mask_binary = (mask_np > 0.5).astype(np.uint8)
+                if np.sum(mask_binary) == 0:
+                    print(f"   ❌ Maske {i+1}: Keine Pixel, überspringe")
+                    continue
+                # 1. Größte Überlappung mit BBox
+                # Erstelle binäre BBox-Maske
+                bbox_mask = np.zeros((image.height, image.width), dtype=np.uint8)
+                bbox_mask[y1:y2, x1:x2] = 1
+                overlap = np.sum(mask_binary & bbox_mask)
+                bbox_overlap_ratio = overlap / np.sum(bbox_mask) if np.sum(bbox_mask) > 0 else 0
+                # 2. Schwerpunkt nahe BBox-Zentrum
+                y_coords, x_coords = np.where(mask_binary > 0)
+                if len(y_coords) > 0:
+                    centroid_y = np.mean(y_coords)
+                    centroid_x = np.mean(x_coords)
+                    centroid_distance = np.sqrt((centroid_x - bbox_center[0])**2 + (centroid_y - bbox_center[1])**2)
+                    normalized_distance = centroid_distance / max(image.width, image.height)
+                else:
+                    centroid_distance = float('inf')
+                    normalized_distance = 1.0
+                # 3. Maskenfläche im erwarteten Bereich
+                mask_area = np.sum(mask_binary)
+                area_ratio = mask_area / bbox_area
+                area_score = 1.0 - min(abs(area_ratio - 1.0), 1.0)  # 1.0 ist perfekt
+                # 4. SAM-Konfidenz
+                confidence_score = mask_np.max()
+                # Gesamtscore berechnen (Gewichtung anpassbar)
+                score = (
+                    bbox_overlap_ratio * 0.4 +      # 40% Überlappung mit BBox
+                    (1.0 - normalized_distance) * 0.3 +  # 30% Zentrumsnähe
+                    area_score * 0.2 +              # 20% Flächenübereinstimmung
+                    confidence_score * 0.1           # 10% SAM-Konfidenz
+                )
+                print(f"   📊 Maske {i+1} Scores:")
+                print(f"     • BBox-Überlappung: {bbox_overlap_ratio:.3f} ({overlap:,} Pixel)")
+                print(f"     • Zentrums-Distanz: {centroid_distance:.1f} px (normalisiert: {normalized_distance:.3f})")
+                print(f"     • Flächen-Ratio: {area_ratio:.3f} ({mask_area:,} Pixel)")
+                print(f"     • Max-Konfidenz: {confidence_score:.3f}")
+                print(f"     • GESAMTSCORE: {score:.3f}")
+                if score > best_score:
+                    best_score = score
+                    best_mask_idx = i
+            print(f"✅ Beste Maske ausgewählt: Nr. {best_mask_idx+1} mit Score {best_score:.3f}")
+            # Beste Maske verwenden
+            mask_np = all_masks[best_mask_idx]
             # ============================================================
+            # DYNAMISCHER THRESHOLD
             # ============================================================
             max_val = mask_np.max()
+            print(f"   🔍 Maximaler SAM-Konfidenzwert der besten Maske: {max_val:.3f}")
             if max_val < 0.6:
+                dynamic_threshold = 0.2
                 print(f"   ⚠️  SAM ist unsicher (max_val={max_val:.3f} < 0.6)")
                 print(f"   🎯 Verwende festen niedrigen Threshold: {dynamic_threshold:.3f}")
             else:
+                dynamic_threshold = max_val * 0.8
                 print(f"   ✅ SAM ist sicher (max_val={max_val:.3f} >= 0.6)")
                 print(f"   🎯 Dynamischer Threshold: {dynamic_threshold:.3f} (80% von Maximum)")
             print(f"   Nach Threshold ({dynamic_threshold:.3f}): {mask_array.shape}, Unique Werte: {unique_vals}")
             # ============================================================
+            # SCHRITT 7: Postprocessing
             # ============================================================
+            print("🔧 SCHRITT 7: POSTPROCESSING")
+            # a) Kleine Löcher füllen
+            if np.sum(mask_array > 0) > 0:
+                # Finde alle schwarze Regionen in der weißen Maske (Löcher)
+                mask_inverted = 255 - mask_array
+                labeled_holes, num_holes = ndimage.label(mask_inverted)
+                if num_holes > 1:  # 1 ist der Hintergrund
+                    print(f"   🔍 Gefundene Löcher: {num_holes - 1}")
+                    # Fülle kleine Löcher
+                    for i in range(2, num_holes + 1):  # Beginne bei 2 (1 ist Hintergrund)
+                        hole_size = np.sum(labeled_holes == i)
+                        if hole_size < 500:  # Kleine Löcher füllen
+                            mask_array = np.where(labeled_holes == i, 255, mask_array)
+                            print(f"     • Loch {i} gefüllt ({hole_size} Pixel)")
+                # b) Kleine Komponenten entfernen
                 labeled_array, num_features = ndimage.label(mask_array)
                 if num_features > 1:
+                    print(f"   🧹 Komponenten vor Filterung: {num_features}")
                     sizes = ndimage.sum(mask_array, labeled_array, range(1, num_features + 1))
+                    total_mask_area = np.sum(mask_array > 0)
+                    min_size = total_mask_area * 0.1  # 10% der Gesamtfläche
+                    print(f"   📊 Gesamtmaskenfläche: {total_mask_area:,} Pixel")
+                    print(f"   📏 Minimale Komponentengröße: {min_size:,.0f} Pixel")
                     for i in range(1, num_features + 1):
+                        if sizes[i-1] < min_size:
                             mask_array = np.where(labeled_array == i, 0, mask_array)
+                            print(f"     • Komponente {i} entfernt ({sizes[i-1]:,} Pixel)")
+            # c) Ggf. leichte Erosion/Dilation
+            print("   ⚙️  Leichte morphologische Operationen...")
+            kernel = np.ones((3, 3), np.uint8)
+            # Leichte Erosion für saubere Kanten
+            mask_array = cv2.erode(mask_array, kernel, iterations=1)
+            print("     • Erosion (1 Iteration) angewendet")
+            # Leichte Dilation für glatte Übergänge
+            mask_array = cv2.dilate(mask_array, kernel, iterations=1)
+            print("     • Dilation (1 Iteration) angewendet")
+            # BEIDE MASKEN ERSTELLEN (vor Nachbearbeitung)
             original_mask_array = mask_array.copy()        # Person weiß (255), Hintergrund schwarz (0)
             inverted_mask_array = 255 - mask_array         # Person schwarz (0), Hintergrund weiß (255)
             print("-" * 60)
+            print(f"🔧 MODUS-SPEZIFISCHE NACHBEARBEITUNG: {mode}")
             print(f"   Original-Maske (Person weiß): {original_mask_array.shape}")
             print(f"   Invertierte Maske (Person schwarz): {inverted_mask_array.shape}")
+            # MODUS-SPEZIFISCHE NACHBEARBEITUNG
             if mode == "environment_change":
                 print("🌳 MODUS: UMWELT ÄNDERN")
                 # Arbeite auf der INVERTIERTEN Maske (Person schwarz, Hintergrund weiß)
                 mask_array = inverted_mask_array.copy()
                 print("   Arbeite auf invertierter Maske (Person schwarz, Hintergrund weiß)")
                 # Morphologische Operationen für saubere Umgebung
                 kernel = np.ones((5,5), np.uint8)
                 mask_array = cv2.morphologyEx(mask_array, cv2.MORPH_CLOSE, kernel)
                 mask_array = cv2.morphologyEx(mask_array, cv2.MORPH_OPEN, kernel)
                 mask_array = cv2.dilate(mask_array, np.ones((2,2), np.uint8), iterations=1)
                 mask_array = cv2.GaussianBlur(mask_array, (3, 3), 0)
                 print("   ✅ Umwelt-Modus: Person geschützt, Hintergrund optimiert")
                 # Größte weiße Komponente behalten (Person)
                 labeled_array, num_features = ndimage.label(mask_array)
                 if num_features > 1:
                     sizes = ndimage.sum(mask_array, labeled_array, range(1, num_features + 1))
                     largest_component = np.argmax(sizes) + 1
                     mask_array = np.where(labeled_array == largest_component, mask_array, 0)
                 # Maske leicht erweitern für bessere Abdeckung
                 kernel = np.ones((3,3), np.uint8)
                 mask_array = cv2.dilate(mask_array, kernel, iterations=1)
                 mask_array = cv2.morphologyEx(mask_array, cv2.MORPH_CLOSE, kernel)
                 print("   ✅ Focus-Modus: Person verändert, Hintergrund geschützt")
             elif mode == "face_only_change":
+                print("👤 MODUS: NUR GESICHT ÄNDERN")
                 # Arbeite auf der ORIGINAL-Maske (Person weiß, Hintergrund schwarz)
                 mask_array = original_mask_array.copy()
                 print("   Arbeite auf originaler Maske (Person weiß, Hintergrund schwarz)")
                 # Starke Erosion für präzises Gesicht
                 kernel = np.ones((3,3), np.uint8)
                 mask_array = cv2.erode(mask_array, kernel, iterations=2)
                 mask_array = cv2.erode(mask_array, np.ones((2,2), np.uint8), iterations=1)
                 mask_array = cv2.GaussianBlur(mask_array, (3, 3), 0)
+                print("   ✅ Gesichts-Modus: Postprocessing auf Ausschnitt abgeschlossen")
                 # ============================================================
+                # SPEZIALSCHRITT: MASKE ZURÜCK AUF ORIGINALGRÖSSE BRINGEN
                 # ============================================================
                 print("-" * 60)
+                print("🔄 MASKE VOM AUSSCHNITT ZURÜCK AUF ORIGINALGRÖSSE")
                 # Temporäre Maske aus dem Array erstellen
                 temp_mask = Image.fromarray(mask_array).convert("L")
             black_ratio = 100 - white_ratio
             print("-" * 60)
+            print("📊 MASKEN-STATISTIK (FINAL)")
             print(f"   Weiße Pixel (Veränderungsbereich): {white_pixels:,} ({white_ratio:.1f}%)")
             print(f"   Schwarze Pixel (Erhaltungsbereich): {black_pixels:,} ({black_ratio:.1f}%)")
             print(f"   Gesamtpixel: {total_pixels:,}")
+            if mode == "face_only_change":
+                # Zusätzliche Statistik für Gesichtsmodus
+                original_face_area = original_bbox_size[0] * original_bbox_size[1]
+                coverage_ratio = white_pixels / original_face_area if original_face_area > 0 else 0
+                print(f"   👤 Gesichtsabdeckung: {coverage_ratio:.1%} der ursprünglichen BBox")
             # 10. Zurück zu PIL Image
             mask = Image.fromarray(mask_array).convert("L")
             print(f"✅ SAM 2 SEGMENTIERUNG ABGESCHLOSSEN")
             print(f"📐 Finale Maskengröße: {mask.size}")
             print(f"🎛️  Verwendeter Modus: {mode}")
+            print(f"👤 Bei face_only_change: Crop={crop_size}×{crop_size}px, Heuristik-Score={best_score:.3f}")
             print("#" * 80)
             return mask