Spaces:

Astridkraft
/

Stable-ControlNet-GPU

Paused

App Files Files Community

Astridkraft committed on Dec 29, 2025

Commit

421ffe3

verified ·

1 Parent(s): 4e659cc

Update controlnet_module.py

Browse files

Files changed (1) hide show

controlnet_module.py +343 -200

controlnet_module.py CHANGED Viewed

@@ -102,6 +102,7 @@ class ControlNetProcessor:
             print(f"⚠️ Fehler beim Glätten der Maske: {e}")
             return mask_array
     def create_sam_mask(self, image, bbox_coords, mode):
         """
         ERWEITERTE Funktion: Erstellt präzise Maske mit SAM 2
@@ -114,7 +115,13 @@ class ControlNetProcessor:
             print(f"📐 Eingabebild-Größe: {image.size}")
             print(f"🎛️  Ausgewählter Modus: {mode}")
-            # 1. SAM2 laden (falls noch nicht geschehen)
             if not self.sam_initialized:
                 print("📥 SAM 2 ist noch nicht geladen, starte Lazy Loading...")
                 self._lazy_load_sam()
@@ -136,7 +143,7 @@ class ControlNetProcessor:
             # ============================================================
             if mode == "face_only_change":
                 print("-" * 60)
-                print("👤 SPEZIALMODUS: NUR GESICHT - EMPFOHLENER WORKFLOW")
                 print("-" * 60)
                 # ============================================================
@@ -146,9 +153,9 @@ class ControlNetProcessor:
                 print(f"💾 Originalbild gesichert: {original_image.size}")
                 # ============================================================
-                # SCHRITT 2: Crop = BBox × 2.0 (einmal, sauber, quadratisch)
                 # ============================================================
-                print("✂️ SCHRITT 2: ERSTELLE QUADRATISCHEN AUSSCHNITT (BBox × 2.0)")
                 # BBox-Zentrum berechnen
                 bbox_center_x = (x1 + x2) // 2
@@ -162,9 +169,9 @@ class ControlNetProcessor:
                 print(f"   📏 BBox Dimensionen: {bbox_width} × {bbox_height} px")
                 print(f"   📐 Maximale BBox-Dimension: {bbox_max_dim} px")
-                # Crop-Größe berechnen (BBox × 2.0)
-                crop_size = int(bbox_max_dim * 2.0)
-                print(f"   🎯 Ziel-Crop-Größe: {crop_size} × {crop_size} px (BBox × 2.0)")
                 # Crop-Koordinaten berechnen (zentriert um BBox)
                 crop_x1 = bbox_center_x - crop_size // 2
@@ -202,7 +209,7 @@ class ControlNetProcessor:
                 print(f"   ✅ Quadratischer Ausschnitt erstellt: {cropped_image.size}")
                 # ============================================================
-                # SCHRITT 3: BBox-Koordinaten im Crop-Koordinatensystem berechnen
                 # ============================================================
                 print("📐 SCHRITT 3: BBox-KOORDINATEN TRANSFORMIEREN")
                 rel_x1 = x1 - crop_x1
@@ -220,23 +227,36 @@ class ControlNetProcessor:
                 print(f"   📏 Relative BBox Größe: {rel_x2-rel_x1} × {rel_y2-rel_y1} px")
                 # ============================================================
-                # SCHRITT 4: Bildkontrast verstärken für bessere Segmentierung
                 # ============================================================
-                print("🔍 SCHRITT 4: KONTRASTVERSTÄRKUNG FÜR SAM")
                 contrast_enhancer = ImageEnhance.Contrast(cropped_image)
-                enhanced_cropped_image = contrast_enhancer.enhance(1.5)  # 50% mehr Kontrast
-                print(f"   ✅ Kontrast um 50% erhöht")
-                # Für SAM: Verwende kontrastverstärkten Ausschnitt und relative Koordinaten
-                image = enhanced_cropped_image
                 x1, y1, x2, y2 = rel_x1, rel_y1, rel_x2, rel_y2
-                print("   🔄 SAM wird auf kontrastverstärktem Ausschnitt ausgeführt")
                 print(f"   📊 SAM-Eingabegröße: {image.size}")
             # ============================================================
             # GEMEINSAME SAM-LOGIK FÜR ALLE MODI
-            # (arbeitet auf `image` - bei face_only_change ist das der Crop)
             # ============================================================
             print("-" * 60)
             print(f"📦 BOUNDING BOX DETAILS FÜR SAM:")
@@ -248,15 +268,37 @@ class ControlNetProcessor:
             print("-" * 60)
             print("🖼️  BILDAUFBEREITUNG FÜR SAM 2")
             image_np = np.array(image.convert("RGB"))
-            input_boxes = [[[x1, y1, x2, y2]]]
-            print(f"   Konvertiere Bild zu NumPy Array: {image_np.shape}")
-            print(f"   Erstelle Input Boxes: {input_boxes}")
             # ============================================================
-            # SCHRITT 4-5: SAM mit Box-Prompt = ursprüngliche BBox
-            # (im Crop-Koordinatensystem bei face_only_change)
             # ============================================================
-            print("🎯 SCHRITT 4-5: SAM MIT BOX-PROMPT")
             print("   Verarbeite Bild mit SAM 2 Processor...")
             inputs = self.sam_processor(
                 image_np,
@@ -274,18 +316,14 @@ class ControlNetProcessor:
                 print(f"✅ Vorhersage abgeschlossen")
                 print(f"   Anzahl der Vorhersagemasken: {outputs.pred_masks.shape[2]}")
-            # 5. Maske extrahieren und auf Originalgröße skalieren
-            print("📏 SCHRITT 6: MASKE EXTRAHIEREN UND SKALIEREN")
-            # ============================================================
-            # SCHRITT 6: SAM liefert mehrere Masken
-            # ============================================================
             num_masks = outputs.pred_masks.shape[2]
             print(f"   SAM lieferte {num_masks} verschiedene Masken")
             # Extrahiere alle Masken
             all_masks = []
-            mask_qualities = []
             for i in range(num_masks):
                 single_mask = outputs.pred_masks[:, :, i, :, :]
@@ -305,11 +343,10 @@ class ControlNetProcessor:
                 print(f"   Maske {i+1}: Größe={mask_area:,} Pixel, Max-Konfidenz={mask_np.max():.3f}")
             # ============================================================
-            # SCHRITT 6: Maskenauswahl per Heuristik
             # ============================================================
-            print("🤔 SCHRITT 6: MASKENAUSWAHL MIT HEURISTIK")
-            # Erwartete BBox für Heuristik (in Pixel-Koordinaten)
             bbox_center = ((x1 + x2) // 2, (y1 + y2) // 2)
             bbox_area = (x2 - x1) * (y2 - y1)
             print(f"   Erwartetes BBox-Zentrum: {bbox_center}")
@@ -319,58 +356,158 @@ class ControlNetProcessor:
             best_score = -1
             for i, mask_np in enumerate(all_masks):
-                # Threshold für binäre Maske
-                mask_binary = (mask_np > 0.5).astype(np.uint8)
                 if np.sum(mask_binary) == 0:
-                    print(f"   ❌ Maske {i+1}: Keine Pixel, überspringe")
                     continue
-                # 1. Größte Überlappung mit BBox
-                # Erstelle binäre BBox-Maske
-                bbox_mask = np.zeros((image.height, image.width), dtype=np.uint8)
-                bbox_mask[y1:y2, x1:x2] = 1
-                overlap = np.sum(mask_binary & bbox_mask)
-                bbox_overlap_ratio = overlap / np.sum(bbox_mask) if np.sum(bbox_mask) > 0 else 0
-                # 2. Schwerpunkt nahe BBox-Zentrum
-                y_coords, x_coords = np.where(mask_binary > 0)
-                if len(y_coords) > 0:
-                    centroid_y = np.mean(y_coords)
-                    centroid_x = np.mean(x_coords)
-                    centroid_distance = np.sqrt((centroid_x - bbox_center[0])**2 + (centroid_y - bbox_center[1])**2)
-                    normalized_distance = centroid_distance / max(image.width, image.height)
-                else:
-                    centroid_distance = float('inf')
-                    normalized_distance = 1.0
-                # 3. Maskenfläche im erwarteten Bereich
-                mask_area = np.sum(mask_binary)
-                area_ratio = mask_area / bbox_area
-                area_score = 1.0 - min(abs(area_ratio - 1.0), 1.0)  # 1.0 ist perfekt
-                # 4. SAM-Konfidenz
-                confidence_score = mask_np.max()
-                # Gesamtscore berechnen (Gewichtung anpassbar)
-                score = (
-                    bbox_overlap_ratio * 0.4 +      # 40% Überlappung mit BBox
-                    (1.0 - normalized_distance) * 0.3 +  # 30% Zentrumsnähe
-                    area_score * 0.2 +              # 20% Flächenübereinstimmung
-                    confidence_score * 0.1           # 10% SAM-Konfidenz
-                )
-                print(f"   📊 Maske {i+1} Scores:")
-                print(f"     • BBox-Überlappung: {bbox_overlap_ratio:.3f} ({overlap:,} Pixel)")
-                print(f"     • Zentrums-Distanz: {centroid_distance:.1f} px (normalisiert: {normalized_distance:.3f})")
-                print(f"     • Flächen-Ratio: {area_ratio:.3f} ({mask_area:,} Pixel)")
-                print(f"     • Max-Konfidenz: {confidence_score:.3f}")
-                print(f"     • GESAMTSCORE: {score:.3f}")
                 if score > best_score:
                     best_score = score
                     best_mask_idx = i
             print(f"✅ Beste Maske ausgewählt: Nr. {best_mask_idx+1} mit Score {best_score:.3f}")
@@ -378,186 +515,191 @@ class ControlNetProcessor:
             mask_np = all_masks[best_mask_idx]
             # ============================================================
-            # DYNAMISCHER THRESHOLD
             # ============================================================
             max_val = mask_np.max()
             print(f"   🔍 Maximaler SAM-Konfidenzwert der besten Maske: {max_val:.3f}")
-            if max_val < 0.6:
-                dynamic_threshold = 0.2
-                print(f"   ⚠️  SAM ist unsicher (max_val={max_val:.3f} < 0.6)")
-                print(f"   🎯 Verwende festen niedrigen Threshold: {dynamic_threshold:.3f}")
             else:
-                dynamic_threshold = max_val * 0.8
-                print(f"   ✅ SAM ist sicher (max_val={max_val:.3f} >= 0.6)")
-                print(f"   🎯 Dynamischer Threshold: {dynamic_threshold:.3f} (80% von Maximum)")
             mask_array = (mask_np > dynamic_threshold).astype(np.uint8) * 255
-            unique_vals = np.unique(mask_array)
-            print(f"   Nach Threshold ({dynamic_threshold:.3f}): {mask_array.shape}, Unique Werte: {unique_vals}")
             # ============================================================
-            # SCHRITT 7: Postprocessing
             # ============================================================
-            print("🔧 SCHRITT 7: POSTPROCESSING")
-            # a) Kleine Löcher füllen
-            if np.sum(mask_array > 0) > 0:
-                # Finde alle schwarze Regionen in der weißen Maske (Löcher)
-                mask_inverted = 255 - mask_array
-                labeled_holes, num_holes = ndimage.label(mask_inverted)
-                if num_holes > 1:  # 1 ist der Hintergrund
-                    print(f"   🔍 Gefundene Löcher: {num_holes - 1}")
-                    # Fülle kleine Löcher
-                    for i in range(2, num_holes + 1):  # Beginne bei 2 (1 ist Hintergrund)
-                        hole_size = np.sum(labeled_holes == i)
-                        if hole_size < 500:  # Kleine Löcher füllen
-                            mask_array = np.where(labeled_holes == i, 255, mask_array)
-                            print(f"     • Loch {i} gefüllt ({hole_size} Pixel)")
-                # b) Kleine Komponenten entfernen
                 labeled_array, num_features = ndimage.label(mask_array)
-                if num_features > 1:
-                    print(f"   🧹 Komponenten vor Filterung: {num_features}")
                     sizes = ndimage.sum(mask_array, labeled_array, range(1, num_features + 1))
-                    total_mask_area = np.sum(mask_array > 0)
-                    min_size = total_mask_area * 0.1  # 10% der Gesamtfläche
-                    print(f"   📊 Gesamtmaskenfläche: {total_mask_area:,} Pixel")
-                    print(f"   📏 Minimale Komponentengröße: {min_size:,.0f} Pixel")
-                    for i in range(1, num_features + 1):
-                        if sizes[i-1] < min_size:
-                            mask_array = np.where(labeled_array == i, 0, mask_array)
-                            print(f"     • Komponente {i} entfernt ({sizes[i-1]:,} Pixel)")
-            # c) Ggf. leichte Erosion/Dilation
-            print("   ⚙️  Leichte morphologische Operationen...")
-            kernel = np.ones((3, 3), np.uint8)
-            # Leichte Erosion für saubere Kanten
-            mask_array = cv2.erode(mask_array, kernel, iterations=1)
-            print("     • Erosion (1 Iteration) angewendet")
-            # Leichte Dilation für glatte Übergänge
-            mask_array = cv2.dilate(mask_array, kernel, iterations=1)
-            print("     • Dilation (1 Iteration) angewendet")
-            # BEIDE MASKEN ERSTELLEN (vor Nachbearbeitung)
-            original_mask_array = mask_array.copy()        # Person weiß (255), Hintergrund schwarz (0)
-            inverted_mask_array = 255 - mask_array         # Person schwarz (0), Hintergrund weiß (255)
-            print("-" * 60)
-            print(f"🔧 MODUS-SPEZIFISCHE NACHBEARBEITUNG: {mode}")
-            print(f"   Original-Maske (Person weiß): {original_mask_array.shape}")
-            print(f"   Invertierte Maske (Person schwarz): {inverted_mask_array.shape}")
-            # MODUS-SPEZIFISCHE NACHBEARBEITUNG
-            if mode == "environment_change":
-                print("🌳 MODUS: UMWELT ÄNDERN")
-                # Arbeite auf der INVERTIERTEN Maske (Person schwarz, Hintergrund weiß)
-                mask_array = inverted_mask_array.copy()
-                print("   Arbeite auf invertierter Maske (Person schwarz, Hintergrund weiß)")
-                # Morphologische Operationen für saubere Umgebung
-                kernel = np.ones((5,5), np.uint8)
-                mask_array = cv2.morphologyEx(mask_array, cv2.MORPH_CLOSE, kernel)
-                mask_array = cv2.morphologyEx(mask_array, cv2.MORPH_OPEN, kernel)
-                mask_array = cv2.dilate(mask_array, np.ones((2,2), np.uint8), iterations=1)
-                mask_array = cv2.GaussianBlur(mask_array, (3, 3), 0)
-                print("   ✅ Umwelt-Modus: Person geschützt, Hintergrund optimiert")
-            elif mode == "focus_change":
-                print("🎯 MODUS: FOCUS ÄNDERN")
-                # Arbeite auf der ORIGINAL-Maske (Person weiß, Hintergrund schwarz)
-                mask_array = original_mask_array.copy()
-                print("   Arbeite auf originaler Maske (Person weiß, Hintergrund schwarz)")
-                # Größte weiße Komponente behalten (Person)
-                labeled_array, num_features = ndimage.label(mask_array)
-                if num_features > 1:
-                    sizes = ndimage.sum(mask_array, labeled_array, range(1, num_features + 1))
-                    largest_component = np.argmax(sizes) + 1
-                    mask_array = np.where(labeled_array == largest_component, mask_array, 0)
-                # Maske leicht erweitern für bessere Abdeckung
-                kernel = np.ones((3,3), np.uint8)
-                mask_array = cv2.dilate(mask_array, kernel, iterations=1)
-                mask_array = cv2.morphologyEx(mask_array, cv2.MORPH_CLOSE, kernel)
-                print("   ✅ Focus-Modus: Person verändert, Hintergrund geschützt")
-            elif mode == "face_only_change":
-                print("👤 MODUS: NUR GESICHT ÄNDERN")
-                # Arbeite auf der ORIGINAL-Maske (Person weiß, Hintergrund schwarz)
-                mask_array = original_mask_array.copy()
-                print("   Arbeite auf originaler Maske (Person weiß, Hintergrund schwarz)")
-                # Starke Erosion für präzises Gesicht
-                kernel = np.ones((3,3), np.uint8)
-                mask_array = cv2.erode(mask_array, kernel, iterations=2)
-                mask_array = cv2.erode(mask_array, np.ones((2,2), np.uint8), iterations=1)
-                mask_array = cv2.GaussianBlur(mask_array, (3, 3), 0)
-                print("   ✅ Gesichts-Modus: Postprocessing auf Ausschnitt abgeschlossen")
-                # ============================================================
-                # SPEZIALSCHRITT: MASKE ZURÜCK AUF ORIGINALGRÖSSE BRINGEN
-                # ============================================================
                 print("-" * 60)
                 print("🔄 MASKE VOM AUSSCHNITT ZURÜCK AUF ORIGINALGRÖSSE")
-                # Temporäre Maske aus dem Array erstellen
                 temp_mask = Image.fromarray(mask_array).convert("L")
                 print(f"   Maskengröße auf Ausschnitt: {temp_mask.size}")
-                # Leere Maske in Originalbild-Größe erstellen
                 final_mask = Image.new("L", original_image.size, 0)
                 print(f"   Leere Maske in Originalgröße: {final_mask.size}")
-                # Die segmentierte Maske an der richtigen Position im Originalbild platzieren
                 final_mask.paste(temp_mask, (crop_x1, crop_y1))
                 print(f"   Maskenposition im Original: ({crop_x1}, {crop_y1})")
-                # Zurück zum mask_array konvertieren
                 mask_array = np.array(final_mask)
                 print(f"   ✅ Maske zurück auf Originalgröße skaliert: {mask_array.shape}")
-                # Originalbild wiederherstellen für eventuelle spätere Verwendung
                 image = original_image
                 print(f"   🔄 Bild-Referenz wieder auf Original gesetzt: {image.size}")
-            # 9. Qualitätskontrolle und Statistik
             white_pixels = np.sum(mask_array > 127)
             total_pixels = mask_array.size
             white_ratio = white_pixels / total_pixels * 100
-            black_pixels = total_pixels - white_pixels
-            black_ratio = 100 - white_ratio
             print("-" * 60)
             print("📊 MASKEN-STATISTIK (FINAL)")
             print(f"   Weiße Pixel (Veränderungsbereich): {white_pixels:,} ({white_ratio:.1f}%)")
-            print(f"   Schwarze Pixel (Erhaltungsbereich): {black_pixels:,} ({black_ratio:.1f}%)")
             print(f"   Gesamtpixel: {total_pixels:,}")
             if mode == "face_only_change":
-                # Zusätzliche Statistik für Gesichtsmodus
                 original_face_area = original_bbox_size[0] * original_bbox_size[1]
                 coverage_ratio = white_pixels / original_face_area if original_face_area > 0 else 0
-                print(f"   👤 Gesichtsabdeckung: {coverage_ratio:.1%} der ursprünglichen BBox")
-            # 10. Zurück zu PIL Image
             mask = Image.fromarray(mask_array).convert("L")
             print("#" * 80)
             print(f"✅ SAM 2 SEGMENTIERUNG ABGESCHLOSSEN")
             print(f"📐 Finale Maskengröße: {mask.size}")
             print(f"🎛️  Verwendeter Modus: {mode}")
-            print(f"👤 Bei face_only_change: Crop={crop_size}×{crop_size}px, Heuristik-Score={best_score:.3f}")
             print("#" * 80)
             return mask
@@ -570,6 +712,7 @@ class ControlNetProcessor:
             traceback.print_exc()
             print("ℹ️ Fallback auf rechteckige Maske")
             return self._create_rectangular_mask(image, bbox_coords, mode)
     def _create_rectangular_mask(self, image, bbox_coords, mode):
         """Fallback: Erstellt rechteckige Maske"""

             print(f"⚠️ Fehler beim Glätten der Maske: {e}")
             return mask_array
     def create_sam_mask(self, image, bbox_coords, mode):
         """
         ERWEITERTE Funktion: Erstellt präzise Maske mit SAM 2
             print(f"📐 Eingabebild-Größe: {image.size}")
             print(f"🎛️  Ausgewählter Modus: {mode}")
+            # Variablen für alle Modi initialisieren
+            crop_size = None
+            crop_x1 = crop_y1 = crop_x2 = crop_y2 = None
+            original_image = image
+            best_score = 0.0
+            # 1. SAM2 laden
             if not self.sam_initialized:
                 print("📥 SAM 2 ist noch nicht geladen, starte Lazy Loading...")
                 self._lazy_load_sam()
             # ============================================================
             if mode == "face_only_change":
                 print("-" * 60)
+                print("👤 SPEZIALMODUS: NUR GESICHT - ROBUSTER WORKFLOW")
                 print("-" * 60)
                 # ============================================================
                 print(f"💾 Originalbild gesichert: {original_image.size}")
                 # ============================================================
+                # SCHRITT 2: Crop = BBox × 2.5 (ERHÖHT für mehr Kontext)
                 # ============================================================
+                print("✂️ SCHRITT 2: ERSTELLE QUADRATISCHEN AUSSCHNITT (BBox × 2.5)")
                 # BBox-Zentrum berechnen
                 bbox_center_x = (x1 + x2) // 2
                 print(f"   📏 BBox Dimensionen: {bbox_width} × {bbox_height} px")
                 print(f"   📐 Maximale BBox-Dimension: {bbox_max_dim} px")
+                # ERHÖHT: Crop-Größe berechnen (BBox × 2.5 für mehr Kontext)
+                crop_size = int(bbox_max_dim * 2.5)
+                print(f"   🎯 Ziel-Crop-Größe: {crop_size} × {crop_size} px (BBox × 2.5)")
                 # Crop-Koordinaten berechnen (zentriert um BBox)
                 crop_x1 = bbox_center_x - crop_size // 2
                 print(f"   ✅ Quadratischer Ausschnitt erstellt: {cropped_image.size}")
                 # ============================================================
+                # SCHRITT 3: BBox-Koordinaten transformieren
                 # ============================================================
                 print("📐 SCHRITT 3: BBox-KOORDINATEN TRANSFORMIEREN")
                 rel_x1 = x1 - crop_x1
                 print(f"   📏 Relative BBox Größe: {rel_x2-rel_x1} × {rel_y2-rel_y1} px")
                 # ============================================================
+                # SCHRITT 4: INTENSIVE BILDAUFBEREITUNG FÜR GESICHTSERKENNUNG
                 # ============================================================
+                print("🔍 SCHRITT 4: ERWEITERTE BILDAUFBEREITUNG FÜR GESICHTSERKENNUNG")
+                # 1. Kontrast verstärken
                 contrast_enhancer = ImageEnhance.Contrast(cropped_image)
+                enhanced_image = contrast_enhancer.enhance(1.8)  # 80% mehr Kontrast
+                # 2. Schärfe erhöhen für bessere Kantenerkennung
+                sharpness_enhancer = ImageEnhance.Sharpness(enhanced_image)
+                enhanced_image = sharpness_enhancer.enhance(2.0)  # 100% mehr Schärfe
+                # 3. Helligkeit anpassen
+                brightness_enhancer = ImageEnhance.Brightness(enhanced_image)
+                enhanced_image = brightness_enhancer.enhance(1.1)  # 10% heller
+                print(f"   ✅ Erweiterte Bildaufbereitung abgeschlossen")
+                print(f"     • Kontrast: +80%")
+                print(f"     • Schärfe: +100%")
+                print(f"     • Helligkeit: +10%")
+                # Für SAM: Verwende aufbereiteten Ausschnitt
+                image = enhanced_image
                 x1, y1, x2, y2 = rel_x1, rel_y1, rel_x2, rel_y2
+                print("   🔄 SAM wird auf aufbereitetem Ausschnitt ausgeführt")
                 print(f"   📊 SAM-Eingabegröße: {image.size}")
             # ============================================================
             # GEMEINSAME SAM-LOGIK FÜR ALLE MODI
             # ============================================================
             print("-" * 60)
             print(f"📦 BOUNDING BOX DETAILS FÜR SAM:")
             print("-" * 60)
             print("🖼️  BILDAUFBEREITUNG FÜR SAM 2")
             image_np = np.array(image.convert("RGB"))
             # ============================================================
+            # NEU: ERWEITERTE SAM-EINGABE FÜR GESICHTSMODUS
             # ============================================================
+            print("🎯 SCHRITT 4-5: ERWEITERTE SAM-PROMPTING")
+            bbox_width = x2 - x1
+            bbox_height = y2 - y1
+            # Für Gesichtsmodus: Verstärkte BBox-Prompts
+            if mode == "face_only_change":
+                # 1. Haupt-BBox (ursprüngliche Koordinaten)
+                input_boxes = [[[x1, y1, x2, y2]]]
+                # 2. ERWEITERTE BBox für Gesichtskontext (15% größer)
+                expand_factor = 0.15
+                expanded_x1 = max(0, int(x1 - bbox_width * expand_factor))
+                expanded_y1 = max(0, int(y1 - bbox_height * expand_factor))
+                expanded_x2 = min(image.width, int(x2 + bbox_width * expand_factor))
+                expanded_y2 = min(image.height, int(y2 + bbox_height * expand_factor))
+                input_boxes.append([[expanded_x1, expanded_y1, expanded_x2, expanded_y2]])
+                print(f"   Haupt-BBox: [{x1}, {y1}, {x2}, {y2}]")
+                print(f"   Erweiterte BBox: [{expanded_x1}, {expanded_y1}, {expanded_x2}, {expanded_y2}]")
+                print(f"   Anzahl BBox-Prompts: {len(input_boxes)}")
+            else:
+                # Standard für andere Modi
+                input_boxes = [[[x1, y1, x2, y2]]]
+                print(f"   Standard-BBox: [{x1}, {y1}, {x2}, {y2}]")
             print("   Verarbeite Bild mit SAM 2 Processor...")
             inputs = self.sam_processor(
                 image_np,
                 print(f"✅ Vorhersage abgeschlossen")
                 print(f"   Anzahl der Vorhersagemasken: {outputs.pred_masks.shape[2]}")
+            # 5. Maske extrahieren
+            print("📏 SCHRITT 6: MASKE EXTRAHIEREN")
             num_masks = outputs.pred_masks.shape[2]
             print(f"   SAM lieferte {num_masks} verschiedene Masken")
             # Extrahiere alle Masken
             all_masks = []
             for i in range(num_masks):
                 single_mask = outputs.pred_masks[:, :, i, :, :]
                 print(f"   Maske {i+1}: Größe={mask_area:,} Pixel, Max-Konfidenz={mask_np.max():.3f}")
             # ============================================================
+            # MODUS-SPEZIFISCHE HEURISTIK
             # ============================================================
+            print("🤔 SCHRITT 6: MASKENAUSWAHL MIT MODUS-SPEZIFISCHER HEURISTIK")
             bbox_center = ((x1 + x2) // 2, (y1 + y2) // 2)
             bbox_area = (x2 - x1) * (y2 - y1)
             print(f"   Erwartetes BBox-Zentrum: {bbox_center}")
             best_score = -1
             for i, mask_np in enumerate(all_masks):
+                mask_max = mask_np.max()
+                # Grundlegende Filterung
+                if mask_max < 0.3:
+                    print(f"   ❌ Maske {i+1}: Zu niedrige Konfidenz ({mask_max:.3f}), überspringe")
+                    continue
+                # Adaptiver Threshold
+                adaptive_threshold = max(0.3, mask_max * 0.7)
+                mask_binary = (mask_np > adaptive_threshold).astype(np.uint8)
                 if np.sum(mask_binary) == 0:
+                    print(f"   ❌ Maske {i+1}: Keine Pixel nach Threshold {adaptive_threshold:.3f}")
                     continue
+                mask_area_pixels = np.sum(mask_binary)
+                # ============================================================
+                # SPEZIALHEURISTIK NUR FÜR GESICHTSMODUS
+                # ============================================================
+                if mode == "face_only_change":
+                    print(f"   🔍 Analysiere Maske {i+1} mit GESICHTS-HEURISTIK")
+                    # 1. FLÄCHENBASIERTE BEWERTUNG (40%)
+                    area_ratio = mask_area_pixels / bbox_area
+                    print(f"     📐 Flächen-Ratio: {area_ratio:.3f} ({mask_area_pixels:,} / {bbox_area:,} Pixel)")
+                    # Optimale Kopfgröße: 80-120% der BBox
+                    if area_ratio < 0.6:
+                        print(f"     ⚠️  Fläche zu klein für Kopf (<60% der BBox)")
+                        area_score = area_ratio * 0.5  # Stark bestrafen
+                    elif area_ratio > 1.5:
+                        print(f"     ⚠️  Fläche zu groß für Kopf (>150% der BBox)")
+                        area_score = 2.0 - area_ratio  # Linear bestrafen
+                    elif 0.8 <= area_ratio <= 1.2:
+                        area_score = 1.0  # Perfekte Größe
+                        print(f"     ✅ Perfekte Kopfgröße (80-120% der BBox)")
+                    else:
+                        # Sanfte Abweichung
+                        area_score = 1.0 - abs(area_ratio - 1.0) * 0.5
+                    # 2. KOMPAKTHEIT/SOLIDITÄT (30%)
+                    labeled_mask = measure.label(mask_binary)
+                    regions = measure.regionprops(labeled_mask)
+                    if len(regions) == 0:
+                        compactness_score = 0.1
+                        print(f"     ❌ Keine zusammenhängenden Regionen gefunden")
+                    else:
+                        # Größte Region finden (sollte der Kopf sein)
+                        largest_region = max(regions, key=lambda r: r.area)
+                        # Solidität = Fläche / konvexe Hüllenfläche
+                        solidity = largest_region.solidity if hasattr(largest_region, 'solidity') else 0.7
+                        # Exzentrizität (wie elliptisch) - Köpfe sind tendenziell elliptisch
+                        eccentricity = largest_region.eccentricity if hasattr(largest_region, 'eccentricity') else 0.5
+                        # Perfekt runde Formen (Kreis) sind 0, Linie wäre 1
+                        # Köpfe haben typischerweise 0.5-0.8
+                        if 0.4 <= eccentricity <= 0.9:
+                            eccentricity_score = 1.0 - abs(eccentricity - 0.65) * 2
+                        else:
+                            eccentricity_score = 0.2
+                        compactness_score = (solidity * 0.6 + eccentricity_score * 0.4)
+                        print(f"     🎯 Kompaktheits-Analyse:")
+                        print(f"       • Solidität (Fläche/Konvex): {solidity:.3f}")
+                        print(f"       • Exzentrizität (Form): {eccentricity:.3f}")
+                        print(f"       • Kompaktheits-Score: {compactness_score:.3f}")
+                    # 3. BBOX-ÜBERLAPPUNG (20%)
+                    bbox_mask = np.zeros((image.height, image.width), dtype=np.uint8)
+                    bbox_mask[y1:y2, x1:x2] = 1
+                    overlap = np.sum(mask_binary & bbox_mask)
+                    bbox_overlap_ratio = overlap / mask_area_pixels if mask_area_pixels > 0 else 0
+                    # Für Kopf: Sollte großteils in BBox sein (mind. 70%)
+                    if bbox_overlap_ratio >= 0.7:
+                        bbox_score = 1.0
+                        print(f"     ✅ Hohe BBox-Überlappung: {bbox_overlap_ratio:.3f} ({overlap:,} Pixel)")
+                    elif bbox_overlap_ratio >= 0.5:
+                        bbox_score = bbox_overlap_ratio * 1.2
+                        print(f"     ⚠️  Mittlere BBox-Überlappung: {bbox_overlap_ratio:.3f}")
+                    else:
+                        bbox_score = bbox_overlap_ratio * 0.8
+                        print(f"     ❌ Geringe BBox-Überlappung: {bbox_overlap_ratio:.3f}")
+                    # 4. SAM-KONFIDENZ (10%)
+                    confidence_score = mask_max
+                    # GESAMTSCORE für Gesicht
+                    score = (
+                        area_score * 0.4 +      # 40% Flächenpassung
+                        compactness_score * 0.3 + # 30% Kompaktheit
+                        bbox_score * 0.2 +      # 20% BBox-Überlappung
+                        confidence_score * 0.1   # 10% Konfidenz
+                    )
+                    print(f"     📊 GESICHTS-SCORES für Maske {i+1}:")
+                    print(f"       • Flächen-Score: {area_score:.3f}")
+                    print(f"       • Kompaktheits-Score: {compactness_score:.3f}")
+                    print(f"       • BBox-Überlappungs-Score: {bbox_score:.3f}")
+                    print(f"       • Konfidenz-Score: {confidence_score:.3f}")
+                    print(f"       • GESAMTSCORE: {score:.3f}")
+                # ============================================================
+                # STANDARD-HEURISTIK FÜR ANDERE MODI
+                # ============================================================
+                else:
+                    # Standard Heuristik für focus_change und environment_change
+                    bbox_mask = np.zeros((image.height, image.width), dtype=np.uint8)
+                    bbox_mask[y1:y2, x1:x2] = 1
+                    overlap = np.sum(mask_binary & bbox_mask)
+                    bbox_overlap_ratio = overlap / np.sum(bbox_mask) if np.sum(bbox_mask) > 0 else 0
+                    # Schwerpunkt berechnen
+                    y_coords, x_coords = np.where(mask_binary > 0)
+                    if len(y_coords) > 0:
+                        centroid_y = np.mean(y_coords)
+                        centroid_x = np.mean(x_coords)
+                        centroid_distance = np.sqrt((centroid_x - bbox_center[0])**2 + (centroid_y - bbox_center[1])**2)
+                        normalized_distance = centroid_distance / max(image.width, image.height)
+                    else:
+                        normalized_distance = 1.0
+                    # Flächen-Ratio
+                    area_ratio = mask_area_pixels / bbox_area
+                    area_score = 1.0 - min(abs(area_ratio - 1.0), 1.0)
+                    # Konfidenz
+                    confidence_score = mask_max
+                    # Standard-Score
+                    score = (
+                        bbox_overlap_ratio * 0.4 +
+                        (1.0 - normalized_distance) * 0.25 +
+                        area_score * 0.25 +
+                        confidence_score * 0.1
+                    )
+                    print(f"   📊 STANDARD-SCORES für Maske {i+1}:")
+                    print(f"     • BBox-Überlappung: {bbox_overlap_ratio:.3f}")
+                    print(f"     • Zentrums-Distanz: {centroid_distance if 'centroid_distance' in locals() else 'N/A'}")
+                    print(f"     • Flächen-Ratio: {area_ratio:.3f}")
+                    print(f"     • GESAMTSCORE: {score:.3f}")
                 if score > best_score:
                     best_score = score
                     best_mask_idx = i
+                    print(f"     🏆 Neue beste Maske: Nr. {i+1} mit Score {score:.3f}")
             print(f"✅ Beste Maske ausgewählt: Nr. {best_mask_idx+1} mit Score {best_score:.3f}")
             mask_np = all_masks[best_mask_idx]
             # ============================================================
+            # OPTIMIERTER THRESHOLD
             # ============================================================
             max_val = mask_np.max()
             print(f"   🔍 Maximaler SAM-Konfidenzwert der besten Maske: {max_val:.3f}")
+            if mode == "face_only_change":
+                # Spezieller Threshold für Gesichter
+                if max_val < 0.5:
+                    dynamic_threshold = 0.25
+                    print(f"   ⚠️  SAM ist unsicher für Gesicht (max_val={max_val:.3f} < 0.5)")
+                elif max_val < 0.8:
+                    dynamic_threshold = max_val * 0.65  # Mittlerer Threshold
+                    print(f"   ℹ️  SAM ist mäßig sicher für Gesicht (max_val={max_val:.3f})")
+                else:
+                    dynamic_threshold = max_val * 0.75  # Hoher Threshold
+                    print(f"   ✅ SAM ist sicher für Gesicht (max_val={max_val:.3f} >= 0.8)")
+                print(f"   🎯 Gesichts-Threshold: {dynamic_threshold:.3f}")
             else:
+                # Standard Threshold
+                if max_val < 0.6:
+                    dynamic_threshold = 0.3
+                    print(f"   ⚠️  SAM ist unsicher (max_val={max_val:.3f} < 0.6)")
+                else:
+                    dynamic_threshold = max_val * 0.8
+                    print(f"   ✅ SAM ist sicher (max_val={max_val:.3f} >= 0.6)")
+                print(f"   🎯 Standard-Threshold: {dynamic_threshold:.3f}")
             mask_array = (mask_np > dynamic_threshold).astype(np.uint8) * 255
             # ============================================================
+            # MODUS-SPEZIFISCHES POSTPROCESSING
             # ============================================================
+            print("🔧 SCHRITT 7: MODUS-SPEZIFISCHES POSTPROCESSING")
+            if mode == "face_only_change":
+                print("👤 GESICHTS-SPEZIFISCHES POSTPROCESSING")
+                # 1. Größte zusammenhängende Komponente finden (sollte der Kopf sein)
                 labeled_array, num_features = ndimage.label(mask_array)
+                if num_features > 0:
+                    print(f"   🔍 Gefundene Komponenten: {num_features}")
                     sizes = ndimage.sum(mask_array, labeled_array, range(1, num_features + 1))
+                    largest_component_idx = np.argmax(sizes) + 1
+                    print(f"   👑 Größte Komponente: Nr. {largest_component_idx} mit {sizes[largest_component_idx-1]:,} Pixel")
+                    # NUR die größte Komponente behalten (der Kopf)
+                    mask_array = np.where(labeled_array == largest_component_idx, mask_array, 0)
+                    # 2. FORMBASIERTE OPTIMIERUNG FÜR KOPF
+                    print("   🎯 Formbasierte Optimierung für Kopf")
+                    # Hole die Region-Eigenschaften für die größte Komponente
+                    labeled_single = np.where(labeled_array == largest_component_idx, 1, 0).astype(np.uint8)
+                    regions = measure.regionprops(labeled_single)
+                    if regions:
+                        region = regions[0]
+                        # Erweiterte Bounding Box für Kopf (etwas größer)
+                        minr, minc, maxr, maxc = region.bbox
+                        head_bbox_height = maxr - minr
+                        head_bbox_width = maxc - minc
+                        # Kopf sollte etwa 1.2-1.5 mal höher als breit sein
+                        aspect_ratio = head_bbox_height / head_bbox_width if head_bbox_width > 0 else 1.0
+                        print(f"   📏 Kopf-BBox: {head_bbox_width}×{head_bbox_height} (Ratio: {aspect_ratio:.2f})")
+                        # Wenn der Kopf zu "flach" ist (z.B. nur Haare), vertikal erweitern
+                        if aspect_ratio < 1.0 and head_bbox_height < bbox_height * 0.8:
+                            print(f"   ⬇️  Kopf zu flach, vertikal erweitern")
+                            expand_y = int((bbox_height * 0.8 - head_bbox_height) / 2)
+                            minr = max(0, minr - expand_y)
+                            maxr = min(mask_array.shape[0], maxr + expand_y)
+                            # Fülle den erweiterten Bereich
+                            mask_array[minr:maxr, minc:maxc] = 255
+                    # 3. MORPHOLOGISCHE OPERATIONEN FÜR SAUBEREN KOPF
+                    print("   ⚙️  Morphologische Operationen für sauberen Kopf")
+                    # Zuerst CLOSE, um kleine Löcher im Kopf zu füllen
+                    kernel_close = np.ones((7, 7), np.uint8)
+                    mask_array = cv2.morphologyEx(mask_array, cv2.MORPH_CLOSE, kernel_close, iterations=1)
+                    print("     • MORPH_CLOSE (7x7) - Löcher im Kopf füllen")
+                    # Dann OPEN, um kleine Ausreißer zu entfernen
+                    kernel_open = np.ones((5, 5), np.uint8)
+                    mask_array = cv2.morphologyEx(mask_array, cv2.MORPH_OPEN, kernel_open, iterations=1)
+                    print("     • MORPH_OPEN (5x5) - Rauschen entfernen")
+                    # Sanfte Glättung der Kanten
+                    mask_array = cv2.GaussianBlur(mask_array, (5, 5), 1.0)
+                    mask_array = (mask_array > 127).astype(np.uint8) * 255
+                    print("     • GaussianBlur + Re-Threshold - Glatte Kanten")
+                # 4. MASKE ZURÜCK AUF ORIGINALGRÖSSE (nur für face_only_change)
                 print("-" * 60)
                 print("🔄 MASKE VOM AUSSCHNITT ZURÜCK AUF ORIGINALGRÖSSE")
                 temp_mask = Image.fromarray(mask_array).convert("L")
                 print(f"   Maskengröße auf Ausschnitt: {temp_mask.size}")
                 final_mask = Image.new("L", original_image.size, 0)
                 print(f"   Leere Maske in Originalgröße: {final_mask.size}")
                 final_mask.paste(temp_mask, (crop_x1, crop_y1))
                 print(f"   Maskenposition im Original: ({crop_x1}, {crop_y1})")
                 mask_array = np.array(final_mask)
                 print(f"   ✅ Maske zurück auf Originalgröße skaliert: {mask_array.shape}")
                 image = original_image
                 print(f"   🔄 Bild-Referenz wieder auf Original gesetzt: {image.size}")
+            elif mode == "focus_change":
+                print("🎯 FOCUS-CHANGE POSTPROCESSING")
+                mask_array = mask_array.copy()
+                # Größte weiße Komponente behalten (Person)
+                labeled_array, num_features = ndimage.label(mask_array)
+                if num_features > 1:
+                    sizes = ndimage.sum(mask_array, labeled_array, range(1, num_features + 1))
+                    largest_component = np.argmax(sizes) + 1
+                    mask_array = np.where(labeled_array == largest_component, mask_array, 0)
+                    print(f"   ✅ Behalte größte Person-Komponente ({num_features} → 1 Komponente)")
+                # Maske leicht erweitern für bessere Abdeckung
+                kernel = np.ones((3,3), np.uint8)
+                mask_array = cv2.dilate(mask_array, kernel, iterations=1)
+                print("   ✅ Dilation für bessere Personenabdeckung")
+            elif mode == "environment_change":
+                print("🌳 ENVIRONMENT-CHANGE POSTPROCESSING")
+                mask_array = 255 - mask_array  # Invertiere Maske
+                print("   ✅ Maske invertiert (Person schwarz, Hintergrund weiß)")
+                # Morphologische Operationen für saubere Umgebung
+                kernel = np.ones((5,5), np.uint8)
+                mask_array = cv2.morphologyEx(mask_array, cv2.MORPH_CLOSE, kernel)
+                print("   ✅ MORPH_CLOSE für zusammenhängende Umgebung")
+            # QUALITÄTSKONTROLLE
             white_pixels = np.sum(mask_array > 127)
             total_pixels = mask_array.size
             white_ratio = white_pixels / total_pixels * 100
             print("-" * 60)
             print("📊 MASKEN-STATISTIK (FINAL)")
             print(f"   Weiße Pixel (Veränderungsbereich): {white_pixels:,} ({white_ratio:.1f}%)")
+            print(f"   Schwarze Pixel (Erhaltungsbereich): {total_pixels-white_pixels:,} ({100-white_ratio:.1f}%)")
             print(f"   Gesamtpixel: {total_pixels:,}")
             if mode == "face_only_change":
                 original_face_area = original_bbox_size[0] * original_bbox_size[1]
                 coverage_ratio = white_pixels / original_face_area if original_face_area > 0 else 0
+                print(f"   👤 GESICHTSABDECKUNG: {coverage_ratio:.1%} der ursprünglichen BBox")
+                # Warnungen basierend auf Abdeckung
+                if coverage_ratio < 0.7:
+                    print(f"   ⚠️  WARNUNG: Geringe Gesichtsabdeckung ({coverage_ratio:.1%})")
+                    print(f"   💡 Tipp: BBox könnte zu groß sein oder SAM erkennt Gesicht nicht vollständig")
+                elif coverage_ratio > 1.3:
+                    print(f"   ⚠️  WARNUNG: Sehr hohe Gesichtsabdeckung ({coverage_ratio:.1%})")
+                    print(f"   💡 Tipp: Maske könnte zu viel Hintergrund enthalten")
+                elif 0.8 <= coverage_ratio <= 1.2:
+                    print(f"   ✅ OPTIMALE Gesichtsabdeckung ({coverage_ratio:.1%})")
+            # Zurück zu PIL Image
             mask = Image.fromarray(mask_array).convert("L")
             print("#" * 80)
             print(f"✅ SAM 2 SEGMENTIERUNG ABGESCHLOSSEN")
             print(f"📐 Finale Maskengröße: {mask.size}")
             print(f"🎛️  Verwendeter Modus: {mode}")
+            if mode == "face_only_change" and crop_size is not None:
+                print(f"👤 Bei face_only_change: Crop={crop_size}×{crop_size}px, Heuristik-Score={best_score:.3f}")
+                print(f"👤 Kopfabdeckung: {coverage_ratio:.1%} der BBox")
             print("#" * 80)
             return mask
             traceback.print_exc()
             print("ℹ️ Fallback auf rechteckige Maske")
             return self._create_rectangular_mask(image, bbox_coords, mode)
     def _create_rectangular_mask(self, image, bbox_coords, mode):
         """Fallback: Erstellt rechteckige Maske"""