Spaces:

Astridkraft
/

Stable-ControlNet-GPU

Paused

App Files Community

Astridkraft committed on Dec 28, 2025

Commit

a850477

verified ·

1 Parent(s): 13851f6

Update controlnet_module.py

Browse files

Files changed (1) hide show

controlnet_module.py +54 -17

controlnet_module.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import torch
 from diffusers import StableDiffusionControlNetPipeline, ControlNetModel
 from controlnet_aux import OpenposeDetector
-from PIL import Image, ImageFilter
 import random
 import cv2
 import numpy as np
@@ -160,6 +160,14 @@ class ControlNetProcessor:
                 cropped_image = image.crop((crop_x1, crop_y1, crop_x2, crop_y2))
                 print(f"   ✅ Ausschnitt erstellt: {cropped_image.size}")
                 # BBox-Koordinaten relativ zum Ausschnitt neu berechnen
                 rel_x1 = x1 - crop_x1
                 rel_y1 = y1 - crop_y1
@@ -173,7 +181,7 @@ class ControlNetProcessor:
                 image = cropped_image
                 x1, y1, x2, y2 = rel_x1, rel_y1, rel_x2, rel_y2
-                print("   🔄 SAM wird auf Ausschnitt (nicht Vollbild) ausgeführt")
             # ============================================================
             # GEMEINSAME SAM-LOGIK FÜR ALLE MODI
@@ -224,24 +232,53 @@ class ControlNetProcessor:
             # 6. In NumPy konvertieren und Schwellenwert anwenden
             mask_np = final_mask.sigmoid().cpu().numpy()
             print(f"   Nach Sigmoid und CPU: {mask_np.shape}, Wertebereich: [{mask_np.min():.3f}, {mask_np.max():.3f}]")
-            # VERWENDEN SIE:
-            # 1. Finde den maximalen Wert in mask_np
             max_val = mask_np.max()
             print(f"   🔍 Maximaler SAM-Konfidenzwert: {max_val:.3f}")
-            # 2. Setze Threshold auf 80% des Maximalwerts (oder einen anderen Prozentsatz)
-            dynamic_threshold = max_val * 0.8
-            print(f"   🎯 Dynamischer Threshold: {dynamic_threshold:.3f} (80% von Maximum)")
-            # 3. Anwenden
-            mask_array = (mask_np > dynamic_threshold).astype(np.uint8) * 255
-            #mask_array = (mask_np > 0.9).astype(np.uint8) * 255
             unique_vals = np.unique(mask_array)
-            print(f"   Nach Threshold (0.9): {mask_array.shape}, Unique Werte: {unique_vals}")
             # 7. BEIDE MASKEN ERSTELLEN (vor Nachbearbeitung)
             original_mask_array = mask_array.copy()        # Person weiß (255), Hintergrund schwarz (0)
             inverted_mask_array = 255 - mask_array         # Person schwarz (0), Hintergrund weiß (255)
@@ -330,7 +367,7 @@ class ControlNetProcessor:
                 if num_features > 0:
                     sizes = ndimage.sum(mask_array, labeled_array, range(1, num_features + 1))
-                    print(f"   Größen der weißen Komponenten auf AUSSCHNITT: {sizes}")
                     if num_features > 1:
                         # WICHTIG: Für Gesicht nehmen wir die GRÖSSTE Komponente im AUSSCHNITT

 import torch
 from diffusers import StableDiffusionControlNetPipeline, ControlNetModel
 from controlnet_aux import OpenposeDetector
+from PIL import Image, ImageFilter, ImageEnhance  # NEU: ImageEnhance für Kontrast
 import random
 import cv2
 import numpy as np
                 cropped_image = image.crop((crop_x1, crop_y1, crop_x2, crop_y2))
                 print(f"   ✅ Ausschnitt erstellt: {cropped_image.size}")
+                # ============================================================
+                # NEU: KONTRASTVERSTÄRKUNG FÜR BESSERE SAM-ERKENNUNG
+                # ============================================================
+                print("   🔍 Wende Kontrastverstärkung an für bessere Segmentierung...")
+                contrast_enhancer = ImageEnhance.Contrast(cropped_image)
+                cropped_image = contrast_enhancer.enhance(1.5)  # 50% mehr Kontrast
+                print("   ✅ Kontrast um 50% erhöht")
                 # BBox-Koordinaten relativ zum Ausschnitt neu berechnen
                 rel_x1 = x1 - crop_x1
                 rel_y1 = y1 - crop_y1
                 image = cropped_image
                 x1, y1, x2, y2 = rel_x1, rel_y1, rel_x2, rel_y2
+                print("   🔄 SAM wird auf kontrastverstärktem Ausschnitt (nicht Vollbild) ausgeführt")
             # ============================================================
             # GEMEINSAME SAM-LOGIK FÜR ALLE MODI
             # 6. In NumPy konvertieren und Schwellenwert anwenden
             mask_np = final_mask.sigmoid().cpu().numpy()
             print(f"   Nach Sigmoid und CPU: {mask_np.shape}, Wertebereich: [{mask_np.min():.3f}, {mask_np.max():.3f}]")
+            # ============================================================
+            # KRITISCH: DYNAMISCHER THRESHOLD FÜR UNSICHERE SAM-VORHERSAGEN
+            # ============================================================
             max_val = mask_np.max()
             print(f"   🔍 Maximaler SAM-Konfidenzwert: {max_val:.3f}")
+            # NEUE LOGIK: Unterscheidung basierend auf SAM-Konfidenz
+            if max_val < 0.6:
+                # Fall: SAM ist unsicher (wie in Ihrem Log: max_val=0.505)
+                # Verwende festen, niedrigen Threshold
+                dynamic_threshold = 0.2  # Sehr niedrig für unsichere Vorhersagen
+                print(f"   ⚠️  SAM ist unsicher (max_val={max_val:.3f} < 0.6)")
+                print(f"   🎯 Verwende festen niedrigen Threshold: {dynamic_threshold:.3f}")
+            else:
+                # Fall: SAM ist sicher
+                # Verwende prozentualen Threshold basierend auf Maximum
+                dynamic_threshold = max_val * 0.8
+                print(f"   ✅ SAM ist sicher (max_val={max_val:.3f} >= 0.6)")
+                print(f"   🎯 Dynamischer Threshold: {dynamic_threshold:.3f} (80% von Maximum)")
+            mask_array = (mask_np > dynamic_threshold).astype(np.uint8) * 255
             unique_vals = np.unique(mask_array)
+            print(f"   Nach Threshold ({dynamic_threshold:.3f}): {mask_array.shape}, Unique Werte: {unique_vals}")
+            # ============================================================
+            # NEU: VORFILTERUNG FÜR KLEINE KOMPONENTEN (RAUSCHEN ENTFERNEN)
+            # ============================================================
+            if np.sum(mask_array > 0) > 0:  # Nur wenn weiße Pixel existieren
+                labeled_array, num_features = ndimage.label(mask_array)
+                if num_features > 1:
+                    print(f"   🧹 Vorfilterung: Gefundene Komponenten vor Filterung: {num_features}")
+                    sizes = ndimage.sum(mask_array, labeled_array, range(1, num_features + 1))
+                    min_size = 1000  # Minimale Größe für eine sinnvolle Komponente
+                    # Zähle, wie viele Komponenten die Mindestgröße erreichen
+                    valid_components = 0
+                    for i in range(1, num_features + 1):
+                        if sizes[i-1] >= min_size:
+                            valid_components += 1
+                        else:
+                            # Entferne kleine Komponenten (Rauschen)
+                            mask_array = np.where(labeled_array == i, 0, mask_array)
+                    print(f"   ✅ Entferne kleine Komponenten (<{min_size}px): {num_features} → {valid_components} Komponenten")
             # 7. BEIDE MASKEN ERSTELLEN (vor Nachbearbeitung)
             original_mask_array = mask_array.copy()        # Person weiß (255), Hintergrund schwarz (0)
             inverted_mask_array = 255 - mask_array         # Person schwarz (0), Hintergrund weiß (255)
                 if num_features > 0:
                     sizes = ndimage.sum(mask_array, labeled_array, range(1, num_features + 1))
+                    print(f"   Größen der weißen Komponenten auf AUSSCHNITT: {sizes[:10]}...")  # Nur erste 10 anzeigen
                     if num_features > 1:
                         # WICHTIG: Für Gesicht nehmen wir die GRÖSSTE Komponente im AUSSCHNITT