Spaces:

Astridkraft
/

Stable-ControlNet-GPU

Paused

App Files Files Community

Astridkraft committed on Jan 4

Commit

a85b1a7

verified ·

1 Parent(s): 284f90e

Update sam_module.py

Browse files

Files changed (1) hide show

sam_module.py +137 -147

sam_module.py CHANGED Viewed

@@ -111,33 +111,33 @@ def create_sam_mask(self, image, bbox_coords, mode):
                 bbox_mask = np.zeros((image.height, image.width), dtype=np.uint8)
                 bbox_mask[y1:y2, x1:x2] = 1
-                    overlap = np.sum(mask_binary & bbox_mask)
-                    bbox_overlap_ratio = overlap / np.sum(bbox_mask) if np.sum(bbox_mask) > 0 else 0
-                    # Schwerpunkt berechnen
-                    y_coords, x_coords = np.where(mask_binary > 0)
-                    if len(y_coords) > 0:
-                        centroid_y = np.mean(y_coords)
-                        centroid_x = np.mean(x_coords)
-                        centroid_distance = np.sqrt((centroid_x - bbox_center[0])**2 + (centroid_y - bbox_center[1])**2)
-                        normalized_distance = centroid_distance / max(image.width, image.height)
-                    else:
-                        normalized_distance = 1.0
-                    # Flächen-Ratio
-                    area_ratio = mask_area_pixels / bbox_area
-                    area_score = 1.0 - min(abs(area_ratio - 1.0), 1.0)
-                    # Konfidenz
-                    confidence_score = mask_max
-                    # Standard-Score
-                    score = (
-                        bbox_overlap_ratio * 0.4 +
-                        (1.0 - normalized_distance) * 0.25 +
-                        area_score * 0.25 +
-                        confidence_score * 0.1
-                    )
                     print(f"   📊 STANDARD-SCORES für Maske {i+1}:")
                     print(f"     • BBox-Überlappung: {bbox_overlap_ratio:.3f}")
@@ -152,150 +152,140 @@ def create_sam_mask(self, image, bbox_coords, mode):
             print(f"✅ Beste Maske ausgewählt: Nr. {best_mask_idx+1} mit Score {best_score:.3f}")
-            # Beste Maske verwenden
             mask_np = all_masks[best_mask_idx]
-    # Für environment_change: Originalbildgröße beibehalten
-    if image.size != original_image.size:
-        print(f"   ⚠️  Bildgröße angepasst: {image.size} → {original_image.size}")
-        temp_mask = Image.fromarray(mask_array).convert("L")
-        temp_mask = temp_mask.resize(original_image.size, Image.Resampling.NEAREST)
-        mask_array = np.array(temp_mask)
-        print(f"   ✅ Maske auf Originalgröße skaliert: {mask_array.shape}")
-    # DEBUG: Zustand VOR der Invertierung
-    print("🔍 DEBUG VOR INVERTIERUNG:")
-    print(f"   mask_array Min/Max: {mask_array.min()}/{mask_array.max()}")
-    print(f"   Weiße Pixel (vorher): {np.sum(mask_array > 127)}")
-    print(f"   Schwarze Pixel (vorher): {np.sum(mask_array <= 127)}")
-    # Maske invertieren (Person wird schwarz, Hintergrund weiß)
-    mask_array = 255 - mask_array
-    print("   ✅ Maske invertiert (Person schwarz, Hintergrund weiß)")
-    # DEBUG: Zustand NACH der Invertierung
-    print("🔍 DEBUG NACH INVERTIERUNG:")
-    print(f"   mask_array Min/Max: {mask_array.min()}/{mask_array.max()}")
-    print(f"   Weiße Pixel (Hintergrund): {np.sum(mask_array > 127)}")
-    print(f"   Schwarze Pixel (Person): {np.sum(mask_array <= 127)}")
-    # Weiße Punkte in der Person (schwarz) entfernen
-    print("🧹 Entferne weiße Punkte in der Person...")
-    kernel_open = np.ones((3, 3), np.uint8)
-    mask_array = cv2.morphologyEx(mask_array, cv2.MORPH_OPEN, kernel_open, iterations=3)
-    print("   ✅ MORPH_OPEN entfernt weiße Punkte in der Person")
-    # DEBUG nach MORPH_OPEN
-    print(f"   Nach MORPH_OPEN - Weiße Pixel: {np.sum(mask_array > 127)}")
-    # Morphologische Operationen für saubere Umgebung
-    print("🔧 Verbessere Umgebungsmaske...")
-    kernel_close = np.ones((5, 5), np.uint8)
-    mask_array = cv2.morphologyEx(mask_array, cv2.MORPH_CLOSE, kernel_close)
-    print("   ✅ MORPH_CLOSE für zusammenhängende Umgebung")
-    # DEBUG nach MORPH_CLOSE
-    print(f"   Nach MORPH_CLOSE - Weiße Pixel: {np.sum(mask_array > 127)}")
-    # Weiche Ränder für bessere Integration der Person
-    print("🌈 Erstelle weiche Übergänge...")
-    mask_array = cv2.GaussianBlur(mask_array, (9, 9), 2.0)
-    print("   ✅ Gaussian Blur für weiche Übergänge")
-    # DEBUG nach Gaussian Blur
-    print(f"   Nach Gaussian Blur - Min/Max: {mask_array.min()}/{mask_array.max()}")
-    print(f"   Nach Gaussian Blur - dtype: {mask_array.dtype}")
-    # Gamma-Korrektur für präzisere Ränder
-    print("🎛️  Wende Gamma-Korrektur an...")
-    mask_array = mask_array.astype(np.float32) / 255.0
-    print(f"   Konvertiert zu Float32: Min={mask_array.min():.3f}, Max={mask_array.max():.3f}")
-    mask_array = np.clip(mask_array, 0.0, 1.0)
-    mask_array = mask_array ** 0.85  # Gamma-Korrektur
-    print(f"   Nach Gamma 0.85: Min={mask_array.min():.3f}, Max={mask_array.max():.3f}")
-    mask_array = (mask_array * 255).astype(np.uint8)
-    print("   ✅ Gamma-Korrektur (0.85) gegen milchige Ränder")
-    # FINALE QUALITÄTSKONTROLLE
-    print("-" * 60)
-    print("📊 FINALE MASKEN-STATISTIK (ENVIRONMENT_CHANGE)")
-    white_pixels = np.sum(mask_array > 127)
-    black_pixels = np.sum(mask_array <= 127)
-    total_pixels = mask_array.size
-    white_ratio = white_pixels / total_pixels * 100
-    black_ratio = black_pixels / total_pixels * 100
-    print(f"   Weiße Pixel (HINTERGRUND - Veränderung): {white_pixels:,} ({white_ratio:.1f}%)")
-    print(f"   Schwarze Pixel (PERSON - Erhaltung): {black_pixels:,} ({black_ratio:.1f}%)")
-    print(f"   Gesamtpixel: {total_pixels:,}")
-    # Warnungen basierend auf Verhältnis
-    if white_ratio < 30:
-        print(f"   ⚠️  WARNUNG: Sehr wenig Hintergrund ({white_ratio:.1f}%)")
-        print(f"   ℹ️  Das könnte bedeuten, dass die Person zu groß segmentiert wurde")
-    elif white_ratio > 90:
-        print(f"   ⚠️  WARNUNG: Sehr viel Hintergrund ({white_ratio:.1f}%)")
-        print(f"   ℹ️  Das könnte bedeuten, dass die Person zu klein segmentiert wurde")
-    elif 50 <= white_ratio <= 80:
-        print(f"   ✅ OPTIMALES Verhältnis ({white_ratio:.1f}%)")
-    else:
-        print(f"   ℹ️  Normales Verhältnis ({white_ratio:.1f}%)")
-    # Zurück zu PIL Image
-    mask = Image.fromarray(mask_array).convert("L")
-    print(f"   Finale Maskengröße: {mask.size}")
-    print("-" * 60)
-            # ... existierende environment_change Logik hier komplett ...
-            # (wird aus dem Original übernommen, nicht verändert)
-            # WICHTIG: Du musst den environment_change Code hier einfügen
-            # von Zeile ~175 bis ~250 aus dem Original
-            # Beispiel-Struktur (vereinfacht):
-            image_np = np.array(image.convert("RGB"))
-            input_boxes = [[[x1, y1, x2, y2]]]
-            # KEINE Punkte für environment_change
-            inputs = self.sam_processor(
-                image_np,
-                input_boxes=input_boxes,
-                return_tensors="pt"
-            ).to(self.device)
-            with torch.no_grad():
-                outputs = self.sam_model(**inputs)
-            # Nur beste Maske verwenden und auf 512x512 skalieren
-            best_mask = outputs.pred_masks[:, :, 0, :, :]  # Erste Maske nehmen
-            resized_mask = F.interpolate(
-                best_mask,
-                size=(512, 512),  # Direkt auf ControlNet-Zielgröße
-                mode='bilinear',
-                align_corners=False
-            ).squeeze()
-            mask_np = resized_mask.sigmoid().cpu().numpy()
-            # Invertieren für environment_change
-            threshold = 0.5
-            mask_array = (mask_np > threshold).astype(np.uint8) * 255
-            mask_array = 255 - mask_array  # Invertieren
-            # Auf Originalgröße für Rückgabe
             mask = Image.fromarray(mask_array).convert("L")
-            mask = mask.resize(original_image.size, Image.Resampling.NEAREST)
-            return mask, raw_mask  # raw_mask gleiche wie finale Maske
         # ============================================================
         # BLOCK 2: FOCUS_CHANGE (KORRIGIERTE VERSION)

                 bbox_mask = np.zeros((image.height, image.width), dtype=np.uint8)
                 bbox_mask[y1:y2, x1:x2] = 1
+                overlap = np.sum(mask_binary & bbox_mask)
+                bbox_overlap_ratio = overlap / np.sum(bbox_mask) if np.sum(bbox_mask) > 0 else 0
+                # Schwerpunkt berechnen
+                y_coords, x_coords = np.where(mask_binary > 0)
+                if len(y_coords) > 0:
+                    centroid_y = np.mean(y_coords)
+                    centroid_x = np.mean(x_coords)
+                    centroid_distance = np.sqrt((centroid_x - bbox_center[0])**2 + (centroid_y - bbox_center[1])**2)
+                    normalized_distance = centroid_distance / max(image.width, image.height)
+                else:
+                    normalized_distance = 1.0
+                # Flächen-Ratio
+                area_ratio = mask_area_pixels / bbox_area
+                area_score = 1.0 - min(abs(area_ratio - 1.0), 1.0)
+                # Konfidenz
+                confidence_score = mask_max
+                # Standard-Score
+                score = (
+                    bbox_overlap_ratio * 0.4 +
+                    (1.0 - normalized_distance) * 0.25 +
+                    area_score * 0.25 +
+                    confidence_score * 0.1
+                )
                     print(f"   📊 STANDARD-SCORES für Maske {i+1}:")
                     print(f"     • BBox-Überlappung: {bbox_overlap_ratio:.3f}")
             print(f"✅ Beste Maske ausgewählt: Nr. {best_mask_idx+1} mit Score {best_score:.3f}")
+            # Beste Maske verwenden - mask_np beste Maske
             mask_np = all_masks[best_mask_idx]
+            max_val = mask_np.max()
+            print(f"   🔍 Maximaler SAM-Konfidenzwert der besten Maske: {max_val:.3f}")
+            if max_val < 0.6:
+                dynamic_threshold = 0.3
+                print(f"   ⚠️  SAM ist unsicher (max_val={max_val:.3f} < 0.6)")
+            else:
+                dynamic_threshold = max_val * 0.8
+                print(f"   ✅ SAM ist sicher (max_val={max_val:.3f} >= 0.6)")
+            # Binärmaske erstellen (256x256)
+            mask_array = (mask_np > dynamic_threshold).astype(np.uint8) * 255
+            # Fallback bei leerer Maske, der höchste Wert ist 0 also schwarz
+            if mask_array.max() == 0:
+                print("   ⚠️  Maske leer, erstelle rechteckige Fallback-Maske")
+                mask_array = np.zeros((512, 512), dtype=np.uint8)
+                # BBox auf 512x512 skalieren für Fallback
+                scale_x = 512 / image.width
+                scale_y = 512 / image.height
+                fb_x1 = int(x1 * scale_x)
+                fb_y1 = int(y1 * scale_y)
+                fb_x2 = int(x2 * scale_x)
+                fb_y2 = int(y2 * scale_y)
+                cv2.rectangle(mask_array, (fb_x1, fb_y1), (fb_x2, fb_y2), 255, -1)
+            # Damit wird die Rohmaske für die UI-Anzeige gespeichert
+            raw_mask_array = mask_array.copy()
+            print("🌳 ENVIRONMENT-CHANGE POSTPROCESSING")
+            # Originalbildgröße beibehalten
+            if image.size != original_image.size:
+                print(f"   ⚠️  Bildgröße angepasst: {image.size} → {original_image.size}")
+                temp_mask = Image.fromarray(mask_array).convert("L")
+                temp_mask = temp_mask.resize(original_image.size, Image.Resampling.NEAREST)
+                mask_array = np.array(temp_mask)
+                print(f"   ✅ Maske auf Originalgröße skaliert: {mask_array.shape}")
+            # Maske invertieren (Person wird schwarz, Hintergrund weiß)
+            threshold = 0.5
+            mask_array = (mask_np > threshold).astype(np.uint8) * 255
+            mask_array = 255 - mask_array
+            print("   ✅ Maske invertiert (Person schwarz, Hintergrund weiß)")
+            # Weiße Punkte in der Person (schwarz) entfernen
+            print("🧹 Entferne weiße Punkte in der Person...")
+            kernel_open = np.ones((3, 3), np.uint8)
+            mask_array = cv2.morphologyEx(mask_array, cv2.MORPH_OPEN, kernel_open, iterations=3)
+            print("   ✅ MORPH_OPEN entfernt weiße Punkte in der Person")
+            # DEBUG nach MORPH_OPEN
+            print(f"   Nach MORPH_OPEN - Weiße Pixel: {np.sum(mask_array > 127)}")
+            # Morphologische Operationen für saubere Umgebung
+            print("🔧 Verbessere Umgebungsmaske...")
+            kernel_close = np.ones((5, 5), np.uint8)
+            mask_array = cv2.morphologyEx(mask_array, cv2.MORPH_CLOSE, kernel_close)
+            print("   ✅ MORPH_CLOSE für zusammenhängende Umgebung")
+            # DEBUG nach MORPH_CLOSE
+            print(f"   Nach MORPH_CLOSE - Weiße Pixel: {np.sum(mask_array > 127)}")
+            # Weiche Ränder für bessere Integration der Person
+            print("🌈 Erstelle weiche Übergänge...")
+            mask_array = cv2.GaussianBlur(mask_array, (9, 9), 2.0)
+            print("   ✅ Gaussian Blur für weiche Übergänge")
+            # DEBUG nach Gaussian Blur
+            print(f"   Nach Gaussian Blur - Min/Max: {mask_array.min()}/{mask_array.max()}")
+            print(f"   Nach Gaussian Blur - dtype: {mask_array.dtype}")
+            # Gamma-Korrektur für präzisere Ränder
+            print("🎛️  Wende Gamma-Korrektur an...")
+            mask_array = mask_array.astype(np.float32) / 255.0
+            print(f"   Konvertiert zu Float32: Min={mask_array.min():.3f}, Max={mask_array.max():.3f}")
+            mask_array = np.clip(mask_array, 0.0, 1.0)
+            mask_array = mask_array ** 0.85  # Gamma-Korrektur
+            print(f"   Nach Gamma 0.85: Min={mask_array.min():.3f}, Max={mask_array.max():.3f}")
+            mask_array = (mask_array * 255).astype(np.uint8)
+            print("   ✅ Gamma-Korrektur (0.85) gegen milchige Ränder")
+            # FINALE QUALITÄTSKONTROLLE
+            print("-" * 60)
+            print("📊 FINALE MASKEN-STATISTIK (ENVIRONMENT_CHANGE)")
+            white_pixels = np.sum(mask_array > 127)
+            black_pixels = np.sum(mask_array <= 127)
+            total_pixels = mask_array.size
+            white_ratio = white_pixels / total_pixels * 100
+            black_ratio = black_pixels / total_pixels * 100
+            print(f"   Weiße Pixel (HINTERGRUND - Veränderung): {white_pixels:,} ({white_ratio:.1f}%)")
+            print(f"   Schwarze Pixel (PERSON - Erhaltung): {black_pixels:,} ({black_ratio:.1f}%)")
+            print(f"   Gesamtpixel: {total_pixels:,}")
+            # Warnungen basierend auf Verhältnis
+            if white_ratio < 30:
+                print(f"   ⚠️  WARNUNG: Sehr wenig Hintergrund ({white_ratio:.1f}%)")
+                print(f"   ℹ️  Das könnte bedeuten, dass die Person zu groß segmentiert wurde")
+            elif white_ratio > 90:
+                print(f"   ⚠️  WARNUNG: Sehr viel Hintergrund ({white_ratio:.1f}%)")
+                print(f"   ℹ️  Das könnte bedeuten, dass die Person zu klein segmentiert wurde")
+            elif 50 <= white_ratio <= 80:
+                print(f"   ✅ OPTIMALES Verhältnis ({white_ratio:.1f}%)")
+            else:
+                print(f"   ℹ️  Normales Verhältnis ({white_ratio:.1f}%)")
+            # Zurück zu PIL Image
             mask = Image.fromarray(mask_array).convert("L")
+            raw_mask = Image.fromarray(raw_mask_array).convert("L")
+            print("#" * 80)
+            print(f"✅ SAM 2 SEGMENTIERUNG ABGESCHLOSSEN")
+            print(f"📐 Finale Maskengröße: {mask.size}")
+            print(f"🎛️  Verwendeter Modus: {mode}")
+            print("#" * 80)
+            return mask, raw_mask  #in mask steht die invertierte nachbearbeitete Maske, in raw_mask die Rohmaske
         # ============================================================
         # BLOCK 2: FOCUS_CHANGE (KORRIGIERTE VERSION)