Spaces:

Astridkraft
/

Stable-ControlNet-GPU

Paused

App Files Files Community

Astridkraft committed on 12 days ago

Commit

73f9907

verified ·

1 Parent(s): a8a1956

Update sam_module.py

Browse files

Files changed (1) hide show

sam_module.py +13 -13

sam_module.py CHANGED Viewed

@@ -1,7 +1,6 @@
 def create_sam_mask(self, image, bbox_coords, mode):
     """
     ERWEITERTE Funktion: Erstellt präzise Maske mit SAM 2
-    Restrukturierte Version mit klaren Blöcken pro Modus
     """
     try:
         print("#" * 80)
@@ -40,11 +39,11 @@ def create_sam_mask(self, image, bbox_coords, mode):
             # Der Prozessor von SAM erwartet ein NumPy-Array kein PIL
             image_np = np.array(image.convert("RGB"))
-            # Immer nur eine BBox verwenden (SAM 2 erwartet genau 1)
             input_boxes = [[[x1, y1, x2, y2]]]
-            # Aufruf des SAM-Prozessors mit den Variablen. Der Processor verpackt diese Rohdaten
-            # in die für das SAM-Modell erforderlichen Tensoren und speichert sie in inputs.
             inputs = self.sam_processor(
                 image_np,
                 input_boxes=input_boxes,
@@ -58,7 +57,7 @@ def create_sam_mask(self, image, bbox_coords, mode):
             print("🧠 SAM 2 INFERENZ (Vorhersage)")
             with torch.no_grad():
                 print("   Führe Vorhersage durch...")
-                outputs = self.sam_model(**inputs)
                 print(f"✅ Vorhersage abgeschlossen")
                 print(f"   Anzahl der Vorhersagemasken: {outputs.pred_masks.shape[2]}")
@@ -77,8 +76,8 @@ def create_sam_mask(self, image, bbox_coords, mode):
                     align_corners=False
              ).squeeze()
-            mask_np = resized_mask.sigmoid().cpu().numpy()
-            all_masks.append(mask_np)
             bbox_center = ((x1 + x2) // 2, (y1 + y2) // 2)
@@ -195,7 +194,7 @@ def create_sam_mask(self, image, bbox_coords, mode):
                 fb_x2 = int(x2 * scale_x)
                 fb_y2 = int(y2 * scale_y)
-                # Schwarzes Rechteck für Person
                 cv2.rectangle(mask_array, (fb_x1, fb_y1), (fb_x2, fb_y2), 0, -1)
             # Damit wird die Rohmaske für die UI-Anzeige gespeichert
@@ -203,7 +202,8 @@ def create_sam_mask(self, image, bbox_coords, mode):
             print("🌳 ENVIRONMENT-CHANGE POSTPROCESSING")
-            # Originalbildgröße beibehalten
             if image.size != original_image.size:
                 print(f"   ⚠️  Bildgröße angepasst: {image.size} → {original_image.size}")
                 temp_mask = Image.fromarray(mask_array).convert("L")
@@ -224,7 +224,7 @@ def create_sam_mask(self, image, bbox_coords, mode):
             # DEBUG nach MORPH_OPEN
             print(f"   Nach MORPH_OPEN - Weiße Pixel: {np.sum(mask_array > 127)}")
-            # Morphologische Operationen für saubere Umgebung
             print("🔧 Verbessere Umgebungsmaske...")
             kernel_close = np.ones((5, 5), np.uint8)
             mask_array = cv2.morphologyEx(mask_array, cv2.MORPH_CLOSE, kernel_close)
@@ -235,7 +235,7 @@ def create_sam_mask(self, image, bbox_coords, mode):
             # Weiche Ränder für bessere Integration der Person
             print("🌈 Erstelle weiche Übergänge...")
-            mask_array = cv2.GaussianBlur(mask_array, (9, 9), 2.0)
             print("   ✅ Gaussian Blur für weiche Übergänge")
             # DEBUG nach Gaussian Blur
@@ -247,8 +247,8 @@ def create_sam_mask(self, image, bbox_coords, mode):
             mask_array = mask_array.astype(np.float32) / 255.0
             print(f"   Konvertiert zu Float32: Min={mask_array.min():.3f}, Max={mask_array.max():.3f}")
-            mask_array = np.clip(mask_array, 0.0, 1.0)
-            mask_array = mask_array ** 0.85  # Gamma-Korrektur
             print(f"   Nach Gamma 0.85: Min={mask_array.min():.3f}, Max={mask_array.max():.3f}")
             mask_array = (mask_array * 255).astype(np.uint8)

 def create_sam_mask(self, image, bbox_coords, mode):
     """
     ERWEITERTE Funktion: Erstellt präzise Maske mit SAM 2
     """
     try:
         print("#" * 80)
             # Der Prozessor von SAM erwartet ein NumPy-Array kein PIL
             image_np = np.array(image.convert("RGB"))
+            # Packt die BBox-Koordinaten in eine 3D-Liste
             input_boxes = [[[x1, y1, x2, y2]]]
+            # Aufruf des SAM-Prozessors mit dem Originalbild (als NumPy-Array) und der BBox. Der Prozessor wandelt Bild und BBox
+            # in die für SAM erforderlichen Tensoren um und speichert sie in inputs.
             inputs = self.sam_processor(
                 image_np,
                 input_boxes=input_boxes,
             print("🧠 SAM 2 INFERENZ (Vorhersage)")
             with torch.no_grad():
                 print("   Führe Vorhersage durch...")
+                outputs = self.sam_model(**inputs)   #führt die Segmentierung mit SAM aus
                 print(f"✅ Vorhersage abgeschlossen")
                 print(f"   Anzahl der Vorhersagemasken: {outputs.pred_masks.shape[2]}")
                     align_corners=False
              ).squeeze()
+            mask_np = resized_mask.sigmoid().cpu().numpy()  # wandelt Modellausgaben in Wahrscheinlichkeiten um und verschiebt die Daten von der GPU auf die CPU
+            all_masks.append(mask_np) #fügt die aktuelle Maske der Liste all_masks hinzu
             bbox_center = ((x1 + x2) // 2, (y1 + y2) // 2)
                 fb_x2 = int(x2 * scale_x)
                 fb_y2 = int(y2 * scale_y)
+                # Schwarzes Rechteck für Person bzw. BBox
                 cv2.rectangle(mask_array, (fb_x1, fb_y1), (fb_x2, fb_y2), 0, -1)
             # Damit wird die Rohmaske für die UI-Anzeige gespeichert
             print("🌳 ENVIRONMENT-CHANGE POSTPROCESSING")
+            # Konvertierung zu PIL, hochskalieren auf Originalgröße (korrekte Überlagerung mit O-Bild),
+            # Konvertierung NumPy für weitere Verarbeitung da mathematisch korrekter als PIL.
             if image.size != original_image.size:
                 print(f"   ⚠️  Bildgröße angepasst: {image.size} → {original_image.size}")
                 temp_mask = Image.fromarray(mask_array).convert("L")
             # DEBUG nach MORPH_OPEN
             print(f"   Nach MORPH_OPEN - Weiße Pixel: {np.sum(mask_array > 127)}")
+            # Morphologische Operationen für saubere Umgebung - entfernt schwarze Pixel aus Umgebung
             print("🔧 Verbessere Umgebungsmaske...")
             kernel_close = np.ones((5, 5), np.uint8)
             mask_array = cv2.morphologyEx(mask_array, cv2.MORPH_CLOSE, kernel_close)
             # Weiche Ränder für bessere Integration der Person
             print("🌈 Erstelle weiche Übergänge...")
+            mask_array = cv2.GaussianBlur(mask_array, (9, 9), 2.0) #2.0 bestimmt wie stark die Unschärfe ist
             print("   ✅ Gaussian Blur für weiche Übergänge")
             # DEBUG nach Gaussian Blur
             mask_array = mask_array.astype(np.float32) / 255.0
             print(f"   Konvertiert zu Float32: Min={mask_array.min():.3f}, Max={mask_array.max():.3f}")
+            mask_array = np.clip(mask_array, 0.0, 1.0)  # begrenzt alle Werte auf den Bereich 0 bis 1
+            mask_array = mask_array ** 0.85  # Gamma-Korrektur: Exponent < 1 hebt alle Werte zwischen 0 und 1 an (hellt auf), 0 und 1 bleiben unverändert - erzeugt weichere Maskenübergänge
             print(f"   Nach Gamma 0.85: Min={mask_array.min():.3f}, Max={mask_array.max():.3f}")
             mask_array = (mask_array * 255).astype(np.uint8)