Spaces:

Astridkraft
/

Stable-ControlNet-GPU

Paused

App Files Files Community

Astridkraft committed on Jan 3

Commit

3cbb0e3

verified ·

1 Parent(s): d916b1d

Update controlnet_module.py

Browse files

Files changed (1) hide show

controlnet_module.py +34 -10

controlnet_module.py CHANGED Viewed

@@ -269,20 +269,44 @@ class ControlNetProcessor:
             print("🖼️  BILDAUFBEREITUNG FÜR SAM 2")
             image_np = np.array(image.convert("RGB"))
-            # KORREKTUR: Immer nur eine BBox verwenden (SAM 2 erwartet genau 1)
             input_boxes = [[[x1, y1, x2, y2]]]
-            print(f"   Konvertiere Bild zu NumPy Array: {image_np.shape}")
-            print(f"   Erstelle EINZIGE Input Box: {input_boxes}")
-            print("   ℹ️  SAM 2 erwartet genau eine BBox pro Vorhersage")
-            print("🎯 SCHRITT 4-5: SAM MIT BOX-PROMPT")
-            print("   Verarbeite Bild mit SAM 2 Processor...")
             inputs = self.sam_processor(
-                image_np,
                 input_boxes=input_boxes,
                 return_tensors="pt"
-            ).to(self.device)
-            print(f"✅ Processor-Ausgabe: {len(inputs)} Elemente")
             # 4. SAM2 Vorhersage
             print("-" * 60)

             print("🖼️  BILDAUFBEREITUNG FÜR SAM 2")
             image_np = np.array(image.convert("RGB"))
+            # Immer nur eine BBox verwenden (SAM 2 erwartet genau 1)
             input_boxes = [[[x1, y1, x2, y2]]]
+            # 1. Punkt in der BBox-Mitte (zur Unterstützung von SAM, damit die BBox nicht zu dicht um den Kopf gezogen werden muss!)
+            center_x = (x1 + x2) // 2
+            center_y = (y1 + y2) // 2
+            # 2. Punkt im Gesicht (30% höher vom Mittelpunkt)(auch für größere BBox)
+            bbox_height = y2 - y1
+            face_offset = int(bbox_height * 0.3)
+            face_x = center_x
+            face_y = center_y - face_offset
+            face_y = max(y1 + 10, min(face_y, y2 - 10))  # In BBox halten
+            # BEIDE Punkte kombinieren
+            input_points = [[[center_x, center_y], [face_x, face_y]]]  # ZWEI Punkte
+            input_labels = [[1, 1]]  # Beide sind positive Prompts
+            print(f"   🎯 SAM-Prompt: BBox [{x1},{y1},{x2},{y2}]")
+            print(f"   👁️  Punkte: Mitte ({center_x},{center_y}), Gesicht ({face_x},{face_y})")
+            # Aufruf des SAM-Prozessors mit den Variablen. Der Processor verpackt diese Rohdaten
+            # in die für das SAM-Modell erforderlichen Tensoren und speichert sie in inputs.
             inputs = self.sam_processor(
+                image_np,
                 input_boxes=input_boxes,
+                input_points=input_points,    # ZWEI Punkte
+                input_labels=input_labels,    # Zwei Labels
                 return_tensors="pt"
+            ).to(self.device)                 # Ohne .to(self.device) werden die Tensoren standardmäßig im CPU-RAM erzeugt und gespeichert! Da GPU-Fehler!
+            print(f"✅ Processor-Ausgabe: Dictionary mit {len(inputs)} Schlüsseln: {list(inputs.keys())}")
+            print(f"   - 'pixel_values' Shape: {inputs['pixel_values'].shape}")
+            print(f"   - 'input_boxes' Shape: {inputs['input_boxes'].shape}")
+            if 'input_points' in inputs:
+                print(f"   - 'input_points' Shape: {inputs['input_points'].shape}")
             # 4. SAM2 Vorhersage
             print("-" * 60)