Update controlnet_module.py
controlnet_module.py  +33  -47  CHANGED
@@ -134,14 +134,12 @@ class ControlNetProcessor:

             # 3. Prepare the input for SAM2
             # Create the bbox in the format [x_min, y_min, x_max, y_max]
-            #
-
-
-            input_boxes = [[[x1, y1, x2, y2]]]  # triple-nested
+            # Triple-nested format: [[[x1, y1, x2, y2]]]
+            input_boxes = [[[x1, y1, x2, y2]]]

-            # Image
+            # Preprocess the original image and bbox coordinates for segmentation
             inputs = self.sam_processor(
-                image_np,
+                image_np,
                 input_boxes=input_boxes,
                 return_tensors="pt"
             ).to(self.device)
@@ -151,56 +149,43 @@ class ControlNetProcessor:
             with torch.no_grad():
                 outputs = self.sam_model(**inputs)

-
             # DEBUG: check dimensions
-            print(f"🔍 Original image size: {image.size}")
-            print(f"🔍 Processed image size: {inputs['pixel_values'].shape}")
-            print(f"🔍 Output masks shape: {outputs.pred_masks.shape}")
-            print(f"🔍 Original_sizes parameter (in inputs): {inputs.get('original_sizes', 'NOT FOUND')}")
-            print(f"🔍 Reshaped_input_sizes parameter (in inputs): {inputs.get('reshaped_input_sizes', 'NOT FOUND')}")
+            print(f"🔍 Original image size: {image.size}")
+            print(f"🔍 Processed image size: {inputs['pixel_values'].shape}")
+            print(f"🔍 Output masks shape: {outputs.pred_masks.shape}")

-            #
+            # 5. Select the mask (first mask of the first batch dimension)
             single_mask = outputs.pred_masks[:, :, 0, :, :]  # Shape: [1, 1, 256, 256]

-
-            print(f"🔍
-            print(f"🔍 outputs.pred_masks dimensions: {outputs.pred_masks.dim()}")
-
-            # 2. CRITICAL FIX: make sure the mask stays 4D
-            # If single_mask is 3D ([1, 256, 256]), add a channel dimension
-            if single_mask.dim() == 3:
-                print("⚠️ Mask is 3D, adding a channel dimension")
-                single_mask = single_mask.unsqueeze(1)  # becomes [1, 1, 256, 256]
-
-            print(f"🔍 Final mask shape before post_process: {single_mask.shape}")
-            print(f"🔍 Final mask dimensions: {single_mask.dim()}")
-
-            # 3. Convert the size parameters
-            original_size = inputs['original_sizes'].cpu().numpy().tolist()[0]  # [512, 512]
-            input_size = inputs['reshaped_input_sizes'].cpu().numpy().tolist()[0]  # [512, 512]
-
-            # 4. Post-process the mask (IMPORTANT: original_size must be a tuple/list)
-            mask = self.sam_processor.post_process_masks(
-                single_mask,
-                original_sizes=[original_size],  # [512, 512] as a list inside a list
-                reshaped_input_sizes=[input_size]  # [512, 512] as a list inside a list
-            )[0][0]
-
+            print(f"🔍 Single mask shape: {single_mask.shape}")
+            print(f"🔍 Single mask dimensions: {single_mask.dim()}")

-            #
-
-
+            # 6. CRITICAL FIX: scale directly instead of using post_process_masks
+            import torch.nn.functional as F
+
+            # Scale the 256x256 raw mask directly to the target size (image.height, image.width)
+            final_mask = F.interpolate(
+                single_mask,  # use the 256x256 raw mask directly
+                size=(image.height, image.width),  # scale directly to the target size
+                mode='bilinear',
+                align_corners=False
+            ).squeeze()  # remove the batch and channel dimensions

-
-
-
+            print(f"🔍 Final mask shape after interpolation: {final_mask.shape}")
+
+            # 7. Convert to NumPy and apply a threshold
+            mask_np = final_mask.sigmoid().cpu().numpy()
+            mask_array = (mask_np > 0.5).astype(np.uint8) * 255

-            #
+            # 8. Convert to a PIL image
+            mask = Image.fromarray(mask_array.squeeze()).convert("L")
+
+            # 9. Smooth the edges for more natural transitions
             mask_array = np.array(mask)
             mask_array = self._smooth_mask(mask_array, blur_radius=2)
             mask = Image.fromarray(mask_array).convert("L")

-            #
+            # 10. Mode-specific adjustment (inversion)
             if mode == "environment_change":
                 # MODE 1: change the environment - keep the object black (preserved)
                 mask = Image.eval(mask, lambda x: 255 - x)
@@ -211,7 +196,7 @@ class ControlNetProcessor:

             print(f"✅ SAM 2: Precise mask created ({mask.size})")
             return mask
-
+
         except Exception as e:
             print(f"⚠️ SAM 2 error (Transformers API): {str(e)[:200]}")
             print(f"🔍 SAM 2 model dtype: {self.sam_model.dtype}")
@@ -221,7 +206,8 @@ class ControlNetProcessor:
             traceback.print_exc()
             print("ℹ️ Falling back to rectangular mask")
             return self._create_rectangular_mask(image, bbox_coords, mode)
-
+
+
     def _create_rectangular_mask(self, image, bbox_coords, mode):
         """Fallback: creates a rectangular mask"""
         from PIL import ImageDraw
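For reference, the triple-nested input_boxes shape used in the first hunk follows the Hugging Face SAM processor convention: one list per batch image, one list of boxes per image, four coordinates per box. A minimal sketch against the SAM (v1) Transformers API, assuming the module's self.sam_processor / self.sam_model behave like SamProcessor / SamModel; the placeholder image, checkpoint choice, and box values are illustrative only:

import torch
from PIL import Image
from transformers import SamModel, SamProcessor

processor = SamProcessor.from_pretrained("facebook/sam-vit-base")
model = SamModel.from_pretrained("facebook/sam-vit-base")

image = Image.new("RGB", (512, 512))     # placeholder image
x1, y1, x2, y2 = 100, 100, 400, 400      # bbox as [x_min, y_min, x_max, y_max]
input_boxes = [[[x1, y1, x2, y2]]]       # [batch][boxes per image][coords]

inputs = processor(image, input_boxes=input_boxes, return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)

# pred_masks: [batch, box sets, mask candidates, 256, 256]
print(outputs.pred_masks.shape)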
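The substantive change in the second hunk is replacing post_process_masks with a direct bilinear upscale of the low-resolution mask logits. A standalone sketch of that post-processing step, assuming a 4D logits tensor shaped like pred_masks[:, :, 0, :, :]; the helper name and dummy values are hypothetical:

import numpy as np
import torch
import torch.nn.functional as F
from PIL import Image

def logits_to_mask(mask_logits: torch.Tensor, height: int, width: int) -> Image.Image:
    """Upscale low-resolution SAM mask logits to (height, width) and binarize."""
    upscaled = F.interpolate(
        mask_logits,              # expected shape [1, 1, H_low, W_low], e.g. 256x256
        size=(height, width),     # target size of the original image
        mode="bilinear",
        align_corners=False,
    ).squeeze()                   # drop batch and channel dims -> [height, width]
    binary = (upscaled.sigmoid().cpu().numpy() > 0.5).astype(np.uint8) * 255
    return Image.fromarray(binary).convert("L")

# Dummy logits stand in for outputs.pred_masks[:, :, 0, :, :]
dummy_logits = torch.randn(1, 1, 256, 256)
mask = logits_to_mask(dummy_logits, height=512, width=512)
print(mask.size)  # (512, 512)

Thresholding the sigmoid at 0.5 is equivalent to keeping logits above zero; smoothing and the mode-dependent inversion can then be applied to the returned PIL mask as in the module.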