Update sam_module.py
Browse files- sam_module.py +421 -1
sam_module.py
CHANGED
|
@@ -38,7 +38,7 @@ def create_sam_mask(self, image, bbox_coords, mode):
|
|
| 38 |
print("-" * 60)
|
| 39 |
|
| 40 |
|
| 41 |
-
#
|
| 42 |
image_np = np.array(image.convert("RGB"))
|
| 43 |
|
| 44 |
|
|
@@ -531,6 +531,425 @@ def create_sam_mask(self, image, bbox_coords, mode):
|
|
| 531 |
# ============================================================
|
| 532 |
# BLOCK 3: FACE_ONLY_CHANGE
|
| 533 |
# ============================================================
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 534 |
elif mode == "face_only_change":
|
| 535 |
print("-" * 60)
|
| 536 |
print("👤 MODUS: FACE_ONLY_CHANGE")
|
|
@@ -572,4 +991,5 @@ def create_sam_mask(self, image, bbox_coords, mode):
|
|
| 572 |
fallback_mask = fallback_mask.resize(original_image.size, Image.Resampling.NEAREST)
|
| 573 |
|
| 574 |
return fallback_mask, fallback_mask
|
|
|
|
| 575 |
|
|
|
|
| 38 |
print("-" * 60)
|
| 39 |
|
| 40 |
|
| 41 |
+
# Der Prozessor von SAM erwartet ein NumPy-Array, kein PIL-Bild
|
| 42 |
image_np = np.array(image.convert("RGB"))
|
| 43 |
|
| 44 |
|
|
|
|
| 531 |
# ============================================================
|
| 532 |
# BLOCK 3: FACE_ONLY_CHANGE
|
| 533 |
# ============================================================
|
| 534 |
+
elif mode == "face_only_change":
|
| 535 |
+
print("-" * 60)
|
| 536 |
+
print("👤 SPEZIALMODUS: NUR GESICHT - ROBUSTER WORKFLOW")
|
| 537 |
+
print("-" * 60)
|
| 538 |
+
|
| 539 |
+
# ============================================================
|
| 540 |
+
# SCHRITT 1: Originalbild sichern
|
| 541 |
+
# ============================================================
|
| 542 |
+
original_image = image
|
| 543 |
+
print(f"💾 Originalbild gesichert: {original_image.size}")
|
| 544 |
+
|
| 545 |
+
# ============================================================
|
| 546 |
+
# SCHRITT 2: Crop = BBox × 2.5 (ERHÖHT für mehr Kontext)
|
| 547 |
+
# ============================================================
|
| 548 |
+
print("✂️ SCHRITT 2: ERSTELLE QUADRATISCHEN AUSSCHNITT (BBox × 2.5)")
|
| 549 |
+
|
| 550 |
+
# BBox-Zentrum berechnen
|
| 551 |
+
bbox_center_x = (x1 + x2) // 2
|
| 552 |
+
bbox_center_y = (y1 + y2) // 2
|
| 553 |
+
print(f" 📍 BBox-Zentrum: ({bbox_center_x}, {bbox_center_y})")
|
| 554 |
+
|
| 555 |
+
# Größte Dimension der BBox finden
|
| 556 |
+
bbox_width = x2 - x1
|
| 557 |
+
bbox_height = y2 - y1
|
| 558 |
+
bbox_max_dim = max(bbox_width, bbox_height)
|
| 559 |
+
print(f" 📏 BBox Dimensionen: {bbox_width} × {bbox_height} px")
|
| 560 |
+
print(f" 📐 Maximale BBox-Dimension: {bbox_max_dim} px")
|
| 561 |
+
|
| 562 |
+
# Crop-Größe berechnen (BBox × 2.5)
|
| 563 |
+
crop_size = int(bbox_max_dim * 2.5)
|
| 564 |
+
print(f" 🎯 Ziel-Crop-Größe: {crop_size} × {crop_size} px (BBox × 2.5)")
|
| 565 |
+
|
| 566 |
+
# Crop-Koordinaten berechnen (zentriert um BBox)
|
| 567 |
+
crop_x1 = bbox_center_x - crop_size // 2
|
| 568 |
+
crop_y1 = bbox_center_y - crop_size // 2
|
| 569 |
+
crop_x2 = crop_x1 + crop_size
|
| 570 |
+
crop_y2 = crop_y1 + crop_size
|
| 571 |
+
|
| 572 |
+
# Sicherstellen, dass Crop innerhalb der Bildgrenzen bleibt
|
| 573 |
+
crop_x1 = max(0, crop_x1)
|
| 574 |
+
crop_y1 = max(0, crop_y1)
|
| 575 |
+
crop_x2 = min(original_image.width, crop_x2)
|
| 576 |
+
crop_y2 = min(original_image.height, crop_y2)
|
| 577 |
+
|
| 578 |
+
# Falls Crop zu klein ist, anpassen
|
| 579 |
+
actual_crop_width = crop_x2 - crop_x1
|
| 580 |
+
actual_crop_height = crop_y2 - crop_y1
|
| 581 |
+
|
| 582 |
+
if actual_crop_width < crop_size or actual_crop_height < crop_size:
|
| 583 |
+
# An Kanten anpassen
|
| 584 |
+
if crop_x1 == 0:
|
| 585 |
+
crop_x2 = min(original_image.width, crop_size)
|
| 586 |
+
elif crop_x2 == original_image.width:
|
| 587 |
+
crop_x1 = max(0, original_image.width - crop_size)
|
| 588 |
+
|
| 589 |
+
if crop_y1 == 0:
|
| 590 |
+
crop_y2 = min(original_image.height, crop_size)
|
| 591 |
+
elif crop_y2 == original_image.height:
|
| 592 |
+
crop_y1 = max(0, original_image.height - crop_size)
|
| 593 |
+
|
| 594 |
+
print(f" 🔲 Crop-Bereich: [{crop_x1}, {crop_y1}, {crop_x2}, {crop_y2}]")
|
| 595 |
+
print(f" 📏 Tatsächliche Crop-Größe: {crop_x2-crop_x1} × {crop_y2-crop_y1} px")
|
| 596 |
+
|
| 597 |
+
# Bild ausschneiden – quadratisch und 2,5-mal so groß wie die BBox
|
| 598 |
+
cropped_image = original_image.crop((crop_x1, crop_y1, crop_x2, crop_y2))
|
| 599 |
+
print(f" ✅ Quadratischer Ausschnitt erstellt: {cropped_image.size}")
|
| 600 |
+
|
| 601 |
+
# ============================================================
|
| 602 |
+
# SCHRITT 3: BBox-Koordinaten transformieren
|
| 603 |
+
# ============================================================
|
| 604 |
+
print("📐 SCHRITT 3: BBox-KOORDINATEN TRANSFORMIEREN")
|
| 605 |
+
rel_x1 = x1 - crop_x1
|
| 606 |
+
rel_y1 = y1 - crop_y1
|
| 607 |
+
rel_x2 = x2 - crop_x1
|
| 608 |
+
rel_y2 = y2 - crop_y1
|
| 609 |
+
|
| 610 |
+
# Sicherstellen, dass BBox innerhalb des Crops liegt
|
| 611 |
+
rel_x1 = max(0, rel_x1)
|
| 612 |
+
rel_y1 = max(0, rel_y1)
|
| 613 |
+
rel_x2 = min(cropped_image.width, rel_x2)
|
| 614 |
+
rel_y2 = min(cropped_image.height, rel_y2)
|
| 615 |
+
|
| 616 |
+
print(f" 🎯 Relative BBox im Crop: [{rel_x1}, {rel_y1}, {rel_x2}, {rel_y2}]")
|
| 617 |
+
print(f" 📏 Relative BBox Größe: {rel_x2-rel_x1} × {rel_y2-rel_y1} px")
|
| 618 |
+
|
| 619 |
+
# ============================================================
|
| 620 |
+
# SCHRITT 4: INTENSIVE BILDAUFBEREITUNG FÜR GESICHTSERKENNUNG
|
| 621 |
+
# ============================================================
|
| 622 |
+
print("🔍 SCHRITT 4: ERWEITERTE BILDAUFBEREITUNG FÜR GESICHTSERKENNUNG")
|
| 623 |
+
|
| 624 |
+
# 1. Kontrast verstärken
|
| 625 |
+
contrast_enhancer = ImageEnhance.Contrast(cropped_image)
|
| 626 |
+
enhanced_image = contrast_enhancer.enhance(1.8) # 80% mehr Kontrast
|
| 627 |
+
|
| 628 |
+
# 2. Schärfe erhöhen für bessere Kantenerkennung
|
| 629 |
+
sharpness_enhancer = ImageEnhance.Sharpness(enhanced_image)
|
| 630 |
+
enhanced_image = sharpness_enhancer.enhance(2.0) # 100% mehr Schärfe
|
| 631 |
+
|
| 632 |
+
# 3. Helligkeit anpassen
|
| 633 |
+
brightness_enhancer = ImageEnhance.Brightness(enhanced_image)
|
| 634 |
+
enhanced_image = brightness_enhancer.enhance(1.1) # 10% heller
|
| 635 |
+
|
| 636 |
+
print(f" ✅ Erweiterte Bildaufbereitung abgeschlossen")
|
| 637 |
+
print(f" • Kontrast: +80%")
|
| 638 |
+
print(f" • Schärfe: +100%")
|
| 639 |
+
print(f" • Helligkeit: +10%")
|
| 640 |
+
|
| 641 |
+
# Für SAM: Verwende aufbereiteten Ausschnitt
|
| 642 |
+
image = enhanced_image
|
| 643 |
+
x1, y1, x2, y2 = rel_x1, rel_y1, rel_x2, rel_y2
|
| 644 |
+
|
| 645 |
+
print(" 🔄 SAM wird auf aufbereitetem Ausschnitt ausgeführt")
|
| 646 |
+
print(f" 📊 SAM-Eingabegröße: {image.size}")
|
| 647 |
+
|
| 648 |
+
#======Bei allen Modi gleich=====
|
| 649 |
+
print("-" * 60)
|
| 650 |
+
print(f"📦 BOUNDING BOX DETAILS FÜR SAM:")
|
| 651 |
+
print(f" Bild-Größe für SAM: {image.size}")
|
| 652 |
+
print(f" BBox Koordinaten: [{x1}, {y1}, {x2}, {y2}]")
|
| 653 |
+
print(f" BBox Dimensionen: {x2-x1}px × {y2-y1}px")
|
| 654 |
+
|
| 655 |
+
# Vorbereitung für SAM2 - WICHTIG: NUR EINE BBOX
|
| 656 |
+
print("-" * 60)
|
| 657 |
+
print("🖼️ BILDAUFBEREITUNG FÜR SAM 2")
|
| 658 |
+
#SAM erwartet NumPy-Array, kein PIL
|
| 659 |
+
image_np = np.array(image.convert("RGB"))
|
| 660 |
+
|
| 661 |
+
# Immer nur eine BBox verwenden (SAM 2 erwartet genau 1)
|
| 662 |
+
input_boxes = [[[x1, y1, x2, y2]]]
|
| 663 |
+
|
| 664 |
+
# Punkt in der BBox-Mitte (zur Unterstützung von SAM, damit die BBox nicht zu dicht um den Kopf gezogen werden muss!)
|
| 665 |
+
center_x = (x1 + x2) // 2
|
| 666 |
+
center_y = (y1 + y2) // 2
|
| 667 |
+
|
| 668 |
+
# Punkt im Gesicht (30 % der BBox-Höhe oberhalb des Mittelpunkts; auch für größere BBox)
|
| 669 |
+
bbox_height = y2 - y1
|
| 670 |
+
face_offset = int(bbox_height * 0.3)
|
| 671 |
+
face_x = center_x
|
| 672 |
+
face_y = center_y - face_offset
|
| 673 |
+
face_y = max(y1 + 10, min(face_y, y2 - 10)) # In BBox halten
|
| 674 |
+
|
| 675 |
+
# BEIDE Punkte kombinieren
|
| 676 |
+
input_points = [[[[center_x, center_y], [face_x, face_y]]]] # ZWEI Punkte
|
| 677 |
+
input_labels = [[[1, 1]]] # Beide sind positive Prompts
|
| 678 |
+
|
| 679 |
+
print(f" 🎯 SAM-Prompt: BBox [{x1},{y1},{x2},{y2}]")
|
| 680 |
+
print(f" 👁️ Punkte: Mitte ({center_x},{center_y}), Gesicht ({face_x},{face_y})")
|
| 681 |
+
|
| 682 |
+
|
| 683 |
+
# Aufruf des SAM-Prozessors mit den Variablen. Der Processor verpackt diese Rohdaten
|
| 684 |
+
# in die für das SAM-Modell erforderlichen Tensoren und speichert sie in inputs.
|
| 685 |
+
inputs = self.sam_processor(
|
| 686 |
+
image_np,
|
| 687 |
+
input_boxes=input_boxes,
|
| 688 |
+
input_points=input_points, # ZWEI Punkte
|
| 689 |
+
input_labels=input_labels, # Zwei Labels
|
| 690 |
+
return_tensors="pt"
|
| 691 |
+
).to(self.device) # Ohne .to(self.device) werden die Tensoren standardmäßig im CPU-RAM erzeugt und gespeichert! Da GPU-Fehler!
|
| 692 |
+
|
| 693 |
+
print(f"✅ Processor-Ausgabe: Dictionary mit {len(inputs)} Schlüsseln: {list(inputs.keys())}")
|
| 694 |
+
print(f" - 'pixel_values' Shape: {inputs['pixel_values'].shape}")
|
| 695 |
+
print(f" - 'input_boxes' Shape: {inputs['input_boxes'].shape}")
|
| 696 |
+
if 'input_points' in inputs:
|
| 697 |
+
print(f" - 'input_points' Shape: {inputs['input_points'].shape}")
|
| 698 |
+
|
| 699 |
+
|
| 700 |
+
# 4. SAM2 Vorhersage
|
| 701 |
+
print("-" * 60)
|
| 702 |
+
print("🧠 SAM 2 INFERENZ (Vorhersage)")
|
| 703 |
+
with torch.no_grad():
|
| 704 |
+
print(" Führe Vorhersage durch...")
|
| 705 |
+
outputs = self.sam_model(**inputs)
|
| 706 |
+
print(f"✅ Vorhersage abgeschlossen")
|
| 707 |
+
print(f" Anzahl der Vorhersagemasken: {outputs.pred_masks.shape[2]}")
|
| 708 |
+
|
| 709 |
+
# 5. Maske extrahieren
|
| 710 |
+
print("📏 SCHRITT 6: MASKE EXTRAHIEREN")
|
| 711 |
+
|
| 712 |
+
num_masks = outputs.pred_masks.shape[2]
|
| 713 |
+
print(f" SAM lieferte {num_masks} verschiedene Masken")
|
| 714 |
+
|
| 715 |
+
# Extrahiere alle Masken
|
| 716 |
+
all_masks = []
|
| 717 |
+
|
| 718 |
+
for i in range(num_masks):
|
| 719 |
+
single_mask = outputs.pred_masks[:, :, i, :, :]
|
| 720 |
+
resized_mask = F.interpolate(
|
| 721 |
+
single_mask,
|
| 722 |
+
size=(image.height, image.width),
|
| 723 |
+
mode='bilinear',
|
| 724 |
+
align_corners=False
|
| 725 |
+
).squeeze()
|
| 726 |
+
|
| 727 |
+
mask_np = resized_mask.sigmoid().cpu().numpy()
|
| 728 |
+
all_masks.append(mask_np)
|
| 729 |
+
|
| 730 |
+
# Basis-Statistiken für jede Maske
|
| 731 |
+
mask_binary = (mask_np > 0.5).astype(np.uint8)
|
| 732 |
+
mask_area = np.sum(mask_binary)
|
| 733 |
+
print(f" Maske {i+1}: Größe={mask_area:,} Pixel, Max-Konfidenz={mask_np.max():.3f}")
|
| 734 |
+
|
| 735 |
+
|
| 736 |
+
|
| 737 |
+
|
| 738 |
+
|
| 739 |
+
|
| 740 |
+
|
| 741 |
+
|
| 742 |
+
|
| 743 |
+
|
| 744 |
+
|
| 745 |
+
|
| 746 |
+
|
| 747 |
+
|
| 748 |
+
|
| 749 |
+
|
| 750 |
+
|
| 751 |
+
|
| 752 |
+
|
| 753 |
+
|
| 754 |
+
# ============================================================
|
| 755 |
+
# SPEZIALHEURISTIK FÜR GESICHTSMODUS (später im Code)
|
| 756 |
+
# ============================================================
|
| 757 |
+
if mode == "face_only_change":
|
| 758 |
+
print(f" 🔍 Analysiere Maske {i+1} mit GESICHTS-HEURISTIK")
|
| 759 |
+
|
| 760 |
+
# 1. FLÄCHENBASIERTE BEWERTUNG (40%)
|
| 761 |
+
area_ratio = mask_area_pixels / bbox_area
|
| 762 |
+
print(f" 📐 Flächen-Ratio: {area_ratio:.3f} ({mask_area_pixels:,} / {bbox_area:,} Pixel)")
|
| 763 |
+
|
| 764 |
+
# Optimale Kopfgröße: 80-120% der BBox
|
| 765 |
+
if area_ratio < 0.6:
|
| 766 |
+
print(f" ⚠️ Fläche zu klein für Kopf (<60% der BBox)")
|
| 767 |
+
area_score = area_ratio * 0.5 # Stark bestrafen
|
| 768 |
+
elif area_ratio > 1.5:
|
| 769 |
+
print(f" ⚠️ Fläche zu groß für Kopf (>150% der BBox)")
|
| 770 |
+
area_score = 2.0 - area_ratio # Linear bestrafen
|
| 771 |
+
elif 0.8 <= area_ratio <= 1.2:
|
| 772 |
+
area_score = 1.0 # Perfekte Größe
|
| 773 |
+
print(f" ✅ Perfekte Kopfgröße (80-120% der BBox)")
|
| 774 |
+
else:
|
| 775 |
+
# Sanfte Abweichung
|
| 776 |
+
area_score = 1.0 - abs(area_ratio - 1.0) * 0.5
|
| 777 |
+
|
| 778 |
+
# 2. KOMPAKTHEIT/SOLIDITÄT (30%)
|
| 779 |
+
labeled_mask = measure.label(mask_binary)
|
| 780 |
+
regions = measure.regionprops(labeled_mask)
|
| 781 |
+
|
| 782 |
+
if len(regions) == 0:
|
| 783 |
+
compactness_score = 0.1
|
| 784 |
+
print(f" ❌ Keine zusammenhängenden Regionen gefunden")
|
| 785 |
+
else:
|
| 786 |
+
# Größte Region finden (sollte der Kopf sein)
|
| 787 |
+
largest_region = max(regions, key=lambda r: r.area)
|
| 788 |
+
|
| 789 |
+
# Solidität = Fläche / konvexe Hüllenfläche
|
| 790 |
+
solidity = largest_region.solidity if hasattr(largest_region, 'solidity') else 0.7
|
| 791 |
+
|
| 792 |
+
# Exzentrizität (wie elliptisch) - Köpfe sind tendenziell elliptisch
|
| 793 |
+
eccentricity = largest_region.eccentricity if hasattr(largest_region, 'eccentricity') else 0.5
|
| 794 |
+
|
| 795 |
+
# Perfekt runde Formen (Kreis) sind 0, Linie wäre 1
|
| 796 |
+
# Köpfe haben typischerweise 0.5-0.8
|
| 797 |
+
if 0.4 <= eccentricity <= 0.9:
|
| 798 |
+
eccentricity_score = 1.0 - abs(eccentricity - 0.65) * 2
|
| 799 |
+
else:
|
| 800 |
+
eccentricity_score = 0.2
|
| 801 |
+
|
| 802 |
+
compactness_score = (solidity * 0.6 + eccentricity_score * 0.4)
|
| 803 |
+
print(f" 🎯 Kompaktheits-Analyse:")
|
| 804 |
+
print(f" • Solidität (Fläche/Konvex): {solidity:.3f}")
|
| 805 |
+
print(f" • Exzentrizität (Form): {eccentricity:.3f}")
|
| 806 |
+
print(f" • Kompaktheits-Score: {compactness_score:.3f}")
|
| 807 |
+
|
| 808 |
+
# 3. BBOX-ÜBERLAPPUNG (20%)
|
| 809 |
+
bbox_mask = np.zeros((image.height, image.width), dtype=np.uint8)
|
| 810 |
+
bbox_mask[y1:y2, x1:x2] = 1
|
| 811 |
+
overlap = np.sum(mask_binary & bbox_mask)
|
| 812 |
+
bbox_overlap_ratio = overlap / mask_area_pixels if mask_area_pixels > 0 else 0
|
| 813 |
+
|
| 814 |
+
# Für Kopf: Sollte großteils in BBox sein (mind. 70%)
|
| 815 |
+
if bbox_overlap_ratio >= 0.7:
|
| 816 |
+
bbox_score = 1.0
|
| 817 |
+
print(f" ✅ Hohe BBox-Überlappung: {bbox_overlap_ratio:.3f} ({overlap:,} Pixel)")
|
| 818 |
+
elif bbox_overlap_ratio >= 0.5:
|
| 819 |
+
bbox_score = bbox_overlap_ratio * 1.2
|
| 820 |
+
print(f" ⚠️ Mittlere BBox-Überlappung: {bbox_overlap_ratio:.3f}")
|
| 821 |
+
else:
|
| 822 |
+
bbox_score = bbox_overlap_ratio * 0.8
|
| 823 |
+
print(f" ❌ Geringe BBox-Überlappung: {bbox_overlap_ratio:.3f}")
|
| 824 |
+
|
| 825 |
+
# 4. SAM-KONFIDENZ (10%)
|
| 826 |
+
confidence_score = mask_max
|
| 827 |
+
|
| 828 |
+
# GESAMTSCORE für Gesicht
|
| 829 |
+
score = (
|
| 830 |
+
area_score * 0.4 + # 40% Flächenpassung
|
| 831 |
+
compactness_score * 0.3 + # 30% Kompaktheit
|
| 832 |
+
bbox_score * 0.2 + # 20% BBox-Überlappung
|
| 833 |
+
confidence_score * 0.1 # 10% Konfidenz
|
| 834 |
+
)
|
| 835 |
+
|
| 836 |
+
print(f" 📊 GESICHTS-SCORES für Maske {i+1}:")
|
| 837 |
+
print(f" • Flächen-Score: {area_score:.3f}")
|
| 838 |
+
print(f" • Kompaktheits-Score: {compactness_score:.3f}")
|
| 839 |
+
print(f" • BBox-Überlappungs-Score: {bbox_score:.3f}")
|
| 840 |
+
print(f" • Konfidenz-Score: {confidence_score:.3f}")
|
| 841 |
+
print(f" • GESAMTSCORE: {score:.3f}")
|
| 842 |
+
|
| 843 |
+
# ============================================================
|
| 844 |
+
# THRESHOLD-BESTIMMUNG FÜR GESICHTSMODUS
|
| 845 |
+
# ============================================================
|
| 846 |
+
if mode == "face_only_change":
|
| 847 |
+
# Spezieller Threshold für Gesichter
|
| 848 |
+
if max_val < 0.5:
|
| 849 |
+
dynamic_threshold = 0.25
|
| 850 |
+
print(f" ⚠️ SAM ist unsicher für Gesicht (max_val={max_val:.3f} < 0.5)")
|
| 851 |
+
elif max_val < 0.8:
|
| 852 |
+
dynamic_threshold = max_val * 0.65 # Mittlerer Threshold
|
| 853 |
+
print(f" ℹ️ SAM ist mäßig sicher für Gesicht (max_val={max_val:.3f})")
|
| 854 |
+
else:
|
| 855 |
+
dynamic_threshold = max_val * 0.75 # Hoher Threshold
|
| 856 |
+
print(f" ✅ SAM ist sicher für Gesicht (max_val={max_val:.3f} >= 0.8)")
|
| 857 |
+
|
| 858 |
+
print(f" 🎯 Gesichts-Threshold: {dynamic_threshold:.3f}")
|
| 859 |
+
|
| 860 |
+
# ============================================================
|
| 861 |
+
# POSTPROCESSING FÜR GESICHTSMODUS
|
| 862 |
+
# ============================================================
|
| 863 |
+
if mode == "face_only_change":
|
| 864 |
+
print("👤 GESICHTS-SPEZIFISCHES POSTPROCESSING")
|
| 865 |
+
|
| 866 |
+
# 1. Größte zusammenhängende Komponente finden
|
| 867 |
+
labeled_array, num_features = ndimage.label(mask_array)
|
| 868 |
+
|
| 869 |
+
if num_features > 0:
|
| 870 |
+
print(f" 🔍 Gefundene Komponenten: {num_features}")
|
| 871 |
+
|
| 872 |
+
sizes = ndimage.sum(mask_array, labeled_array, range(1, num_features + 1))
|
| 873 |
+
largest_component_idx = np.argmax(sizes) + 1
|
| 874 |
+
|
| 875 |
+
print(f" 👑 Größte Komponente: Nr. {largest_component_idx} mit {sizes[largest_component_idx-1]:,} Pixel")
|
| 876 |
+
|
| 877 |
+
# NUR die größte Komponente behalten (der Kopf)
|
| 878 |
+
mask_array = np.where(labeled_array == largest_component_idx, mask_array, 0)
|
| 879 |
+
|
| 880 |
+
# 2. MORPHOLOGISCHE OPERATIONEN FÜR SAUBEREN KOPF
|
| 881 |
+
print(" ⚙️ Morphologische Operationen für sauberen Kopf")
|
| 882 |
+
|
| 883 |
+
# Zuerst CLOSE, um kleine Löcher im Kopf zu füllen
|
| 884 |
+
kernel_close = np.ones((7, 7), np.uint8)
|
| 885 |
+
mask_array = cv2.morphologyEx(mask_array, cv2.MORPH_CLOSE, kernel_close, iterations=1)
|
| 886 |
+
print(" • MORPH_CLOSE (7x7) - Löcher im Kopf füllen")
|
| 887 |
+
|
| 888 |
+
# Dann OPEN, um kleine Ausreißer zu entfernen
|
| 889 |
+
kernel_open = np.ones((5, 5), np.uint8)
|
| 890 |
+
mask_array = cv2.morphologyEx(mask_array, cv2.MORPH_OPEN, kernel_open, iterations=1)
|
| 891 |
+
print(" • MORPH_OPEN (5x5) - Rauschen entfernen")
|
| 892 |
+
|
| 893 |
+
# ============================================================
|
| 894 |
+
# KRITISCH: MASKE IMMER ZURÜCK AUF ORIGINALGRÖSSE (auch bei Fallback!)
|
| 895 |
+
# ============================================================
|
| 896 |
+
print("-" * 60)
|
| 897 |
+
print("🔄 MASKE IMMER ZURÜCK AUF ORIGINALGRÖSSE TRANSFORMIEREN")
|
| 898 |
+
|
| 899 |
+
# WICHTIG: Immer die richtigen Crop-Koordinaten verwenden
|
| 900 |
+
temp_mask = Image.fromarray(mask_array).convert("L")
|
| 901 |
+
print(f" Maskengröße auf Ausschnitt: {temp_mask.size}")
|
| 902 |
+
|
| 903 |
+
# Maske auf ORIGINALBILDGRÖSSE bringen
|
| 904 |
+
final_mask = Image.new("L", original_image.size, 0)
|
| 905 |
+
print(f" Leere Maske in Originalgröße: {final_mask.size}")
|
| 906 |
+
|
| 907 |
+
# Immer die gespeicherten Crop-Koordinaten verwenden
|
| 908 |
+
if crop_x1 is not None and crop_y1 is not None:
|
| 909 |
+
final_mask.paste(temp_mask, (crop_x1, crop_y1))
|
| 910 |
+
print(f" Maskenposition im Original: ({crop_x1}, {crop_y1})")
|
| 911 |
+
else:
|
| 912 |
+
# Fallback: Zentrieren
|
| 913 |
+
x_offset = (original_image.width - temp_mask.width) // 2
|
| 914 |
+
y_offset = (original_image.height - temp_mask.height) // 2
|
| 915 |
+
final_mask.paste(temp_mask, (x_offset, y_offset))
|
| 916 |
+
print(f" ⚠️ Keine Crop-Koordinaten, zentriert: ({x_offset}, {y_offset})")
|
| 917 |
+
|
| 918 |
+
mask_array = np.array(final_mask)
|
| 919 |
+
print(f" ✅ Maske zurück auf Originalgröße skaliert: {mask_array.shape}")
|
| 920 |
+
|
| 921 |
+
# Bild-Referenz zurücksetzen
|
| 922 |
+
image = original_image
|
| 923 |
+
print(f" 🔄 Bild-Referenz wieder auf Original gesetzt: {image.size}")
|
| 924 |
+
|
| 925 |
+
# ============================================================
|
| 926 |
+
# ABSCHLIESSENDE STATISTIK FÜR GESICHTSMODUS
|
| 927 |
+
# ============================================================
|
| 928 |
+
if mode == "face_only_change":
|
| 929 |
+
original_face_area = original_bbox_size[0] * original_bbox_size[1]
|
| 930 |
+
coverage_ratio = white_pixels / original_face_area if original_face_area > 0 else 0
|
| 931 |
+
print(f" 👤 GESICHTSABDECKUNG: {coverage_ratio:.1%} der ursprünglichen BBox")
|
| 932 |
+
|
| 933 |
+
# Warnungen basierend auf Abdeckung
|
| 934 |
+
if coverage_ratio < 0.7:
|
| 935 |
+
print(f" ⚠️ WARNUNG: Geringe Gesichtsabdeckung ({coverage_ratio:.1%})")
|
| 936 |
+
elif coverage_ratio > 1.3:
|
| 937 |
+
print(f" ⚠️ WARNUNG: Sehr hohe Gesichtsabdeckung ({coverage_ratio:.1%})")
|
| 938 |
+
elif 0.8 <= coverage_ratio <= 1.2:
|
| 939 |
+
print(f" ✅ OPTIMALE Gesichtsabdeckung ({coverage_ratio:.1%})")
|
| 940 |
+
|
| 941 |
+
# ============================================================
|
| 942 |
+
# FINALE AUSGABE FÜR GESICHTSMODUS
|
| 943 |
+
# ============================================================
|
| 944 |
+
if mode == "face_only_change" and crop_size is not None:
|
| 945 |
+
print(f"👤 Bei face_only_change: Crop={crop_size}×{crop_size}px, Heuristik-Score={best_score:.3f}")
|
| 946 |
+
print(f"👤 Kopfabdeckung: {coverage_ratio:.1%} der BBox")
|
| 947 |
+
|
| 948 |
+
|
| 949 |
+
|
| 950 |
+
|
| 951 |
+
|
| 952 |
+
|
| 953 |
elif mode == "face_only_change":
|
| 954 |
print("-" * 60)
|
| 955 |
print("👤 MODUS: FACE_ONLY_CHANGE")
|
|
|
|
| 991 |
fallback_mask = fallback_mask.resize(original_image.size, Image.Resampling.NEAREST)
|
| 992 |
|
| 993 |
return fallback_mask, fallback_mask
|
| 994 |
+
|
| 995 |
|