Spaces:

Astridkraft
/

Stable-ControlNet-GPU

Paused

App Files Files Community

Astridkraft committed on Dec 5, 2025

Commit

c37ab74

verified ·

1 Parent(s): 0ccfb8b

Update controlnet_facefix.py

Browse files

Files changed (1) hide show

controlnet_facefix.py +161 -55

controlnet_facefix.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# controlnet_facefix.py - OPTIMIERT FÜR MINIMALE GESICHTSVERBESSERUNG
 import torch
 from diffusers import StableDiffusionControlNetPipeline, ControlNetModel
 from PIL import Image
@@ -7,24 +7,23 @@ import cv2
 import numpy as np
 print("="*60)
-print("FACE-FIX: MINIMALE GESICHTSVERBESSERUNG")
 print("="*60)
-# WICHTIG: Dieselben Modelle wie in controlnet_module.py!
 _components_loaded = False
 _controlnet_depth = None
 _controlnet_pose = None
 _pipeline = None
 def _initialize_components():
-    """Lade ControlNets genau wie in controlnet_module.py"""
     global _components_loaded, _controlnet_depth, _controlnet_pose
     if _components_loaded:
         return True
     try:
-        print("1. Lade ControlNet Depth...")
         _controlnet_depth = ControlNetModel.from_pretrained(
             "lllyasviel/sd-controlnet-depth",
             torch_dtype=torch.float16
@@ -35,7 +34,7 @@ def _initialize_components():
         return False
     try:
-        print("2. Lade ControlNet OpenPose...")
         _controlnet_pose = ControlNetModel.from_pretrained(
             "lllyasviel/sd-controlnet-openpose",
             torch_dtype=torch.float16
@@ -46,70 +45,133 @@ def _initialize_components():
         return False
     _components_loaded = True
-    print("✅ ALLE KOMPONENTEN GELADEN")
     return True
 def _extract_depth_map(image):
-    """Depth Map wie in controlnet_module.py"""
     try:
         img_array = np.array(image.convert("RGB"))
         gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
-        depth_map = cv2.GaussianBlur(gray, (5, 5), 0)
-        depth_rgb = cv2.cvtColor(depth_map, cv2.COLOR_GRAY2RGB)
         return Image.fromarray(depth_rgb)
     except Exception as e:
         print(f"Depth Map Fehler: {e}")
-        return image.convert("RGB")
-def _extract_pose_simple(image):
-    """Einfache Pose-Extraktion"""
     try:
         img_array = np.array(image.convert("RGB"))
-        edges = cv2.Canny(img_array, 100, 200)
-        pose_image = Image.fromarray(edges).convert("RGB")
-        return pose_image
     except Exception as e:
-        print(f"Pose Extraction Fehler: {e}")
-        return image.convert("RGB")
 def apply_facefix(image: Image.Image, prompt: str, negative_prompt: str, seed: int, model_id: str):
-    """MINIMALE Verbesserung: Fokussiert auf Gesichter, Hintergrund fast unverändert"""
-    print("\n" + "🎭"*50)
-    print("FACE-FIX: MINIMALE VERBESSERUNG (Fokus: Gesichter)")
-    print(f"  Model: {model_id}")
-    print(f"  Original Seed: {seed}")
-    print("🎭"*50)
     start_time = time.time()
     # 1. Komponenten initialisieren
     if not _initialize_components():
-        print("❌ Komponenten konnten nicht geladen werden")
         return image
-    # 2. Control Images erstellen
-    print("🎭 Erstelle Control Images...")
-    depth_img = _extract_depth_map(image).resize((512, 512))
-    pose_img = _extract_pose_simple(image).resize((512, 512))
     # 3. Pipeline erstellen
     global _pipeline
     if _pipeline is None:
         try:
-            print("🔄 Lade Face-Fix Pipeline...")
             _pipeline = StableDiffusionControlNetPipeline.from_pretrained(
                 model_id,
-                controlnet=[_controlnet_pose, _controlnet_depth],
                 torch_dtype=torch.float16,
                 safety_checker=None,
                 requires_safety_checker=False,
             )
-            # Optimierungen für HF Spaces
             _pipeline.enable_attention_slicing()
             _pipeline.enable_vae_slicing()
-            print("✅ Pipeline geladen")
         except Exception as e:
             print(f"❌ Pipeline Fehler: {e}")
             return image
@@ -120,43 +182,87 @@ def apply_facefix(image: Image.Image, prompt: str, negative_prompt: str, seed: i
         print(f"  Device: {device}")
         pipeline = _pipeline.to(device)
-        # 5. EXTREM NIEDRIGE PARAMETER für minimale Änderung
-        #    Nur Qualitäts-Prompt, kein Inhalts-Prompt
-        face_quality_prompt = "perfect face, detailed skin, sharp eyes"
-        # 6. OPTIMIERTE INFERENCE mit minimaler Stärke
         result = pipeline(
-            prompt=face_quality_prompt,  # KURZ und fokussiert
-            negative_prompt="deformed, blurry, low quality",
-            image=[pose_img, depth_img],
-            controlnet_conditioning_scale=[0.7, 0.4],  # EXTREM NIEDRIG: OpenPose 0.35, Depth 0.12
-            num_inference_steps=20,                       # SEHR WENIG Steps
-            guidance_scale=7.0,                          # NIEDRIGER CFG
-            generator=torch.Generator(device).manual_seed(seed),  # Gleicher Seed
             height=512,
             width=512,
         ).images[0]
-        # Zurück auf Originalgröße
-        if image.size != (512, 512):
-            result = result.resize(image.size)
         duration = time.time() - start_time
-        print(f"\n✅✅✅ MINIMALE GESICHTSVERBESSERUNG in {duration:.1f}s ✅✅✅")
-        print(f"   - OpenPose: 0.35 (sehr subtil)")
-        print(f"   - Depth: 0.12 (kaum Hintergrund-Einfluss)")
-        print(f"   - Steps: 6 (minimal)")
-        print(f"   - CFG: 3.0 (niedrig)")
-        print(f"   - Gleicher Seed: {seed}")
         return result
     except Exception as e:
-        print(f"\n❌❌❌ FEHLER: {e} ❌❌❌")
         import traceback
         traceback.print_exc()
         return image
 print("="*60)
-print("FACE-FIX MODUL FERTIG (MINIMALE GESICHTSVERBESSERUNG)")
 print("="*60)

+# controlnet_facefix.py - NUR QUALITÄTSVERBESSERUNG MIT OPENPOSE + DEPTH
 import torch
 from diffusers import StableDiffusionControlNetPipeline, ControlNetModel
 from PIL import Image
 import numpy as np
 print("="*60)
+print("FACE-FIX: QUALITÄTSVERBESSERUNG MIT OPENPOSE + DEPTH")
 print("="*60)
 _components_loaded = False
 _controlnet_depth = None
 _controlnet_pose = None
 _pipeline = None
 def _initialize_components():
+    """Lade OpenPose und Depth ControlNets"""
     global _components_loaded, _controlnet_depth, _controlnet_pose
     if _components_loaded:
         return True
     try:
+        print("1. Lade ControlNet Depth (für 3D-Struktur)...")
         _controlnet_depth = ControlNetModel.from_pretrained(
             "lllyasviel/sd-controlnet-depth",
             torch_dtype=torch.float16
         return False
     try:
+        print("2. Lade ControlNet OpenPose (für Pose-Erhaltung)...")
         _controlnet_pose = ControlNetModel.from_pretrained(
             "lllyasviel/sd-controlnet-openpose",
             torch_dtype=torch.float16
         return False
     _components_loaded = True
+    print("✅ OPENPOSE + DEPTH GELADEN")
     return True
 def _extract_depth_map(image):
+    """Build a brightness-based pseudo depth map for ControlNet conditioning.
+
+    No depth-estimation model is involved: the result is derived purely
+    from the luminance of the input (grayscale -> 7x7 Gaussian blur ->
+    CLAHE -> inversion -> min-max normalization to 0..255).
+
+    Args:
+        image: Input picture; must support ``.convert(mode)``
+            (presumably a ``PIL.Image.Image`` -- confirm against callers).
+
+    Returns:
+        A 3-channel PIL image holding the processed grayscale values.
+        If any step raises, the error is printed and a plain grayscale
+        copy of the input (converted back to RGB) is returned instead.
+    """
     try:
         img_array = np.array(image.convert("RGB"))
+        # Convert to grayscale
         gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
+        # Apply a Gaussian blur for a smoother depth map
+        blurred = cv2.GaussianBlur(gray, (7, 7), 0)
+        # Adaptive histogram equalization (CLAHE) for better depth perception
+        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
+        enhanced = clahe.apply(blurred)
+        # Invert for a better depth rendering (bright = near, dark = far);
+        # i.e. dark source pixels end up bright in the map
+        inverted = 255 - enhanced
+        # Normalize to the full 0..255 range
+        depth_normalized = cv2.normalize(inverted, None, 0, 255, cv2.NORM_MINMAX)
+        # Convert to RGB
+        depth_rgb = cv2.cvtColor(depth_normalized.astype(np.uint8), cv2.COLOR_GRAY2RGB)
         return Image.fromarray(depth_rgb)
     except Exception as e:
         print(f"Depth Map Fehler: {e}")
+        # Fallback: plain grayscale (PIL only, no OpenCV involved)
+        return image.convert("L").convert("RGB")
+def _extract_pose_map(image):
+    """Build an edge map (named "pose map") emphasizing fine contours.
+
+    Three Canny passes with increasing thresholds are blended into one
+    grayscale edge image, which is then returned as a 3-channel PIL image.
+    No keypoint or skeleton detection is performed here; the output
+    consists of edges only.
+    NOTE(review): the caller passes this image to the OpenPose ControlNet,
+    which presumably expects rendered skeletons -- confirm that an edge
+    map is the intended conditioning input.
+
+    Args:
+        image: Input picture; must support ``.convert(mode)``
+            (presumably a ``PIL.Image.Image`` -- confirm against callers).
+
+    Returns:
+        A 3-channel PIL image of the blended edges. If the main path
+        raises, the error is printed and a single Canny pass (50/150)
+        over the RGB array is returned instead.
+    """
     try:
         img_array = np.array(image.convert("RGB"))
+        # Several Canny layers for different levels of detail
+        gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
+        # 1. Fine facial details (low threshold)
+        fine_details = cv2.Canny(gray, 20, 60)
+        # 2. Medium contours
+        medium_contours = cv2.Canny(gray, 40, 100)
+        # 3. Strong edges
+        strong_edges = cv2.Canny(gray, 80, 160)
+        # Combine with weights (fine details weighted most); the two-step
+        # blend gives effective weights of roughly 0.48 / 0.24 / 0.20
+        combined = cv2.addWeighted(fine_details, 0.6, medium_contours, 0.3, 0)
+        combined = cv2.addWeighted(combined, 0.8, strong_edges, 0.2, 0)
+        # Minimal dilation for continuity
+        # NOTE(review): dilating with a 1x1 kernel leaves the image
+        # unchanged, so this step is currently a no-op -- confirm whether a
+        # larger kernel (e.g. 3x3) was intended
+        kernel = np.ones((1, 1), np.uint8)
+        pose_edges = cv2.dilate(combined, kernel, iterations=1)
+        # Convert to RGB
+        pose_rgb = cv2.cvtColor(pose_edges, cv2.COLOR_GRAY2RGB)
+        return Image.fromarray(pose_rgb)
     except Exception as e:
+        print(f"Pose Map Fehler: {e}")
+        # Fallback: a single Canny pass. This is not guarded by its own
+        # try/except, so an error raised here propagates to the caller.
+        edges = cv2.Canny(np.array(image.convert("RGB")), 50, 150)
+        return Image.fromarray(edges).convert("RGB")
 def apply_facefix(image: Image.Image, prompt: str, negative_prompt: str, seed: int, model_id: str):
+    """
+    QUALITÄTSVERBESSERUNG MIT MAXIMALER INHALTSERHALTUNG
+    Verwendet:
+    1. OpenPose: Behält exakte Pose und Gesichtsstruktur
+    2. Depth: Behält 3D-Struktur und räumliche Anordnung
+    Strategie: MAXIMALE ControlNet-Stärke + Qualitäts-prompts
+    """
+    print("\n" + "🔧"*50)
+    print("FACE-FIX: QUALITÄTSVERBESSERUNG MIT OPENPOSE+DEPTH")
+    print(f"  Original: {image.size}")
+    print(f"  Seed: {seed}")
+    print("🔧"*50)
     start_time = time.time()
     # 1. Komponenten initialisieren
     if not _initialize_components():
+        print("❌ OpenPose/Depth konnten nicht geladen werden")
         return image
+    # 2. Control Maps erstellen
+    print("\n📐 Erstelle Control Maps...")
+    original_size = image.size
+    # Standardgröße für ControlNet
+    control_size = (512, 512)
+    resized_image = image.resize(control_size, Image.Resampling.LANCZOS)
+    # Depth Map (für 3D-Struktur)
+    depth_img = _extract_depth_map(resized_image)
+    # Pose Map (für Gesichts- und Körperstruktur)
+    pose_img = _extract_pose_map(resized_image)
+    # Optional: Debug speichern
+    depth_img.save("debug_depth_enhanced.png")
+    pose_img.save("debug_pose_enhanced.png")
     # 3. Pipeline erstellen
     global _pipeline
     if _pipeline is None:
         try:
+            print("🔄 Lade Pipeline mit OpenPose + Depth...")
             _pipeline = StableDiffusionControlNetPipeline.from_pretrained(
                 model_id,
+                controlnet=[_controlnet_pose, _controlnet_depth],  # OpenPose zuerst, dann Depth
                 torch_dtype=torch.float16,
                 safety_checker=None,
                 requires_safety_checker=False,
             )
+            # Optimierungen
             _pipeline.enable_attention_slicing()
             _pipeline.enable_vae_slicing()
+            print("✅ Pipeline mit OpenPose+Depth geladen")
         except Exception as e:
             print(f"❌ Pipeline Fehler: {e}")
             return image
         print(f"  Device: {device}")
         pipeline = _pipeline.to(device)
+        # 5. PROMPT-STRATEGIE FÜR QUALITÄTSVERBESSERUNG:
+        # Original-Prompt + Qualitäts-Keywords, ABER KEINE neuen Inhalte
+        # Basierend auf originalem Prompt, aber fokus auf Qualität
+        if "face" in prompt.lower() or "portrait" in prompt.lower():
+            quality_prompt = f"{prompt}, professional portrait, sharp focus, detailed skin, perfect face, clear eyes, high resolution, 8k"
+        else:
+            quality_prompt = f"{prompt}, high quality, sharp focus, detailed, professional photography, no artifacts"
+        # Negative Prompts für Qualitätsverbesserung
+        quality_negative = (
+            f"{negative_prompt}, "
+            "blurry, out of focus, lowres, low quality, jpeg artifacts, "
+            "compression artifacts, pixelated, grainy, noisy, "
+            "deformed, distorted, bad anatomy, mutation, ugly"
+        )
+        # 6. KRITISCHE PARAMETER FÜR INHALTSERHALTUNG:
+        # Hohe ControlNet-Stärken für maximale Kontrolle
+        # OpenPose: Hoch für Pose-Erhaltung
+        # Depth: Hoch für Strukturerhaltung
+        print("\n⚙️  Starte Qualitätsverbesserung mit Parametern:")
+        print(f"   • OpenPose Strength: 0.95 (sehr hoch für Pose-Erhaltung)")
+        print(f"   • Depth Strength: 0.85 (hoch für 3D-Struktur)")
+        print(f"   • Steps: 25")
+        print(f"   • CFG: 5.0 (niedrig für weniger 'Kreativität')")
         result = pipeline(
+            prompt=quality_prompt,
+            negative_prompt=quality_negative,
+            image=[pose_img, depth_img],  # OpenPose zuerst, dann Depth
+            controlnet_conditioning_scale=[0.95, 0.85],  # SEHR HOHE WERTE
+            num_inference_steps=25,                      # Ausreichend für Qualität
+            guidance_scale=5.0,                         # NIEDRIG für minimale Änderung
+            generator=torch.Generator(device).manual_seed(seed),  # GLEICHER SEED
             height=512,
             width=512,
         ).images[0]
+        # 7. Zurück auf Originalgröße
+        if original_size != (512, 512):
+            result = result.resize(original_size, Image.Resampling.LANCZOS)
         duration = time.time() - start_time
+        print(f"\n" + "✅"*50)
+        print("✅ QUALITÄTSVERBESSERUNG ABGESCHLOSSEN")
+        print(f"✅ Dauer: {duration:.1f}s")
+        print(f"✅ Parameter: OpenPose=0.95, Depth=0.85")
+        print(f"✅ Gleicher Seed: {seed}")
+        print(f"✅ Größe: {original_size} → {result.size}")
+        print("✅"*50)
+        # Optional: Vergleich erstellen
+        try:
+            comparison = Image.new('RGB', (original_size[0] * 2, original_size[1]))
+            comparison.paste(image, (0, 0))
+            comparison.paste(result, (original_size[0], 0))
+            # Füge Beschriftung hinzu
+            from PIL import ImageDraw, ImageFont
+            draw = ImageDraw.Draw(comparison)
+            # Einfache Beschriftung
+            draw.text((10, 10), "Vorher", fill=(255, 255, 255))
+            draw.text((original_size[0] + 10, 10), "Nachher", fill=(255, 255, 255))
+            comparison.save("quality_improvement_comparison.png")
+            print(f"📊 Vergleich gespeichert: quality_improvement_comparison.png")
+        except Exception as e:
+            print(f"⚠️  Konnte Vergleich nicht speichern: {e}")
         return result
     except Exception as e:
+        print(f"\n❌ FEHLER: {e}")
         import traceback
         traceback.print_exc()
         return image
 print("="*60)
+print("FACE-FIX BEREIT (OpenPose + Depth)")
 print("="*60)