Spaces:

Astridkraft
/

Stable-ControlNet-GPU

Paused

App Files Files Community

Astridkraft committed on Dec 14, 2025

Commit

47f75cd

verified ·

1 Parent(s): 9b14886

Update app.py

Browse files

Files changed (1) hide show

app.py +42 -125

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline
 from diffusers import StableDiffusionInpaintPipeline, AutoencoderKL
 from diffusers import DPMSolverMultistepScheduler, PNDMScheduler
 from controlnet_module import controlnet_processor
 import torch
 from PIL import Image, ImageDraw
 import time
@@ -462,31 +463,24 @@ def load_txt2img(model_id):
             print(f"❌ Auch Fallback fehlgeschlagen: {fallback_error}")
             raise
 def load_img2img():
-    """Lädt das Inpainting-Modell mit DPMSolver++ Scheduler"""
     global pipe_img2img
     if pipe_img2img is None:
-        print("🔄 Lade Inpainting-Modell...")
-        try:
-            pipe_img2img = StableDiffusionInpaintPipeline.from_pretrained(
-                "runwayml/stable-diffusion-inpainting",
-                torch_dtype=torch_dtype,
-                allow_pickle=False,
-                safety_checker=None,
-            ).to(device)
-            pipe_img2img.scheduler = DPMSolverMultistepScheduler.from_config(
-                pipe_img2img.scheduler.config,
-                algorithm_type="sde-dpmsolver++",
-                use_karras_sigmas=True,
-                timestep_spacing="trailing"
-            )
-            print("✅ DPMSolver++ Multistep Scheduler für Inpainting konfiguriert")
-        except Exception as e:
-            print(f"❌ Fehler beim Laden des Inpainting-Modells: {e}")
-            raise
         pipe_img2img.enable_attention_slicing()
         pipe_img2img.enable_vae_tiling()
@@ -774,12 +768,7 @@ def img_to_image(image, prompt, neg_prompt, strength, steps, guidance_scale,
                  mode, bbox_x1, bbox_y1, bbox_x2, bbox_y2,
                  progress=gr.Progress()):
     """
-    KORRIGIERTE HAUPTFUNKTION FÜR BILD-zu-BILD MIT RICHTIGEM COMPOSITING
-    WICHTIG: Verwendet den korrekten Compositing-Workflow:
-    1. Skaliert Bild und Maske gemeinsam
-    2. Führt Inpainting auf 512×512 durch
-    3. Kompositiert nur den bearbeiteten Bereich zurück ins Originalbild
     """
     try:
         if image is None:
@@ -854,11 +843,8 @@ def img_to_image(image, prompt, neg_prompt, strength, steps, guidance_scale,
         print(f"🎯 Finaler Prompt für {mode}: {enhanced_prompt}")
-        #Zur Überbrückung bis von der Pipelines Infos kommen!
-        progress(0, desc="Starte Generierung mit ControlNet...")
         # ===== MODUS-SPEZIFISCHE EINSTELLUNGEN =====
         adj_strength = min(0.85, strength * 1.25)
@@ -876,59 +862,14 @@ def img_to_image(image, prompt, neg_prompt, strength, steps, guidance_scale,
             keep_environment = True
             controlnet_strength = adj_strength * 0.5
             print(f"🎯 MODUS: Ausschließlich Gesicht → Depth+Canny (keep_environment=True)")
-        controlnet_steps = min(25, int(steps * 0.8))
-        print(f"⚙️ ControlNet Settings: Strength={controlnet_strength:.3f}, Steps={controlnet_steps}")
-        # ===== PUNKT 1: VAE-ENCODING & VERRAUSCHUNG =====
-        print("🔧 Punkt 1: Bereite verrauschtes Latent vor...")
-# 1. Bild für Latent-Encoding vorbereiten (bereits skaliertes Bild verwenden)
-if scaled_image is not None:
-    # img_for_latent ist das bereits herunterskalierte 512x512 Bild (mit Padding)
-    img_for_latent = scaled_image
-else:
-    # Fallback, falls keine Skalierung stattfand
-    img_for_latent = image.convert("RGB").resize((IMG_SIZE, IMG_SIZE), Image.Resampling.LANCZOS)
-# 2. In den Latent Space encoden (VAE)
-with torch.no_grad():
-    # Bild zu Tensor konvertieren
-    img_tensor = pipe.feature_extractor(img_for_latent, return_tensors="pt").pixel_values.to(device)
-    # Encoden
-    init_latent_dist = pipe.vae.encode(img_tensor).latent_dist
-    init_latents = init_latent_dist.sample()  # Latent mit zufälliger Variation aus der Verteilung
-    init_latents = init_latents * pipe.vae.config.scaling_factor  # Skalierung anpassen
-    print(f"✅ VAE-Encoding abgeschlossen. Latent Shape: {init_latents.shape}")
-# 3. Verrauschung basierend auf Strength (Scheduler)
-# Strength=0.8 bedeutet: Starte bei 80% des Rauschprozesses (stark verrauscht)
-strength = min(0.85, strength * 1.25)  # Ihre existierende Stärke-Anpassung
-latent_timestep = int(strength * pipe.scheduler.config.num_train_timesteps)
-# Rauschen generieren
-noise = torch.randn_like(init_latents)
-# Verrauschte Latents erzeugen
-noised_latents = pipe.scheduler.add_noise(init_latents, noise, torch.tensor([latent_timestep]))
-print(f"✅ Verrauschung abgeschlossen (Strength: {strength}, Timestep: {latent_timestep})")
-print(f"   Noised Latents Shape: {noised_latents.shape}")
-# Diese Variablen für später speichern:
-# - noised_latents: Das verrauschte Start-Latent für die Denoising-Schleife
-# - latent_timestep: Der Start-Timestep für die Denoising-Schleife
-# - init_latents: Das unverrauschte Latent (für spätere Referenz)
-        progress(0.03, desc="ControlNet läuft...")
         # ===== WICHTIG: VARIABLEN FÜR KOMPLETTEN WORKFLOW =====
         original_mask = None
         padding_info = None
-        controlnet_input = image.convert("RGB")  # Standard: Originalbild
         if bbox_x1 is not None and bbox_y1 is not None and bbox_x2 is not None and bbox_y2 is not None:
             print(f"🎯 BBox Koordinaten erhalten: [{bbox_x1}, {bbox_y1}, {bbox_x2}, {bbox_y2}]")
@@ -943,75 +884,48 @@ print(f"   Noised Latents Shape: {noised_latents.shape}")
                 target_size=IMG_SIZE
             )
-            #ControlNet ist ein paralleles Modell (CNN), das unveränderte Control-Maps (z. B. Tiefenkarten)
-            #verarbeitet und konditionierende Signale an das frozen UNet weiterleitet, um die Gesamtgeneration zu steuern,
-            #ohne pixelgenaue Manipulationen vorzunehmen. Es beeinflusst den Diffusionsprozess global/lokal durch Addition zu den Features.
-            #ControlNet-Bildgröße und Inpaint-Bildgröße müssen übereinstimmen!
-            controlnet_input = scaled_image  # Verwende das skalierte Bild für ControlNet
             print(f"✅ Gemeinsame Skalierung abgeschlossen")
             print(f"   Original: {image.size} → Skaliert: {scaled_image.size}")
         else:
             # Keine BBox: Normales Img2Img (ohne Maske)
             print(f"ℹ️ Keine BBox angegeben → normales Img2Img (ohne Maske)")
-            controlnet_input = image.convert("RGB").resize((IMG_SIZE, IMG_SIZE), Image.Resampling.LANCZOS)
-        # ===== CONTROLNET AUFRUF =====
-        print(f"📊 ControlNet Input Größe: {controlnet_input.size}")
-        controlnet_output, inpaint_input = controlnet_processor.generate_with_controlnet(
-            image=controlnet_input,
-            prompt=enhanced_prompt,
-            negative_prompt=combined_negative_prompt,
-            steps=controlnet_steps,
-            guidance_scale=guidance_scale,
-            controlnet_strength=controlnet_strength,
-            progress=None,
             keep_environment=keep_environment
         )
-        print(f"✅ ControlNet Output erhalten")
-        print(f"✅ Inpaint Input Größe: {inpaint_input.size}")
         progress(0.3, desc="ControlNet abgeschlossen – starte Inpaint...")
-        # ===== INPAINTING PIPELINE =====
-        pipe = load_img2img()
-        # Bild für Inpainting vorbereiten
-        if inpaint_input.size != (IMG_SIZE, IMG_SIZE):
-            print(f"⚠️ Inpaint Input hat unerwartete Größe {inpaint_input.size}, skaliere auf {IMG_SIZE}x{IMG_SIZE}")
-            img_resized = inpaint_input.convert("RGB").resize((IMG_SIZE, IMG_SIZE), Image.Resampling.LANCZOS)
-        else:
-            img_resized = inpaint_input.convert("RGB")
-            print(f"✅ Inpaint Input ist bereits {IMG_SIZE}x{IMG_SIZE}")
         # ===== SEED UND GENERATOR =====
         adj_guidance = min(guidance_scale, 12.0)
         seed = random.randint(0, 2**32 - 1)
         generator = torch.Generator(device=device).manual_seed(seed)
         print(f"🌱 Inpaint Seed: {seed}")
-        # ===== MASKE FÜR INPAINTING VORBEREITEN =====
-        inpaint_mask = None
-        if original_mask is not None and padding_info is not None:
-            # Verwende die skalierte Maske für Inpainting
-            _, scaled_mask, _ = scale_image_and_mask_together(
-                image.convert("RGB"),
-                original_mask,
-                target_size=IMG_SIZE
-            )
-            inpaint_mask = scaled_mask
-            print(f"✅ Maske für Inpainting vorbereitet: {inpaint_mask.size}")
         # ===== FORTSCHRITTS-CALLBACK =====
         callback = ImageToImageProgressCallback(progress, int(steps), adj_strength)
-        # ===== INPAINT DURCHFÜHREN =====
         result = pipe(
             prompt=enhanced_prompt,
             negative_prompt=combined_negative_prompt,
-            image=img_resized,
-            mask_image=inpaint_mask,
             strength=adj_strength,
             num_inference_steps=int(steps),
             guidance_scale=adj_guidance,
@@ -1020,6 +934,8 @@ print(f"   Noised Latents Shape: {noised_latents.shape}")
             callback_on_step_end_tensor_inputs=[],
         )
         # ===== KORREKTES COMPOSITING =====
         generated_image = result.images[0]
@@ -1033,9 +949,9 @@ print(f"   Noised Latents Shape: {noised_latents.shape}")
             )
             print(f"✅ Korrektes Compositing durchgeführt")
         else:
-            # Keine Maske: Einfach das generierte Bild zurückgeben (bereits 512×512)
             final_image = generated_image
-            print(f"ℹ️ Keine Maske → Direkte Rückgabe des 512×512 Bildes")
         end_time = time.time()
         duration = end_time - start_time
@@ -1053,6 +969,7 @@ print(f"   Noised Latents Shape: {noised_latents.shape}")
         traceback.print_exc()
         return None
 def update_bbox_from_image(image):
     """Aktualisiert die Bounding-Box-Koordinaten wenn ein Bild hochgeladen wird"""
     if image is None:

 from diffusers import StableDiffusionInpaintPipeline, AutoencoderKL
 from diffusers import DPMSolverMultistepScheduler, PNDMScheduler
 from controlnet_module import controlnet_processor
+from diffusers import StableDiffusionControlNetInpaintPipeline, ControlNetModel
 import torch
 from PIL import Image, ImageDraw
 import time
             print(f"❌ Auch Fallback fehlgeschlagen: {fallback_error}")
             raise
 def load_img2img():
     global pipe_img2img
     if pipe_img2img is None:
+        print("🔄 Lade ControlNet-Inpainting-Modell...")
+        # Hier müssen die ControlNet-Modelle geladen werden
+        controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny", torch_dtype=torch_dtype)
+        # Oder für Multi-ControlNet: eine Liste von Modellen
+        pipe_img2img = StableDiffusionControlNetInpaintPipeline.from_pretrained(
+            "runwayml/stable-diffusion-v1-5",
+            controlnet=controlnet,
+            torch_dtype=torch_dtype,
+            safety_checker=None,
+        ).to(device)
+        # ... Rest Ihrer Konfiguration (Scheduler, etc.)
+    return pipe_img2img
         pipe_img2img.enable_attention_slicing()
         pipe_img2img.enable_vae_tiling()
                  mode, bbox_x1, bbox_y1, bbox_x2, bbox_y2,
                  progress=gr.Progress()):
     """
+    KORRIGIERTE HAUPTFUNKTION FÜR CONTROLNET-GESTEUERTES INPAINTING
     """
     try:
         if image is None:
         print(f"🎯 Finaler Prompt für {mode}: {enhanced_prompt}")
+        progress(0, desc="Starte Generierung...")
         # ===== MODUS-SPEZIFISCHE EINSTELLUNGEN =====
         adj_strength = min(0.85, strength * 1.25)
             keep_environment = True
             controlnet_strength = adj_strength * 0.5
             print(f"🎯 MODUS: Ausschließlich Gesicht → Depth+Canny (keep_environment=True)")
+        print(f"⚙️ ControlNet Settings: Strength={controlnet_strength:.3f}")
         # ===== WICHTIG: VARIABLEN FÜR KOMPLETTEN WORKFLOW =====
         original_mask = None
         padding_info = None
+        scaled_image = None
+        scaled_mask = None
         if bbox_x1 is not None and bbox_y1 is not None and bbox_x2 is not None and bbox_y2 is not None:
             print(f"🎯 BBox Koordinaten erhalten: [{bbox_x1}, {bbox_y1}, {bbox_x2}, {bbox_y2}]")
                 target_size=IMG_SIZE
             )
             print(f"✅ Gemeinsame Skalierung abgeschlossen")
             print(f"   Original: {image.size} → Skaliert: {scaled_image.size}")
         else:
             # Keine BBox: Normales Img2Img (ohne Maske)
             print(f"ℹ️ Keine BBox angegeben → normales Img2Img (ohne Maske)")
+            scaled_image = image.convert("RGB").resize((IMG_SIZE, IMG_SIZE), Image.Resampling.LANCZOS)
+            scaled_mask = Image.new("L", (IMG_SIZE, IMG_SIZE), 255)  # Volle Maske
+        progress(0.1, desc="ControlNet läuft...")
+        # ===== CONTROLNET: MAPS ERSTELLEN =====
+        print(f"📊 ControlNet Input Größe: {scaled_image.size}")
+        controlnet_maps = controlnet_processor.prepare_controlnet_maps(
+            image=scaled_image,
             keep_environment=keep_environment
         )
+        print(f"✅ ControlNet Maps erstellt: {len(controlnet_maps)} Maps")
         progress(0.3, desc="ControlNet abgeschlossen – starte Inpaint...")
+        # ===== CONTROLNET-INPAINTING PIPELINE =====
+        pipe = load_img2img()  # MUSS StableDiffusionControlNetInpaintPipeline sein!
         # ===== SEED UND GENERATOR =====
         adj_guidance = min(guidance_scale, 12.0)
         seed = random.randint(0, 2**32 - 1)
         generator = torch.Generator(device=device).manual_seed(seed)
         print(f"🌱 Inpaint Seed: {seed}")
         # ===== FORTSCHRITTS-CALLBACK =====
         callback = ImageToImageProgressCallback(progress, int(steps), adj_strength)
+        # ===== CONTROLNET-GESTEUERTES INPAINTING DURCHFÜHREN =====
+        print(f"🔄 Führe ControlNet-gesteuertes Inpainting durch...")
         result = pipe(
             prompt=enhanced_prompt,
             negative_prompt=combined_negative_prompt,
+            image=scaled_image,          # Das skalierte Originalbild
+            mask_image=scaled_mask,      # Die skalierte Maske
+            control_image=controlnet_maps,  # Die ControlNet-Maps als Liste
             strength=adj_strength,
             num_inference_steps=int(steps),
             guidance_scale=adj_guidance,
             callback_on_step_end_tensor_inputs=[],
         )
+        print("✅ ControlNet-Inpainting abgeschlossen")
         # ===== KORREKTES COMPOSITING =====
         generated_image = result.images[0]
             )
             print(f"✅ Korrektes Compositing durchgeführt")
         else:
+            # Keine Maske: Einfach das generierte Bild zurückgeben
             final_image = generated_image
+            print(f"ℹ️ Keine Maske → Direkte Rückgabe des Bildes")
         end_time = time.time()
         duration = end_time - start_time
         traceback.print_exc()
         return None
 def update_bbox_from_image(image):
     """Aktualisiert die Bounding-Box-Koordinaten wenn ein Bild hochgeladen wird"""
     if image is None: