Spaces:

Astridkraft
/

Stable-ControlNet-GPU

Paused

App Files Files Community

Astridkraft committed on Nov 15, 2025

Commit

c6191da

verified ·

1 Parent(s): d77ef69

Update app.py

Browse files

Files changed (1) hide show

app.py +65 -60

app.py CHANGED Viewed

@@ -2,7 +2,7 @@
 #und deutlich besserem Prompt-Verständnis - (Änderung Architektur).
 #Eine deutsche Alternative zur Umsetzung von Text-Bild zu Bild ist Flux - mit einer völlig anderen Architektur als SD!
 import gradio as gr
-from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline, StableDiffusionXLPipeline
 from diffusers import StableDiffusionInpaintPipeline
 from controlnet_module import controlnet_processor
 import torch
@@ -12,12 +12,10 @@ import os
 import tempfile
 import random
 # === OPTIMIERTE EINSTELLUNGEN ===
 device = "cuda" if torch.cuda.is_available() else "cpu"
 torch_dtype = torch.float16 if device == "cuda" else torch.float32
-IMG_SIZE = 1024  # SDXL verwendet 1024x1024 statt 512x512
 print(f"Running on: {device}")
@@ -65,21 +63,44 @@ pipe_img2img = None
 def load_txt2img():
     global pipe_txt2img
     if pipe_txt2img is None:
-        print("Loading SDXL Text-to-Image model...")
-        pipe_txt2img = StableDiffusionXLPipeline.from_pretrained(
-            "stabilityai/stable-diffusion-xl-base-1.0",
-            torch_dtype=torch_dtype,
-            add_watermarker=False,
-        ).to(device)
-        from diffusers import DPMSolverMultistepScheduler
-        pipe_txt2img.scheduler = DPMSolverMultistepScheduler.from_config(
-            pipe_txt2img.scheduler.config,
-            use_karras_sigmas=True,  # Noch bessere Qualität
-            use_safetensors=True,
-            algorithm_type="sde-dpmsolver++"
-        )
-        pipe_txt2img.enable_attention_slicing()
     return pipe_txt2img
 def load_img2img():
@@ -89,15 +110,13 @@ def load_img2img():
         try:
             pipe_img2img = StableDiffusionInpaintPipeline.from_pretrained(
                 "runwayml/stable-diffusion-inpainting",
-                #"stabilityai/stable-diffusion-2-inpainting",  # Neues Modell
                 torch_dtype=torch_dtype,
-                #use_safetensors=True,  # Erzwinge .safetensors
-                allow_pickle=False,    # Verhindere unsichere Serialisierung
                 safety_checker=None,
-                #clean_up_tokenization_spaces=False #benötigt neue Transformer-Version
             ).to(device)
         except Exception as e:
-            print(f"Fehler beim Laden des Modells: {e}")
             raise
@@ -111,7 +130,8 @@ def load_img2img():
         pipe_img2img.enable_attention_slicing()
         pipe_img2img.enable_vae_tiling()
-        pipe_img2img.vae_slicing = True
     return pipe_img2img
@@ -126,7 +146,7 @@ class TextToImageProgressCallback:
         """Neue Callback-Signatur für diffusers >= 1.0.0"""
         self.current_step = step + 1
         progress_percent = (step / self.total_steps) * 100
-        self.progress(progress_percent / 100, desc="Generierung läuft - CPU benötigt bis zu 20 Minuten!")
         return callback_kwargs
 class ImageToImageProgressCallback:
@@ -152,7 +172,7 @@ class ImageToImageProgressCallback:
             print(f"🎯 INTERNE STEP-AUSGABE: Strength {self.strength} → {self.actual_total_steps} tatsächliche Denoising-Schritte")
         progress_percent = (step / self.actual_total_steps) * 100
-        self.progress(progress_percent / 100, desc="Generierung läuft - CPU benötigt bis zu 20 Minuten!")
         return callback_kwargs
 # === NEUE FUNKTIONEN FÜR DIE FEATURES ===
@@ -239,9 +259,7 @@ def text_to_image(prompt, steps, guidance_scale, progress=gr.Progress()):
         print(f"Starting generation for: {prompt}")
         start_time = time.time()
-        # Statusmeldung anzeigen
-        progress(0, desc="Generierung läuft - CPU benötigt bis zu 20 Minuten!")
         pipe = load_txt2img()
         # ZUFÄLLIGER SEED für Variation
@@ -249,28 +267,27 @@ def text_to_image(prompt, steps, guidance_scale, progress=gr.Progress()):
         generator = torch.Generator(device=device).manual_seed(seed)
         print(f"Using seed: {seed}")
-        # NEUE Callback-Implementierung
         callback = TextToImageProgressCallback(progress, steps)
         image = pipe(
             prompt=prompt,
-            height=IMG_SIZE,
-            width=IMG_SIZE,
             num_inference_steps=int(steps),
             guidance_scale=guidance_scale,
             generator=generator,
-            callback_on_step_end=callback,  # NEUE Parameter-Name
-            callback_on_step_end_tensor_inputs=[],  # Keine zusätzlichen Tensor-Inputs
         ).images[0]
         end_time = time.time()
         print(f"Bild generiert in {end_time - start_time:.2f} Sekunden")
-        # Robuste Zwischenspeicherung
         return image
     except Exception as e:
-        print(f"Fehler: {e}")
         import traceback
         traceback.print_exc()
         return None
@@ -299,12 +316,10 @@ def img_to_image(image, prompt, neg_prompt, strength, steps, guidance_scale,
         # CONTROLNET-STRENGTH ANPASSEN ABHÄNGIG VOM MODUS
         if face_preserve:
-            # UMGEBUNG BEIBEHALTEN, PERSON ÄNDERN → HOHE STRENGTH FÜR KOMBINIERTE STRATEGIE
-            controlnet_strength = adj_strength * 0.8  # 80% für kombinierte OpenPose + Canny
             print(f"🎯 ControlNet Modus: Umgebung beibehalten (Strength = {controlnet_strength:.3f})")
         else:
-            # PERSON BEIBEHALTEN, UMGEBUNG ÄNDERN → NORMALE STRENGTH FÜR OPENPOSE
-            controlnet_strength = adj_strength * 0.5  # 50% für OpenPose
             print(f"🎯 ControlNet Modus: Person beibehalten (Strength = {controlnet_strength:.3f})")
         controlnet_steps = min(25, int(steps * 0.8))
@@ -324,7 +339,7 @@ def img_to_image(image, prompt, neg_prompt, strength, steps, guidance_scale,
             guidance_scale=guidance_scale,
             controlnet_strength=controlnet_strength,
             progress=progress,
-            keep_environment=face_preserve  # WICHTIG: Parameter hinzugefügt!
         )
         print(f"✅ ControlNet Output erhalten: {type(controlnet_output)}")
@@ -335,10 +350,9 @@ def img_to_image(image, prompt, neg_prompt, strength, steps, guidance_scale,
         # -------------------------------
         progress(0.3, desc="ControlNet abgeschlossen – starte Inpaint...")
-        pipe = load_img2img()  # ← deine bestehende Funktion
-        # inpaint_input ist IMMER das Originalbild (laut neuer ControlNet-Logik)
-        img_resized = inpaint_input.convert("RGB").resize((512, 512))  # Bleibt bei 512 für Inpaint
         adj_guidance = min(guidance_scale, 12.0)
         seed = random.randint(0, 2**32 - 1)
@@ -351,7 +365,7 @@ def img_to_image(image, prompt, neg_prompt, strength, steps, guidance_scale,
         mask = None
         if bbox_x1 and bbox_y1 and bbox_x2 and bbox_y2:
             orig_w, orig_h = image.size
-            scale_x, scale_y = 512 / orig_w, 512 / orig_h  # Skalierung für Inpaint (512px)
             bbox_coords = [
                 int(bbox_x1 * scale_x),
                 int(bbox_y1 * scale_y),
@@ -398,7 +412,6 @@ def img_to_image(image, prompt, neg_prompt, strength, steps, guidance_scale,
         import traceback
         traceback.print_exc()
         return None
 def update_bbox_from_image(image):
     """Aktualisiert die Bounding-Box-Koordinaten wenn ein Bild hochgeladen wird"""
@@ -480,7 +493,6 @@ def main_ui():
         """
     ) as demo:
-        # --- Hauptanwendungsbereich (zunächst versteckt) ---
         with gr.Column(visible=True) as content_area:
             with gr.Tab("Text zu Bild"):
                 gr.Markdown("**Beschreibe dein gewünschtes Bild:**")
@@ -524,7 +536,6 @@ def main_ui():
             with gr.Tab("Bild zu Bild"):
                 gr.Markdown("**Lade ein Bild hoch und beschreibe die gewünschte Veränderung:**")
-                # NEUE ANORDNUNG: Eingabebild und Live-Vorschau nebeneinander
                 with gr.Row():
                     with gr.Column():
                         img_input = gr.Image(
@@ -542,7 +553,6 @@ def main_ui():
                             show_download_button=False
                         )
-                # DARUNTER: Checkbox Gesicht/Person oder Umgebung ändern
                 with gr.Row():
                     face_preserve = gr.Checkbox(
                         label="Schutz",
@@ -550,7 +560,6 @@ def main_ui():
                         info="🟢 Checkbox AN: Alles AUSSERHALB des gelben Rahmens verändern | 🔴 Checkbox AUS: Nur INNERHALB des gelben Rahmens verändern"
                     )
-                # DARUNTER: Bildelementbereich anpassen
                 with gr.Row():
                     gr.Markdown("**Bildelementbereich anpassen**")
@@ -581,7 +590,6 @@ def main_ui():
                             info="Untere Kante des Bildelementbereichs"
                         )
-                # DARUNTER: Prompt und Negativ-Prompt
                 with gr.Row():
                     with gr.Column():
                         img_prompt = gr.Textbox(
@@ -598,7 +606,6 @@ def main_ui():
                             info="Was soll vermieden werden? Unerwünschte Elemente auflisten."
                         )
-                # DARUNTER: Veränderungsstärke, Inferenzschritte, Promptstärke
                 with gr.Row():
                     with gr.Column():
                         strength_slider = gr.Slider(
@@ -628,7 +635,6 @@ def main_ui():
                      "• **Koordinaten nur bei erkennbaren Verzerrungen anpassen** (Bereiche leicht verschieben)"
                     )
                 transform_btn = gr.Button("Bild transformieren", variant="primary")
                 with gr.Row():
@@ -638,15 +644,12 @@ def main_ui():
                         type="pil"
                     )
-                # NEUE: Event-Handler für alle Live-Updates
-                # Bild-Upload: Auto-Koordinaten + Vorschau
                 img_input.change(
                     fn=process_image_upload,
                     inputs=[img_input],
                     outputs=[preview_output, bbox_x1, bbox_y1, bbox_x2, bbox_y2]
                 )
-                # Live-Updates bei Koordinaten-Änderungen
                 coordinate_inputs = [img_input, bbox_x1, bbox_y1, bbox_x2, bbox_y2, face_preserve]
                 bbox_x1.change(
@@ -673,14 +676,12 @@ def main_ui():
                     outputs=preview_output
                 )
-                # Live-Update bei Modus-Änderung
                 face_preserve.change(
                     fn=update_live_preview,
                     inputs=coordinate_inputs,
                     outputs=preview_output
                 )
-                # Transform-Button (UNVERÄNDERT - gibt OUTPUT zurück)
                 transform_btn.click(
                     fn=img_to_image,
                     inputs=[
@@ -697,7 +698,11 @@ def main_ui():
 if __name__ == "__main__":
     demo = main_ui()
-    demo.queue()
     demo.launch(
         server_name="0.0.0.0",
         server_port=7860,

 #und deutlich besserem Prompt-Verständnis - (Änderung Architektur).
 #Eine deutsche Alternative zur Umsetzung von Text-Bild zu Bild ist Flux - mit einer völlig anderen Architektur als SD!
 import gradio as gr
+from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline
 from diffusers import StableDiffusionInpaintPipeline
 from controlnet_module import controlnet_processor
 import torch
 import tempfile
 import random
 # === OPTIMIERTE EINSTELLUNGEN ===
 device = "cuda" if torch.cuda.is_available() else "cpu"
 torch_dtype = torch.float16 if device == "cuda" else torch.float32
+IMG_SIZE = 512  # Jetzt 512x512 für Realistic Vision
 print(f"Running on: {device}")
 def load_txt2img():
     """Return the shared text-to-image pipeline, loading it on first use.

     The pipeline is cached in the module-level ``pipe_txt2img``. On the first
     call the Realistic Vision V6.0 checkpoint is loaded and configured; if
     anything in that step raises, the Stable Diffusion 1.5 checkpoint is
     loaded instead. The global is only assigned once a pipeline has been
     fully set up, so a failed load never leaves a half-configured pipeline
     cached for later calls.

     Returns:
         The cached ``StableDiffusionPipeline`` instance.

     Raises:
         Any exception raised while loading the fallback checkpoint; that
         load is not guarded.
     """
     global pipe_txt2img
     if pipe_txt2img is not None:
         return pipe_txt2img
     try:
         print("Loading Realistic Vision V6.0 for high-quality 512x512...")
         # `add_watermarker` is an SDXL pipeline argument and is not passed
         # here: StableDiffusionPipeline does not expect it.
         pipe = StableDiffusionPipeline.from_pretrained(
             "SG161222/Realistic_Vision_V6.0_B1",
             torch_dtype=torch_dtype,
             safety_checker=None,
             requires_safety_checker=False,
             use_safetensors=True,  # load .safetensors weights only
             variant="fp16" if torch_dtype == torch.float16 else None,
         ).to(device)
         from diffusers import DPMSolverMultistepScheduler
         pipe.scheduler = DPMSolverMultistepScheduler.from_config(
             pipe.scheduler.config,
             use_karras_sigmas=True,
             algorithm_type="sde-dpmsolver++",
         )
         # Memory optimisations (the original comment targets a T4 GPU).
         pipe.enable_attention_slicing()
         # One call is enough; the pipeline-level helper and the VAE method
         # enable the same sliced decoding.
         pipe.vae.enable_slicing()
         print("✅ Realistic Vision V6.0 erfolgreich geladen")
     except Exception as e:
         print(f"❌ Fehler beim Laden von Realistic Vision: {e}")
         print("🔄 Fallback auf SD 1.5...")
         # Fall back to the standard SD 1.5 checkpoint.
         # NOTE(review): confirm this repo id still resolves on the Hub.
         pipe = StableDiffusionPipeline.from_pretrained(
             "runwayml/stable-diffusion-v1-5",
             torch_dtype=torch_dtype,
             use_safetensors=True,
         ).to(device)
         pipe.enable_attention_slicing()
     # Publish only a fully configured pipeline.
     pipe_txt2img = pipe
     return pipe_txt2img
 def load_img2img():
         try:
             pipe_img2img = StableDiffusionInpaintPipeline.from_pretrained(
                 "runwayml/stable-diffusion-inpainting",
                 torch_dtype=torch_dtype,
+                use_safetensors=True,  # Sicherheitsproblem behoben
+                allow_pickle=False,
                 safety_checker=None,
             ).to(device)
         except Exception as e:
+            print(f"Fehler beim Laden des Inpainting-Modells: {e}")
             raise
         pipe_img2img.enable_attention_slicing()
         pipe_img2img.enable_vae_tiling()
+        if hasattr(pipe_img2img, 'vae_slicing'):
+            pipe_img2img.vae_slicing = True
     return pipe_img2img
         """Neue Callback-Signatur für diffusers >= 1.0.0"""
         self.current_step = step + 1
         progress_percent = (step / self.total_steps) * 100
+        self.progress(progress_percent / 100, desc="Generierung läuft...")
         return callback_kwargs
 class ImageToImageProgressCallback:
             print(f"🎯 INTERNE STEP-AUSGABE: Strength {self.strength} → {self.actual_total_steps} tatsächliche Denoising-Schritte")
         progress_percent = (step / self.actual_total_steps) * 100
+        self.progress(progress_percent / 100, desc="Generierung läuft...")
         return callback_kwargs
 # === NEUE FUNKTIONEN FÜR DIE FEATURES ===
         print(f"Starting generation for: {prompt}")
         start_time = time.time()
+        progress(0, desc="Lade Modell...")
         pipe = load_txt2img()
         # ZUFÄLLIGER SEED für Variation
         generator = torch.Generator(device=device).manual_seed(seed)
         print(f"Using seed: {seed}")
         callback = TextToImageProgressCallback(progress, steps)
+        # NEUE: 512x512 für Realistic Vision
         image = pipe(
             prompt=prompt,
+            height=512,  # ← 512 statt IMG_SIZE (1024)
+            width=512,   # ← 512 statt IMG_SIZE (1024)
             num_inference_steps=int(steps),
             guidance_scale=guidance_scale,
             generator=generator,
+            callback_on_step_end=callback,
+            callback_on_step_end_tensor_inputs=[],
         ).images[0]
         end_time = time.time()
         print(f"Bild generiert in {end_time - start_time:.2f} Sekunden")
         return image
     except Exception as e:
+        print(f"Fehler in text_to_image: {e}")
         import traceback
         traceback.print_exc()
         return None
         # CONTROLNET-STRENGTH ANPASSEN ABHÄNGIG VOM MODUS
         if face_preserve:
+            controlnet_strength = adj_strength * 0.8
             print(f"🎯 ControlNet Modus: Umgebung beibehalten (Strength = {controlnet_strength:.3f})")
         else:
+            controlnet_strength = adj_strength * 0.5
             print(f"🎯 ControlNet Modus: Person beibehalten (Strength = {controlnet_strength:.3f})")
         controlnet_steps = min(25, int(steps * 0.8))
             guidance_scale=guidance_scale,
             controlnet_strength=controlnet_strength,
             progress=progress,
+            keep_environment=face_preserve
         )
         print(f"✅ ControlNet Output erhalten: {type(controlnet_output)}")
         # -------------------------------
         progress(0.3, desc="ControlNet abgeschlossen – starte Inpaint...")
+        pipe = load_img2img()
+        img_resized = inpaint_input.convert("RGB").resize((512, 512))
         adj_guidance = min(guidance_scale, 12.0)
         seed = random.randint(0, 2**32 - 1)
         mask = None
         if bbox_x1 and bbox_y1 and bbox_x2 and bbox_y2:
             orig_w, orig_h = image.size
+            scale_x, scale_y = 512 / orig_w, 512 / orig_h
             bbox_coords = [
                 int(bbox_x1 * scale_x),
                 int(bbox_y1 * scale_y),
         import traceback
         traceback.print_exc()
         return None
 def update_bbox_from_image(image):
     """Aktualisiert die Bounding-Box-Koordinaten wenn ein Bild hochgeladen wird"""
         """
     ) as demo:
         with gr.Column(visible=True) as content_area:
             with gr.Tab("Text zu Bild"):
                 gr.Markdown("**Beschreibe dein gewünschtes Bild:**")
             with gr.Tab("Bild zu Bild"):
                 gr.Markdown("**Lade ein Bild hoch und beschreibe die gewünschte Veränderung:**")
                 with gr.Row():
                     with gr.Column():
                         img_input = gr.Image(
                             show_download_button=False
                         )
                 with gr.Row():
                     face_preserve = gr.Checkbox(
                         label="Schutz",
                         info="🟢 Checkbox AN: Alles AUSSERHALB des gelben Rahmens verändern | 🔴 Checkbox AUS: Nur INNERHALB des gelben Rahmens verändern"
                     )
                 with gr.Row():
                     gr.Markdown("**Bildelementbereich anpassen**")
                             info="Untere Kante des Bildelementbereichs"
                         )
                 with gr.Row():
                     with gr.Column():
                         img_prompt = gr.Textbox(
                             info="Was soll vermieden werden? Unerwünschte Elemente auflisten."
                         )
                 with gr.Row():
                     with gr.Column():
                         strength_slider = gr.Slider(
                      "• **Koordinaten nur bei erkennbaren Verzerrungen anpassen** (Bereiche leicht verschieben)"
                     )
                 transform_btn = gr.Button("Bild transformieren", variant="primary")
                 with gr.Row():
                         type="pil"
                     )
                 img_input.change(
                     fn=process_image_upload,
                     inputs=[img_input],
                     outputs=[preview_output, bbox_x1, bbox_y1, bbox_x2, bbox_y2]
                 )
                 coordinate_inputs = [img_input, bbox_x1, bbox_y1, bbox_x2, bbox_y2, face_preserve]
                 bbox_x1.change(
                     outputs=preview_output
                 )
                 face_preserve.change(
                     fn=update_live_preview,
                     inputs=coordinate_inputs,
                     outputs=preview_output
                 )
                 transform_btn.click(
                     fn=img_to_image,
                     inputs=[
 if __name__ == "__main__":
     demo = main_ui()
+    # OPTIMIERTE WARTESCHLANGE FÜR T4
+    demo.queue(
+        max_size=3,           # Max 3 Anfragen in Warteschlange
+        concurrency_count=1    # Nur 1 Generation gleichzeitig
+    )
     demo.launch(
         server_name="0.0.0.0",
         server_port=7860,