Update app.py
app.py CHANGED
@@ -4,41 +4,12 @@ from diffusers import StableDiffusionInpaintPipeline, AutoencoderKL
 from diffusers import DPMSolverMultistepScheduler, PNDMScheduler
 from controlnet_module import controlnet_processor
 import torch
-from PIL import Image, ImageDraw
 import time
 import os
 import tempfile
 import random
-
-
-
-# === FACE-FIX IMPORT - WITH DETAILED DEBUGGING ===
-try:
-    print("Attempt 1: importing controlnet_facefix...")
-    from controlnet_facefix import apply_facefix
-    FACEFIX_AVAILABLE = True
-    print("✅ Face-Fix loaded successfully")
-except ImportError as e1:
-    print(f"❌ ImportError: {e1}")
-    try:
-        print("Attempt 2: import via sys.path...")
-        import sys
-        sys.path.append(".")
-        from controlnet_facefix import apply_facefix
-        FACEFIX_AVAILABLE = True
-        print("✅ Face-Fix loaded successfully (via sys.path)")
-    except Exception as e2:
-        print(f"❌ Final error: {e2}")
-        FACEFIX_AVAILABLE = False
-        import traceback
-        traceback.print_exc()
-except Exception as e:
-    print(f"❌ Other error: {e}")
-    FACEFIX_AVAILABLE = False
-    import traceback
-    traceback.print_exc()
-
-
 
 # === OPTIMIZED SETTINGS ===
 device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -50,7 +21,7 @@ print(f"Running on: {device}")
 # === MODEL CONFIGURATION (ONLY 2 MODELS) ===
 MODEL_CONFIGS = {
     "runwayml/stable-diffusion-v1-5": {
-        "name": "Stable Diffusion 1.5 (Universal)",
         "description": "Universal model, good all-rounder, reliable results",
         "requires_vae": False,
         "recommended_steps": 35,
@@ -58,7 +29,7 @@ MODEL_CONFIGS = {
         "supports_fp16": True
     },
     "SG161222/Realistic_Vision_V6.0_B1_noVAE": {
-        "name": "Realistic Vision V6.0 (Portraits)",
         "description": "Best for photorealistic faces, skin details, human portraits",
         "requires_vae": True,
         "vae_model": "stabilityai/sd-vae-ft-mse",
@@ -68,88 +39,106 @@ MODEL_CONFIGS = {
     }
 }
 
 SAFETENSORS_MODELS = ["runwayml/stable-diffusion-v1-5"]
 current_model_id = "runwayml/stable-diffusion-v1-5"
 
 # === AUTOMATIC NEGATIVE PROMPT GENERATION ===
 def auto_negative_prompt(positive_prompt):
     p = positive_prompt.lower()
     negatives = []
 
     if any(w in p for w in [
-
-
-
-
-
-        ]):
         negatives.append(
             "blurry face, lowres face, deformed pupils, bad anatomy, malformed hands, extra fingers, uneven eyes, distorted face, "
             "unrealistic skin, mutated, ugly, disfigured, poorly drawn face, "
             "missing limbs, extra limbs, fused fingers, too many fingers, bad teeth, "
-            "mutated hands, long neck, extra wings, multiple wings, "
             "compression artifacts, rendering artifacts, digital artifacts, overprocessed face, oversmoothed face "
         )
-
     if any(w in p for w in ["office", "business", "team", "meeting", "corporate", "company", "workplace"]):
-        negatives.append(
 
     if any(w in p for w in ["product", "packshot", "mockup", "render", "3d", "cgi", "packaging"]):
-        negatives.append(
 
     if any(w in p for w in ["landscape", "nature", "mountain", "forest", "outdoor", "beach", "sky"]):
-        negatives.append(
 
     if any(w in p for w in ["logo", "symbol", "icon", "typography", "badge", "emblem"]):
-        negatives.append(
 
     if any(w in p for w in ["building", "architecture", "house", "interior", "room", "facade"]):
-        negatives.append(
 
     base_negatives = "low quality, worst quality, blurry, jpeg artifacts, ugly, deformed"
 
-
-
-
-
-    p = prompt.lower()
-    print(f"DEBUG: checking '{p}' for people...")
-
-    # SIMPLE version that is guaranteed to work:
-    keywords = ["fairy", "person", "man", "woman", "face", "portrait"]
-
-    for keyword in keywords:
-        if keyword in p:  # plain 'in' without surrounding spaces
-            print(f"✅ Person detected via '{keyword}'")
-            return True
-
-    print("❌ No person detected")
-    return False
-
-
 
 # === FACE MASK FUNCTIONS ===
 def create_face_mask(image, bbox_coords, face_preserve):
-
     if bbox_coords and all(coord is not None for coord in bbox_coords):
         x1, y1, x2, y2 = bbox_coords
         draw = ImageDraw.Draw(mask)
         if face_preserve:
-
-            draw.rectangle([
         else:
-
     return mask
 
 def auto_detect_face_area(image):
     width, height = image.size
     face_size = min(width, height) * 0.4
     x1 = (width - face_size) / 2
-    y1 = (height - face_size) / 4
     x2 = x1 + face_size
-    y2 = y1 + face_size * 1.2
     x1, y1 = max(0, int(x1)), max(0, int(y1))
     x2, y2 = min(width, int(x2)), min(height, int(y2))
     return [x1, y1, x2, y2]
 
 # === PIPELINES ===
@@ -158,277 +147,375 @@ current_pipe_model_id = None
 pipe_img2img = None
 
 def load_txt2img(model_id):
     global pipe_txt2img, current_pipe_model_id
     if pipe_txt2img is not None and current_pipe_model_id == model_id:
         return pipe_txt2img
 
-    print(f"Loading model: {model_id}")
     config = MODEL_CONFIGS.get(model_id, MODEL_CONFIGS["runwayml/stable-diffusion-v1-5"])
 
     try:
         vae = None
         if config.get("requires_vae", False):
-
 
         model_params = {
             "torch_dtype": torch_dtype,
             "safety_checker": None,
             "requires_safety_checker": False,
         }
 
         if model_id in SAFETENSORS_MODELS:
             model_params["use_safetensors"] = True
 
         if config.get("supports_fp16", False) and torch_dtype == torch.float16:
             model_params["variant"] = "fp16"
 
         if vae is not None:
             model_params["vae"] = vae
 
-
-        pipe_txt2img.
 
         try:
             pipe_txt2img.scheduler = DPMSolverMultistepScheduler.from_config(
-
                 use_karras_sigmas=True,
                 algorithm_type="sde-dpmsolver++"
             )
-
-
 
         current_pipe_model_id = model_id
         return pipe_txt2img
 
     except Exception as e:
-        print(f"Error loading
-
-
-
-
-
 
 def load_img2img():
     global pipe_img2img
     if pipe_img2img is None:
-
-
-
         pipe_img2img.enable_attention_slicing()
         pipe_img2img.enable_vae_tiling()
     return pipe_img2img
 
-# ===
 class TextToImageProgressCallback:
     def __init__(self, progress, total_steps):
         self.progress = progress
         self.total_steps = total_steps
     def __call__(self, pipe, step, timestep, callback_kwargs):
-        self.
         return callback_kwargs
 
 class ImageToImageProgressCallback:
     def __init__(self, progress, total_steps, strength):
         self.progress = progress
         self.total_steps = total_steps
         self.strength = strength
-        self.
     def __call__(self, pipe, step, timestep, callback_kwargs):
-
-
-
-
         return callback_kwargs
 
-# ===
-def
-    """
     if image is None:
-        return None
 
-
-
 
-
-
 
-        return preview
 
-def update_live_preview(image,
-    """
     if image is None:
         return None
 
-
-
-
-    # Draw a frame depending on the mode
-    draw = ImageDraw.Draw(preview)
 
-
-
-
-
-
-
-
 
-
-
 
-
-    try:
-        # Try to load a font (if available)
-        try:
-            font = ImageFont.truetype("arial.ttf", 16)
-        except:
-            font = ImageFont.load_default()
-
-        text = f"{'🟢 Protection mode ON' if face_preserve else '🔴 Protection mode OFF'}"
-        # Text background
-        text_bbox = draw.textbbox((x1, y1 - 25), text, font=font)
-        draw.rectangle(text_bbox, fill="white")
-        # Text
-        draw.text((x1, y1 - 25), text, fill=outline_color, font=font)
-    except:
-        pass  # in case the font cannot be loaded
 
-    return preview
 
-
-# === MAIN FUNCTION: TEXT TO IMAGE WITH AUTOMATIC FACE-FIX - CORRECTED ===
 def text_to_image(prompt, model_id, steps, guidance_scale, progress=gr.Progress()):
     try:
         if not prompt or not prompt.strip():
             return None, "Please enter a prompt"
 
-        print(f"
-        print(f"
-        print(f"🔧 Prompt: {prompt}")
-        print(f"🔧 Model: {model_id}")
-
-        # Person detection FIRST on the ORIGINAL prompt!
-        is_person = is_person_prompt(prompt)
-        print(f"🔧 Person detected? {is_person}")
 
         auto_negatives = auto_negative_prompt(prompt)
-        print(f"
 
         start_time = time.time()
 
-        #
-        quality_keywords = ['masterpiece', 'best quality', '
-
-
-
-
 
         progress(0, desc="Loading model...")
         pipe = load_txt2img(model_id)
 
         seed = random.randint(0, 2**32 - 1)
         generator = torch.Generator(device=device).manual_seed(seed)
-        print(f"
-
-
         image = pipe(
             prompt=enhanced_prompt,
             negative_prompt=auto_negatives,
-            height=512,
             num_inference_steps=int(steps),
             guidance_scale=guidance_scale,
             generator=generator,
-            callback_on_step_end=
             callback_on_step_end_tensor_inputs=[],
         ).images[0]
-
-        print("✅ Image generation finished")
-
-        # AUTOMATIC FACE-FIX ONLY FOR PEOPLE
-        if FACEFIX_AVAILABLE and is_person:
-            print("\n" + "🎭"*30)
-            print("🎭 PERSON DETECTED → Starting Face-Fix for perfect faces...")
-            print("🎭"*30)
-
-            progress(0.9, desc="Refining face & hands...")
-            try:
-                # Keep the original image for comparison
-                original_image = image.copy()
-                print("🎭 Original image saved, starting Face-Fix...")
-
-                # Apply Face-Fix
-                fixed_image = apply_facefix(
-                    image=image,
-                    prompt=enhanced_prompt,
-                    negative_prompt=auto_negatives,
-                    seed=seed,
-                    model_id=model_id
-                )
-
-                image = fixed_image
-                print("✅✅✅ Face-Fix FINISHED! ✅✅✅")
-
-                # Optional: build a comparison image
-                try:
-                    width, height = image.size
-                    comparison = Image.new('RGB', (width * 2, height))
-                    comparison.paste(original_image, (0, 0))
-                    comparison.paste(image, (width, 0))
-
-                    # Divider line
-                    draw = ImageDraw.Draw(comparison)
-                    draw.line([(width, 0), (width, height)], fill="white", width=2)
-
-                    # Add labels
-                    try:
-                        font = ImageFont.truetype("arial.ttf", 20)
-                    except:
-                        font = ImageFont.load_default()
-
-                    draw.text((10, 10), "Before Face-Fix", fill="white", font=font)
-                    draw.text((width + 10, 10), "After Face-Fix", fill="white", font=font)
-
-                    # Return the comparison image as an option
-                    image = comparison
-                    print("✅ Comparison image created")
-
-                except Exception as e:
-                    print(f"⚠️ Comparison image could not be created: {e}")
-
-            except Exception as e:
-                print(f"❌❌❌ Face-Fix FAILED: {e} ❌❌❌")
-                import traceback
-                traceback.print_exc()
-        else:
-            if not FACEFIX_AVAILABLE:
-                print("ℹ️ Face-Fix not available")
-            if not is_person:
-                print("ℹ️ No person detected in the prompt")
-
-        duration = time.time() - start_time
-        config = MODEL_CONFIGS.get(model_id, {"name": model_id})
 
-
-
-        else:
-            status_msg = f"Generated with {config.get('name', model_id)} in {duration:.1f}s"
 
-
-        print(f"="*60 + "\n")
 
         return image, status_msg
 
     except Exception as e:
         print(f"❌ Error in text_to_image: {e}")
         import traceback
         traceback.print_exc()
-        return None,
-
-
 
 def img_to_image(image, prompt, neg_prompt, strength, steps, guidance_scale,
                  face_preserve, bbox_x1, bbox_y1, bbox_x2, bbox_y2,
@@ -445,11 +532,12 @@ def img_to_image(image, prompt, neg_prompt, strength, steps, guidance_scale,
         print(f"Negative prompt: {neg_prompt}")
         print(f"Keep face: {face_preserve}")
 
-
         auto_negatives = auto_negative_prompt(prompt)
         print(f"🤖 Automatically generated negative prompt: {auto_negatives}")
 
-        #
         combined_negative_prompt = ""
 
         if neg_prompt and neg_prompt.strip():
@@ -458,6 +546,7 @@ def img_to_image(image, prompt, neg_prompt, strength, steps, guidance_scale,
             print(f"👤 User negative prompt: {user_neg}")
 
             # Remove duplicates between automatic and manual prompts
             user_words = [word.strip().lower() for word in user_neg.split(",")]
             auto_words = [word.strip().lower() for word in auto_negatives.split(",")]
 
@@ -469,7 +558,7 @@ def img_to_image(image, prompt, neg_prompt, strength, steps, guidance_scale,
                 if auto_word and auto_word not in user_words:
                     combined_words.append(auto_word)
 
-            # Join and remove duplicates
             unique_words = []
             seen_words = set()
             for word in combined_words:
@@ -484,6 +573,8 @@ def img_to_image(image, prompt, neg_prompt, strength, steps, guidance_scale,
             print(f"ℹ️ No manual negative prompt, using only the automatic one: {combined_negative_prompt}")
 
         print(f"✅ Final combined negative prompt: {combined_negative_prompt}")
 
         progress(0, desc="Starting generation with ControlNet...")
 
@@ -567,22 +658,6 @@ def img_to_image(image, prompt, neg_prompt, strength, steps, guidance_scale,
         print(f"🕒 Duration: {end_time - start_time:.2f} seconds")
 
         generated_image = result.images[0]
-
-        # Optional: apply Face-Fix to the transformed image as well
-        if FACEFIX_AVAILABLE and is_person_prompt(prompt):
-            print("Transformed image → applying Face-Fix...")
-            try:
-                generated_image = apply_facefix(
-                    image=generated_image,
-                    prompt=prompt,
-                    negative_prompt=combined_negative_prompt,
-                    seed=seed,
-                    model_id="runwayml/stable-diffusion-v1-5"
-                )
-                print("Face-Fix on the transformed image finished!")
-            except Exception as e:
-                print(f"Face-Fix on the transformed image failed: {e}")
-
         return generated_image
 
     except Exception as e:
@@ -591,6 +666,24 @@ def img_to_image(image, prompt, neg_prompt, strength, steps, guidance_scale,
         traceback.print_exc()
         return None
 
 def main_ui():
     with gr.Blocks(
         title="AI Image Generator",
@@ -685,15 +778,6 @@ def main_ui():
         color: #721c24;
         border: 1px solid #f5c6cb;
     }
-    .face-fix-badge {
-        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-        color: white;
-        padding: 4px 8px;
-        border-radius: 12px;
-        font-size: 12px;
-        margin-left: 10px;
-        display: inline-block;
-    }
     """
     ) as demo:
 
@@ -701,16 +785,6 @@ def main_ui():
         with gr.Tab("Text to Image"):
             gr.Markdown("## 🎨 Text to Image Generator")
 
-            # Face-Fix info badge
-            if FACEFIX_AVAILABLE:
-                gr.Markdown(
-                    f"""
-                    <div style="background: #e3f2fd; padding: 10px; border-radius: 8px; margin-bottom: 20px; border-left: 4px solid #2196f3;">
-                    🎭 <strong>Face-Fix enabled!</strong> Faces are improved automatically.
-                    </div>
-                    """
-                )
-
             with gr.Row():
                 with gr.Column(scale=2):
                     # Model selection dropdown (ONLY 2 MODELS)
@@ -736,7 +810,7 @@ def main_ui():
 
                 with gr.Column(scale=3):
                     txt_input = gr.Textbox(
-                        placeholder="e.g. ultra realistic
                         lines=3,
                         label="🎯 Prompt (English)",
                         info="Describe in detail what you want to see. Negative prompts are generated automatically."
@@ -953,4 +1027,4 @@ if __name__ == "__main__":
         show_error=True,
         share=False,
         ssr_mode=False  # disable SSR for stability
-    )
 
 from diffusers import DPMSolverMultistepScheduler, PNDMScheduler
 from controlnet_module import controlnet_processor
 import torch
+from PIL import Image, ImageDraw
 import time
 import os
 import tempfile
 import random
+import re
 
 # === OPTIMIZED SETTINGS ===
 device = "cuda" if torch.cuda.is_available() else "cpu"
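The pipeline loaders below rely on a `torch_dtype` that this diff never shows (it sits in elided lines). A minimal sketch of a definition consistent with how the code uses it (fp16 on GPU, fp32 on CPU) would be:

# Assumption: torch_dtype is defined in lines elided from this diff.
# This sketch is merely consistent with its usage below, not the author's code.
torch_dtype = torch.float16 if device == "cuda" else torch.float32
print(f"Running on: {device}")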
 # === MODEL CONFIGURATION (ONLY 2 MODELS) ===
 MODEL_CONFIGS = {
     "runwayml/stable-diffusion-v1-5": {
+        "name": "🏠 Stable Diffusion 1.5 (Universal)",
         "description": "Universal model, good all-rounder, reliable results",
         "requires_vae": False,
         "recommended_steps": 35,
         "supports_fp16": True
     },
     "SG161222/Realistic_Vision_V6.0_B1_noVAE": {
+        "name": "👤 Realistic Vision V6.0 (Portraits)",
         "description": "Best for photorealistic faces, skin details, human portraits",
         "requires_vae": True,
         "vae_model": "stabilityai/sd-vae-ft-mse",
     }
 }
 
+# === SAFETENSORS CONFIGURATION ===
 SAFETENSORS_MODELS = ["runwayml/stable-diffusion-v1-5"]
+
+# Currently selected model (set by the user)
 current_model_id = "runwayml/stable-diffusion-v1-5"
 
 # === AUTOMATIC NEGATIVE PROMPT GENERATION ===
 def auto_negative_prompt(positive_prompt):
+    """Automatically generates negative prompts based on the positive prompt."""
     p = positive_prompt.lower()
     negatives = []
 
+    # People / portraits
     if any(w in p for w in [
+        "person", "man", "woman", "face", "portrait", "team", "employee",
+        "people", "crowd", "character", "figure", "human", "child", "baby",
+        "girl", "boy", "lady", "gentleman", "fairy", "elf", "dwarf", "santa claus",
+        "mermaid", "angel", "demon", "witch", "wizard", "creature", "being",
+        "model", "actor", "actress", "celebrity", "avatar", "group"]):
         negatives.append(
             "blurry face, lowres face, deformed pupils, bad anatomy, malformed hands, extra fingers, uneven eyes, distorted face, "
             "unrealistic skin, mutated, ugly, disfigured, poorly drawn face, "
             "missing limbs, extra limbs, fused fingers, too many fingers, bad teeth, "
+            "mutated hands, long neck, extra wings, multiple wings, grainy face, noisy face, "
             "compression artifacts, rendering artifacts, digital artifacts, overprocessed face, oversmoothed face "
         )
+
+    # Business / corporate
     if any(w in p for w in ["office", "business", "team", "meeting", "corporate", "company", "workplace"]):
+        negatives.append(
+            "overexposed, oversaturated, harsh lighting, watermark, text, logo, brand"
+        )
 
+    # Product / CGI
     if any(w in p for w in ["product", "packshot", "mockup", "render", "3d", "cgi", "packaging"]):
+        negatives.append(
+            "plastic texture, noisy, overly reflective surfaces, watermark, text, low poly"
+        )
 
+    # Landscape / environment
     if any(w in p for w in ["landscape", "nature", "mountain", "forest", "outdoor", "beach", "sky"]):
+        negatives.append(
+            "blurry, oversaturated, unnatural colors, distorted horizon, floating objects"
+        )
 
+    # Logos / symbols
     if any(w in p for w in ["logo", "symbol", "icon", "typography", "badge", "emblem"]):
+        negatives.append(
+            "watermark, signature, username, text, writing, scribble, messy"
+        )
 
+    # Architecture / buildings
     if any(w in p for w in ["building", "architecture", "house", "interior", "room", "facade"]):
+        negatives.append(
+            "deformed, distorted perspective, floating objects, collapsing structure"
+        )
 
+    # Base negative prompts for all cases
     base_negatives = "low quality, worst quality, blurry, jpeg artifacts, ugly, deformed"
 
+    if negatives:
+        return base_negatives + ", " + ", ".join(negatives)
+    else:
+        return base_negatives
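A quick sanity check of what the function returns (illustrative; the example prompt is hypothetical, and the exact string depends on which keyword branches fire):

# Illustrative usage of auto_negative_prompt (not part of the diff).
# "portrait" triggers the people branch and "office" the business branch,
# so both blocks are appended after the base negatives.
neg = auto_negative_prompt("portrait of a woman in an office")
assert neg.startswith("low quality, worst quality")
print(neg)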
 
 # === FACE MASK FUNCTIONS ===
 def create_face_mask(image, bbox_coords, face_preserve):
+    """Creates a face mask - WHITE areas get CHANGED, BLACK areas are KEPT."""
+    mask = Image.new("L", image.size, 0)  # start with a fully black mask (everything protected)
+
     if bbox_coords and all(coord is not None for coord in bbox_coords):
         x1, y1, x2, y2 = bbox_coords
         draw = ImageDraw.Draw(mask)
+
         if face_preserve:
+            # FACE PRESERVATION: draw the mask around the face
+            draw.rectangle([0, 0, image.size[0], image.size[1]], fill=255)  # everything white = change
+            draw.rectangle([x1, y1, x2, y2], fill=0)  # face black = protected (rectangular)
+            print("Face is PROTECTED - surroundings get changed (rectangular mask)")
         else:
+            # CHANGE ONLY THE FACE: only the face becomes white (changed)
+            draw.rectangle([x1, y1, x2, y2], fill=255)  # face white = change (rectangular)
+            print("Only the face gets changed - surroundings are kept (rectangular mask)")
+
     return mask
 
 def auto_detect_face_area(image):
+    """Optimized guess for the face area without external libraries."""
     width, height = image.size
+    # Bounding box sized at 40% of the smaller image dimension
     face_size = min(width, height) * 0.4
+    # Shift y1 upwards to cover forehead and chin better
     x1 = (width - face_size) / 2
+    y1 = (height - face_size) / 4  # positioned higher (25% instead of 33%)
     x2 = x1 + face_size
+    y2 = y1 + face_size * 1.2  # slightly elongated for oval faces
+    # Make sure the coordinates stay inside the image
     x1, y1 = max(0, int(x1)), max(0, int(y1))
     x2, y2 = min(width, int(x2)), min(height, int(y2))
+    print(f"Estimated face coordinates: [{x1}, {y1}, {x2}, {y2}]")
     return [x1, y1, x2, y2]
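A short sketch of how the two helpers above combine into an inpainting mask (assumes Pillow; the blank 512x512 image is just a stand-in):

# Illustrative only: heuristic bbox on a 512x512 image, then a mask that
# protects the detected face region (black) and repaints the rest (white).
from PIL import Image
img = Image.new("RGB", (512, 512))
bbox = auto_detect_face_area(img)                      # -> [153, 76, 358, 322]
mask = create_face_mask(img, bbox, face_preserve=True)
mask.save("face_mask.png")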
|
 # === PIPELINES ===
 pipe_img2img = None
 
 def load_txt2img(model_id):
+    """Loads the text-to-image model based on the selection."""
     global pipe_txt2img, current_pipe_model_id
+
+    # If the right model is already loaded, do nothing
     if pipe_txt2img is not None and current_pipe_model_id == model_id:
+        print(f"✅ Model {model_id} already loaded")
         return pipe_txt2img
 
+    print(f"🔄 Loading model: {model_id}")
+
     config = MODEL_CONFIGS.get(model_id, MODEL_CONFIGS["runwayml/stable-diffusion-v1-5"])
+    print(f"📋 Model configuration: {config['name']}")
+    print(f"📝 Description: {config['description']}")
 
     try:
+        # VAE handling based on the model configuration
         vae = None
         if config.get("requires_vae", False):
+            print(f"🔧 Loading external VAE: {config['vae_model']}")
+            try:
+                vae = AutoencoderKL.from_pretrained(
+                    config["vae_model"],
+                    torch_dtype=torch_dtype
+                ).to(device)
+                print("✅ VAE loaded successfully")
+            except Exception as vae_error:
+                print(f"⚠️ Error loading the VAE: {vae_error}")
+                print("ℹ️ Continuing without VAE...")
+                vae = None
 
+        # Model parameters based on the model type
         model_params = {
             "torch_dtype": torch_dtype,
             "safety_checker": None,
             "requires_safety_checker": False,
+            "add_watermarker": False,
+            "allow_pickle": True,  # important for .bin models
         }
 
+        # SAFETENSORS LOGIC
         if model_id in SAFETENSORS_MODELS:
             model_params["use_safetensors"] = True
+            print(f"ℹ️ Using safetensors for {model_id}")
+        else:
+            model_params["use_safetensors"] = False
+            print(f"ℹ️ Using .bin weights for {model_id}")
 
+        # FP16 variant only if the model supports it AND we are on a GPU
         if config.get("supports_fp16", False) and torch_dtype == torch.float16:
             model_params["variant"] = "fp16"
+            print("ℹ️ Using FP16 variant")
+        else:
+            print("ℹ️ Using standard variant (no FP16)")
 
+        # VAE only if not None
         if vae is not None:
             model_params["vae"] = vae
 
+        print("📥 Loading main model from Hugging Face...")
+        pipe_txt2img = StableDiffusionPipeline.from_pretrained(
+            model_id,
+            **model_params
+        ).to(device)
+
+        # SAFE SCHEDULER HANDLING
+        print("⚙️ Configuring scheduler...")
+
+        # Check whether a scheduler exists
+        if pipe_txt2img.scheduler is None:
+            print("⚠️ Scheduler is None, setting the default scheduler")
+            pipe_txt2img.scheduler = PNDMScheduler.from_pretrained(
+                model_id,
+                subfolder="scheduler"
+            )
 
+        # Try to use the DPM solver (better results)
         try:
+            # Get the scheduler configuration
+            if hasattr(pipe_txt2img.scheduler, 'config'):
+                scheduler_config = pipe_txt2img.scheduler.config
+            else:
+                # Fallback configuration for the scheduler
+                scheduler_config = {
+                    "beta_start": 0.00085,
+                    "beta_end": 0.012,
+                    "beta_schedule": "scaled_linear",
+                    "num_train_timesteps": 1000,
+                    "prediction_type": "epsilon",
+                    "steps_offset": 1
+                }
+                print("⚠️ No scheduler config found, using defaults")
+
+            # Set the DPM solver scheduler
             pipe_txt2img.scheduler = DPMSolverMultistepScheduler.from_config(
+                scheduler_config,
                 use_karras_sigmas=True,
                 algorithm_type="sde-dpmsolver++"
             )
+            print("✅ DPM-Solver Multistep scheduler configured")
+
+        except Exception as scheduler_error:
+            print(f"⚠️ Could not set the DPM scheduler: {scheduler_error}")
+            print("ℹ️ Keeping the default scheduler")
+
+        # Optimizations
+        pipe_txt2img.enable_attention_slicing()
+        print("✅ Attention slicing enabled")
+
+        # VAE slicing only if a VAE exists
+        if hasattr(pipe_txt2img, 'vae') and pipe_txt2img.vae is not None:
+            try:
+                pipe_txt2img.enable_vae_slicing()
+                if hasattr(pipe_txt2img.vae, 'enable_slicing'):
+                    pipe_txt2img.vae.enable_slicing()
+                print("✅ VAE slicing enabled")
+            except Exception as vae_slice_error:
+                print(f"⚠️ VAE slicing not possible: {vae_slice_error}")
 
         current_pipe_model_id = model_id
+        print(f"✅ {config['name']} loaded successfully")
+        print(f"📊 Model dtype: {pipe_txt2img.dtype}")
+        print(f"📊 Scheduler: {type(pipe_txt2img.scheduler).__name__}")
+        print(f"⚙️ Recommended settings: Steps={config['recommended_steps']}, CFG={config['recommended_cfg']}")
+
         return pipe_txt2img
 
     except Exception as e:
+        print(f"❌ Error loading {model_id}: {str(e)[:200]}...")
+        import traceback
+        traceback.print_exc()
+        print("🔄 Falling back to SD 1.5...")
+
+        # Fallback to standard SD 1.5
+        try:
+            pipe_txt2img = StableDiffusionPipeline.from_pretrained(
+                "runwayml/stable-diffusion-v1-5",
+                torch_dtype=torch_dtype,
+                use_safetensors=True,
+            ).to(device)
+            pipe_txt2img.enable_attention_slicing()
+            current_pipe_model_id = "runwayml/stable-diffusion-v1-5"
+            print("✅ Fallback to SD 1.5 succeeded")
+
+            return pipe_txt2img
+        except Exception as fallback_error:
+            print(f"❌ Fallback failed as well: {fallback_error}")
+            raise
 
|
| 299 |
global pipe_img2img
|
| 300 |
if pipe_img2img is None:
|
| 301 |
+
print("Loading Inpainting model...")
|
| 302 |
+
try:
|
| 303 |
+
pipe_img2img = StableDiffusionInpaintPipeline.from_pretrained(
|
| 304 |
+
"runwayml/stable-diffusion-inpainting",
|
| 305 |
+
torch_dtype=torch_dtype,
|
| 306 |
+
allow_pickle=False,
|
| 307 |
+
safety_checker=None,
|
| 308 |
+
).to(device)
|
| 309 |
+
except Exception as e:
|
| 310 |
+
print(f"Fehler beim Laden des Inpainting-Modells: {e}")
|
| 311 |
+
raise
|
| 312 |
+
|
| 313 |
+
from diffusers import DPMSolverMultistepScheduler
|
| 314 |
+
pipe_img2img.scheduler = DPMSolverMultistepScheduler.from_config(
|
| 315 |
+
pipe_img2img.scheduler.config,
|
| 316 |
+
algorithm_type="sde-dpmsolver++",
|
| 317 |
+
use_karras_sigmas=True,
|
| 318 |
+
timestep_spacing="trailing"
|
| 319 |
+
)
|
| 320 |
+
|
| 321 |
pipe_img2img.enable_attention_slicing()
|
| 322 |
pipe_img2img.enable_vae_tiling()
|
| 323 |
+
if hasattr(pipe_img2img, 'vae_slicing'):
|
| 324 |
+
pipe_img2img.vae_slicing = True
|
| 325 |
+
|
| 326 |
return pipe_img2img
|
| 327 |
|
| 328 |
+
# === NEUE CALLBACK-FUNKTIONEN FÜR FORTSCHRITT ===
|
+# === NEW CALLBACK FUNCTIONS FOR PROGRESS ===
 class TextToImageProgressCallback:
     def __init__(self, progress, total_steps):
         self.progress = progress
         self.total_steps = total_steps
+        self.current_step = 0
+
     def __call__(self, pipe, step, timestep, callback_kwargs):
+        self.current_step = step + 1
+        progress_percent = (step / self.total_steps) * 100
+        self.progress(progress_percent / 100, desc="Generation running...")
         return callback_kwargs
 
 class ImageToImageProgressCallback:
     def __init__(self, progress, total_steps, strength):
         self.progress = progress
         self.total_steps = total_steps
+        self.current_step = 0
         self.strength = strength
+        self.actual_total_steps = None
+
     def __call__(self, pipe, step, timestep, callback_kwargs):
+        self.current_step = step + 1
+
+        if self.actual_total_steps is None:
+            if self.strength < 1.0:
+                self.actual_total_steps = int(self.total_steps * self.strength)
+            else:
+                self.actual_total_steps = self.total_steps
+
+            print(f"🎯 INTERNAL STEP COUNT: strength {self.strength} → {self.actual_total_steps} actual denoising steps")
+
+        progress_percent = (step / self.actual_total_steps) * 100
+        self.progress(progress_percent / 100, desc="Generation running...")
         return callback_kwargs
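The img2img callback has to rescale progress because diffusers only runs a fraction of the requested steps when strength < 1.0; a small illustration:

# Illustrative only: how strength shrinks the real denoising step count.
# With steps=40 and strength=0.6 the pipeline performs int(40 * 0.6) = 24
# steps, which is the total that ImageToImageProgressCallback divides by.
cb = ImageToImageProgressCallback(progress=lambda frac, desc="": None,
                                  total_steps=40, strength=0.6)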
| 364 |
+
# === NEUE FUNKTIONEN FÜR DIE FEATURES ===
|
| 365 |
+
def create_preview_image(image, bbox_coords, face_preserve, mode_color):
|
| 366 |
+
"""Erstellt eine Vorschau mit farbigem Rahmen basierend auf dem Modus"""
|
| 367 |
if image is None:
|
| 368 |
+
return None
|
| 369 |
+
|
| 370 |
+
preview = image.copy()
|
| 371 |
+
draw = ImageDraw.Draw(preview)
|
| 372 |
|
| 373 |
+
if mode_color == "red":
|
| 374 |
+
border_color = (255, 0, 0, 180)
|
| 375 |
+
mode_text = "NUR BILDELEMENT VERÄNDERN"
|
| 376 |
+
else:
|
| 377 |
+
border_color = (0, 255, 0, 180)
|
| 378 |
+
mode_text = "BILDELEMENT BEIBEHALTEN"
|
| 379 |
|
| 380 |
+
border_width = 8
|
| 381 |
+
draw.rectangle([0, 0, preview.width-1, preview.height-1],
|
| 382 |
+
outline=border_color, width=border_width)
|
| 383 |
+
|
| 384 |
+
if bbox_coords and all(coord is not None for coord in bbox_coords):
|
| 385 |
+
x1, y1, x2, y2 = bbox_coords
|
| 386 |
+
|
| 387 |
+
box_color = (255, 255, 0, 200)
|
| 388 |
+
draw.rectangle([x1, y1, x2, y2], outline=box_color, width=3)
|
| 389 |
+
|
| 390 |
+
text_color = (255, 255, 255)
|
| 391 |
+
bg_color = (0, 0, 0, 160)
|
| 392 |
+
|
| 393 |
+
text_bbox = draw.textbbox((x1, y1 - 25), mode_text)
|
| 394 |
+
draw.rectangle([text_bbox[0]-5, text_bbox[1]-2, text_bbox[2]+5, text_bbox[3]+2],
|
| 395 |
+
fill=bg_color)
|
| 396 |
+
|
| 397 |
+
draw.text((x1, y1 - 25), mode_text, fill=text_color)
|
| 398 |
|
| 399 |
+
return preview
|
| 400 |
|
| 401 |
+
def update_live_preview(image, bbox_x1, bbox_y1, bbox_x2, bbox_y2, face_preserve):
|
| 402 |
+
"""Aktualisiert die Live-Vorschau bei Koordinaten-Änderungen"""
|
| 403 |
if image is None:
|
| 404 |
return None
|
| 405 |
|
| 406 |
+
bbox_coords = [bbox_x1, bbox_y1, bbox_x2, bbox_y2]
|
| 407 |
+
mode_color = "green" if face_preserve else "red"
|
|
|
|
|
|
|
|
|
|
| 408 |
|
| 409 |
+
return create_preview_image(image, bbox_coords, face_preserve, mode_color)
|
| 410 |
+
|
| 411 |
+
def process_image_upload(image):
|
| 412 |
+
"""Verarbeitet Bild-Upload und gibt Bild + Koordinaten zurück"""
|
| 413 |
+
if image is None:
|
| 414 |
+
return None, None, None, None, None
|
| 415 |
+
|
| 416 |
+
if image.size != (512, 512):
|
| 417 |
+
image = image.resize((512, 512), Image.LANCZOS)
|
| 418 |
+
print(f"Bild auf 512x512 skaliert")
|
| 419 |
|
| 420 |
+
bbox = auto_detect_face_area(image)
|
| 421 |
+
bbox_x1, bbox_y1, bbox_x2, bbox_y2 = bbox
|
| 422 |
|
| 423 |
+
preview = create_preview_image(image, bbox, True, "green")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 424 |
|
| 425 |
+
return preview, bbox_x1, bbox_y1, bbox_x2, bbox_y2
|
| 426 |
|
| 427 |
+
# === HAUPTFUNKTIONEN ===
|
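A sketch of how the three helpers above chain together for the upload flow (the input file name is hypothetical):

# Illustrative only: upload -> heuristic bbox -> framed preview.
from PIL import Image
photo = Image.open("portrait.jpg")                     # hypothetical file
preview, x1, y1, x2, y2 = process_image_upload(photo)
# Toggling the protect switch flips the frame from green to red:
preview = update_live_preview(photo.resize((512, 512)), x1, y1, x2, y2, False)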
+# === MAIN FUNCTIONS ===
 def text_to_image(prompt, model_id, steps, guidance_scale, progress=gr.Progress()):
     try:
         if not prompt or not prompt.strip():
             return None, "Please enter a prompt"
 
+        print(f"🚀 Starting generation with model: {model_id}")
+        print(f"📝 Prompt: {prompt}")
 
+        # Generate automatic negative prompts
         auto_negatives = auto_negative_prompt(prompt)
+        print(f"🤖 Automatically generated negative prompts: {auto_negatives}")
 
         start_time = time.time()
 
+        # List of quality words/weights to check the user input against
+        quality_keywords = ['masterpiece', 'best quality', 'high quality', 'highly detailed',
+                            'exquisite', 'detailed', 'ultra detailed', 'professional',
+                            'perfect', 'excellent', 'amazing', 'stunning', 'beautiful']
+
+        # Check whether the user already uses quality words/weights
+        user_has_quality_words = False
+
+        # Convert the prompt to lowercase for the check
+        prompt_lower = prompt.lower()
+
+        # Check for plain quality words
+        for keyword in quality_keywords:
+            if keyword in prompt_lower:
+                user_has_quality_words = True
+                print(f"✓ User already uses the quality word: {keyword}")
+                break
+
+        # Check for weights (e.g. (word:1.5), [word], etc.)
+        weight_patterns = [r'\([^)]+:\d+(\.\d+)?\)', r'\[[^\]]+\]']
+        for pattern in weight_patterns:
+            if re.search(pattern, prompt):
+                user_has_quality_words = True
+                print("✓ User already uses weights in the prompt")
+                break
+
+        # Adjust the prompt based on the check
+        if not user_has_quality_words:
+            enhanced_prompt = f"masterpiece, raw, best quality, highly detailed, {prompt}"
+            print(f"🔄 Enhanced prompt: {enhanced_prompt}")
+        else:
+            enhanced_prompt = prompt
+            print("✓ User prompt is used unchanged")
+
+        print(f"Final prompt for generation: {enhanced_prompt}")
 
         progress(0, desc="Loading model...")
         pipe = load_txt2img(model_id)
 
         seed = random.randint(0, 2**32 - 1)
         generator = torch.Generator(device=device).manual_seed(seed)
+        print(f"🌱 Seed: {seed}")
+
+        callback = TextToImageProgressCallback(progress, steps)
+
+        print(f"⚙️ Settings: Steps={steps}, CFG={guidance_scale}")
+
         image = pipe(
             prompt=enhanced_prompt,
             negative_prompt=auto_negatives,
+            height=512,
+            width=512,
             num_inference_steps=int(steps),
             guidance_scale=guidance_scale,
             generator=generator,
+            callback_on_step_end=callback,
             callback_on_step_end_tensor_inputs=[],
         ).images[0]
 
+        end_time = time.time()
+        duration = end_time - start_time
+        print(f"✅ Image generated in {duration:.2f} seconds")
 
+        config = MODEL_CONFIGS.get(model_id, MODEL_CONFIGS["runwayml/stable-diffusion-v1-5"])
+        status_msg = f"✅ Generated with {config['name']} in {duration:.1f}s"
 
         return image, status_msg
 
     except Exception as e:
+        error_msg = f"❌ Error: {str(e)}"
         print(f"❌ Error in text_to_image: {e}")
         import traceback
         traceback.print_exc()
+        return None, error_msg
 
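The weight detection in text_to_image hinges on two regular expressions; a quick check of what they match:

# Illustrative only: what the weight_patterns regexes match.
import re
patterns = [r'\([^)]+:\d+(\.\d+)?\)', r'\[[^\]]+\]']
print(bool(re.search(patterns[0], "(detailed face:1.5), a portrait")))  # True
print(bool(re.search(patterns[1], "[masterpiece] a portrait")))         # True
print(bool(re.search(patterns[0], "a plain portrait")))                 # False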
 def img_to_image(image, prompt, neg_prompt, strength, steps, guidance_scale,
                  face_preserve, bbox_x1, bbox_y1, bbox_x2, bbox_y2,
 
         print(f"Negative prompt: {neg_prompt}")
         print(f"Keep face: {face_preserve}")
 
+
+        # ===== NEW: GENERATE THE AUTOMATIC NEGATIVE PROMPT =====
         auto_negatives = auto_negative_prompt(prompt)
         print(f"🤖 Automatically generated negative prompt: {auto_negatives}")
 
+        # ===== NEW: COMBINE MANUAL AND AUTOMATIC PROMPT =====
         combined_negative_prompt = ""
 
         if neg_prompt and neg_prompt.strip():
 
             print(f"👤 User negative prompt: {user_neg}")
 
             # Remove duplicates between automatic and manual prompts
+            # Convert both to sets for a simple duplicate comparison
             user_words = [word.strip().lower() for word in user_neg.split(",")]
             auto_words = [word.strip().lower() for word in auto_negatives.split(",")]
 
                 if auto_word and auto_word not in user_words:
                     combined_words.append(auto_word)
 
+            # Join and remove duplicates (in case of duplicates within the same prompt)
             unique_words = []
             seen_words = set()
             for word in combined_words:
 
             print(f"ℹ️ No manual negative prompt, using only the automatic one: {combined_negative_prompt}")
 
         print(f"✅ Final combined negative prompt: {combined_negative_prompt}")
+        # ===== END OF THE NEW LOGIC =====
+
 
         progress(0, desc="Starting generation with ControlNet...")
 
         print(f"🕒 Duration: {end_time - start_time:.2f} seconds")
 
         generated_image = result.images[0]
         return generated_image
 
     except Exception as e:
 
         traceback.print_exc()
         return None
 
+def update_bbox_from_image(image):
+    """Updates the bounding-box coordinates when an image is uploaded."""
+    if image is None:
+        return None, None, None, None
+
+    bbox = auto_detect_face_area(image)
+    return bbox[0], bbox[1], bbox[2], bbox[3]
+
+def update_model_settings(model_id):
+    """Updates the recommended settings based on the model selection."""
+    config = MODEL_CONFIGS.get(model_id, MODEL_CONFIGS["runwayml/stable-diffusion-v1-5"])
+
+    return (
+        config["recommended_steps"],   # steps
+        config["recommended_cfg"],     # guidance_scale
+        f"📊 Recommended settings: {config['recommended_steps']} steps, CFG {config['recommended_cfg']}"
+    )
+
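update_model_settings returns (steps, cfg, info text), which is shaped for a Gradio change handler; a sketch of the typical wiring (the component names here are hypothetical, not taken from the diff):

# Illustrative only: hypothetical component names, not from the diff.
model_dropdown.change(
    fn=update_model_settings,
    inputs=model_dropdown,
    outputs=[steps_slider, cfg_slider, settings_info],
)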
 def main_ui():
     with gr.Blocks(
         title="AI Image Generator",
 
         color: #721c24;
         border: 1px solid #f5c6cb;
     }
     """
     ) as demo:
 
         with gr.Tab("Text to Image"):
             gr.Markdown("## 🎨 Text to Image Generator")
 
             with gr.Row():
                 with gr.Column(scale=2):
                     # Model selection dropdown (ONLY 2 MODELS)
 
                 with gr.Column(scale=3):
                     txt_input = gr.Textbox(
+                        placeholder="e.g. ultra realistic mountain landscape at sunrise, soft mist over the valley, detailed foliage, crisp textures, depth of field, sunlight rays through clouds, shot on medium format camera, 8k, HDR, hyper-detailed, natural lighting, masterpiece",
                         lines=3,
                         label="🎯 Prompt (English)",
                         info="Describe in detail what you want to see. Negative prompts are generated automatically."
         show_error=True,
         share=False,
         ssr_mode=False  # disable SSR for stability
+    )