import gradio as gr
from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline
from diffusers import StableDiffusionInpaintPipeline, AutoencoderKL
from diffusers import DPMSolverMultistepScheduler, PNDMScheduler
from controlnet_module import controlnet_processor
from diffusers import StableDiffusionControlNetInpaintPipeline, ControlNetModel
import torch
from PIL import Image, ImageDraw
import time
import os
import tempfile
import random
import re
from PIL import ImageFilter  # Für GaussianBlur wird nur für SAM benötigt!
import numpy as np 


# === RUNTIME SETTINGS ===
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Half precision on the GPU, full precision on the CPU.
torch_dtype = torch.float16 if device == "cuda" else torch.float32
IMG_SIZE = 512  # presumably the model working resolution — not referenced in this part of the file
MAX_IMAGE_SIZE = 4096  # Maximum image size for processing

print(f"Running on: {device}")

# === MODEL CONFIGURATION (ONLY 2 MODELS) ===
# Keyed by Hugging Face model id. load_txt2img() reads "name", "description",
# "requires_vae", "vae_model", "supports_fp16", "recommended_steps" and
# "recommended_cfg" from the selected entry.
MODEL_CONFIGS = {
    "runwayml/stable-diffusion-v1-5": {
        "name": "🏠 Stable Diffusion 1.5 (Universal)",
        "description": "Universal model, good all-rounder, reliable results",
        "requires_vae": False,
        "vae_model": "stabilityai/sd-vae-ft-mse",
        "recommended_steps": 35,
        "recommended_cfg": 7.5,
        "supports_fp16": True
    },
    "SG161222/Realistic_Vision_V6.0_B1_noVAE": {
        "name": "👤 Realistic Vision V6.0 (Portraits)",
        "description": "Best for photorealistic faces, skin details, human portraits",
        "requires_vae": True,
        "vae_model": "stabilityai/sd-vae-ft-mse",
        "recommended_steps": 40,
        "recommended_cfg": 7.0,
        "supports_fp16": False
    }
}

# === SAFETENSORS CONFIGURATION ===
# NOTE(review): not referenced in the visible part of the file; load_txt2img()
# decides on safetensors by comparing the model id directly — confirm whether
# this list is still used.
SAFETENSORS_MODELS = ["runwayml/stable-diffusion-v1-5"]

# Currently selected model (set by the user)
current_model_id = "runwayml/stable-diffusion-v1-5"

# === AUTOMATISCHE NEGATIVE PROMPT GENERIERUNG ===
def auto_negative_prompt(positive_prompt):
    """Build a negative prompt that matches the subject of the positive prompt.

    The positive prompt is lower-cased and scanned for topic keywords
    (people, business, product/CGI, landscape, logos, architecture). Every
    topic that matches contributes its own list of unwanted traits, in the
    fixed order listed below, after a set of base negatives that is always
    present.

    Matching is a plain substring test, so a keyword also fires inside a
    longer word (e.g. "man" inside "woman").

    Args:
        positive_prompt: The user's prompt. ``None`` or an empty string is
            treated as "no topic detected".

    Returns:
        A comma-separated negative prompt string.
    """
    # A cleared text field may deliver None instead of "".
    prompt = (positive_prompt or "").lower()

    # (keywords, negatives) pairs, evaluated in order.
    topic_rules = (
        # People / portraits
        (
            (
                "person", "man", "woman", "face", "portrait", "team", "employee",
                "people", "crowd", "character", "figure", "human", "child", "baby",
                "girl", "boy", "lady", "gentleman", "fairy", "elf", "dwarf", "santa claus",
                "mermaid", "angel", "demon", "witch", "wizard", "creature", "being",
                "model", "actor", "actress", "celebrity", "avatar", "group",
            ),
            "blurry face, lowres face, deformed pupils, bad anatomy, malformed hands, extra fingers, uneven eyes, distorted face, "
            "unrealistic skin, mutated, ugly, disfigured, poorly drawn face, "
            "missing limbs, extra limbs, fused fingers, too many fingers, bad teeth, "
            "mutated hands, long neck, extra wings, multiple wings, grainy face, noisy face, "
            "compression artifacts, rendering artifacts, digital artifacts, overprocessed face, oversmoothed face",
        ),
        # Business / corporate
        (
            ("office", "business", "team", "meeting", "corporate", "company", "workplace"),
            "overexposed, oversaturated, harsh lighting, watermark, text, logo, brand",
        ),
        # Product / CGI
        (
            ("product", "packshot", "mockup", "render", "3d", "cgi", "packaging"),
            "plastic texture, noisy, overly reflective surfaces, watermark, text, low poly",
        ),
        # Landscape / environment
        (
            ("landscape", "nature", "mountain", "forest", "outdoor", "beach", "sky"),
            "blurry, oversaturated, unnatural colors, distorted horizon, floating objects",
        ),
        # Logos / symbols
        (
            ("logo", "symbol", "icon", "typography", "badge", "emblem"),
            "watermark, signature, username, text, writing, scribble, messy",
        ),
        # Architecture / buildings
        (
            ("building", "architecture", "house", "interior", "room", "facade"),
            "deformed, distorted perspective, floating objects, collapsing structure",
        ),
    )

    # Base negatives that apply to every prompt.
    base_negatives = "low quality, worst quality, blurry, jpeg artifacts, ugly, deformed"

    matched = [
        negatives
        for keywords, negatives in topic_rules
        if any(word in prompt for word in keywords)
    ]

    return ", ".join([base_negatives, *matched])

# === HILFSFUNKTION: KOORDINATEN SORTIEREN ===
def sort_coordinates(x1, y1, x2, y2):
    """Normalise two box corners so the result satisfies x1 <= x2 and y1 <= y2.

    Returns:
        Tuple ``(x_min, y_min, x_max, y_max)``.
    """
    xs = (x1, x2)
    ys = (y1, y2)
    return min(xs), min(ys), max(xs), max(ys)

# === GESICHTSMASKEN-FUNKTIONEN (ERWEITERT FÜR 3 MODI) ===
def create_face_mask(image, bbox_coords, mode):
    """Create the inpainting mask for one of the three editing modes.

    White (255) pixels are the ones that get changed, black (0) pixels are
    preserved.

    Parameters:
    - image: PIL Image (only its size is used)
    - bbox_coords: [x1, y1, x2, y2]; corners may be given in any order
    - mode: "environment_change", "focus_change" or "face_only_change"

    Returns:
    - PIL Image in "L" mode with the same size as ``image``. If the box is
      missing/incomplete or the mode is unknown, the mask stays fully black.
    """
    # Start from an all-black canvas, i.e. everything is protected.
    mask = Image.new("L", image.size, 0)

    box_is_usable = bbox_coords and all(coord is not None for coord in bbox_coords)
    if not box_is_usable:
        return mask

    # Order the corners, then clamp them into the image area.
    x1, y1, x2, y2 = sort_coordinates(*bbox_coords)
    last_col = image.width-1
    last_row = image.height-1
    x1 = max(0, min(x1, last_col))
    y1 = max(0, min(y1, last_row))
    x2 = max(0, min(x2, last_col))
    y2 = max(0, min(y2, last_row))

    draw = ImageDraw.Draw(mask)
    box = [x1, y1, x2, y2]

    if mode == "environment_change":
        # Mode 1: everything is changed EXCEPT the (rectangular) box.
        draw.rectangle([0, 0, image.size[0], image.size[1]], fill=255)
        draw.rectangle(box, fill=0)
        print(f"🎯 MODUS: Umgebung ändern - Alles außer BBox wird verändert (BBox: {x1},{y1},{x2},{y2})")
    elif mode in ("focus_change", "face_only_change"):
        # Modes 2 and 3 share the same mask: only the box interior is changed.
        draw.rectangle(box, fill=255)
        if mode == "focus_change":
            print(f"🎯 MODUS: Focus verändern - Nur innerhalb der BBox wird verändert (BBox: {x1},{y1},{x2},{y2})")
        else:
            print(f"🎯 MODUS: Ausschließlich Gesicht - Nur innerhalb der BBox wird verändert (BBox: {x1},{y1},{x2},{y2})")

    return mask

# === JOINT PROPORTIONAL SCALING WITH PADDING ===
# Downscales the image together with the inpaint mask (BBox/SAM) and the
# composite mask to target_size x target_size for the ControlNet inpaint pipeline.
def scale_image_and_mask_together(image, mask_inpaint, mask_composite, target_size=512, bbox_coords=None, mode=None):
    """Scale an image and its two masks by one common factor and pad to a square.

    The longer image side is scaled to ``target_size`` while the aspect ratio
    is kept. The scaled image and masks are then pasted centred onto black
    ``target_size`` x ``target_size`` canvases, so the padding area is black
    in the image and 0 in both masks.

    Parameters:
    - image: PIL Image; pasted onto an RGB canvas
    - mask_inpaint: PIL Image; pasted onto an "L" canvas
    - mask_composite: PIL Image; pasted onto an "L" canvas
    - target_size: edge length of the square output (default 512)
    - bbox_coords: optional [x1, y1, x2, y2] in original image coordinates;
      when complete, the box is transformed into the padded square as well
    - mode: opaque value that is only stored in ``padding_info``

    Returns:
    - padded_image: scaled image with padding (RGB)
    - padded_mask_inpaint: scaled inpaint mask with padding (L)
    - padded_mask_composite: scaled composite mask with padding (L)
    - padding_info: dict with offsets, sizes, scale factor, both boxes and
      the mode, needed later to undo the transform during compositing

    Raises:
    - ValueError: if any input is None or the three sizes differ
    """
    if image is None or mask_inpaint is None or mask_composite is None:
        raise ValueError("Bild oder Maske ist None")

    if image.size != mask_inpaint.size or image.size != mask_composite.size:
        # Must be an f-string, otherwise the placeholders are printed literally.
        raise ValueError(
            f"Bild und Masken haben unterschiedliche Größen: "
            f"{image.size} vs {mask_inpaint.size} vs {mask_composite.size}"
        )

    original_width, original_height = image.size

    # Scale factor: the longer side becomes target_size.
    scale = target_size / max(original_width, original_height)

    # Clamp to 1 px so extreme aspect ratios cannot produce a zero-sized
    # resize target.
    new_width = max(1, int(original_width * scale))
    new_height = max(1, int(original_height * scale))

    print(f"📐 Gemeinsame Skalierung: {original_width}x{original_height} → {new_width}x{new_height} (Skalierung: {scale:.4f})")

    # Same target size for all three; NEAREST keeps the masks free of
    # interpolated in-between values.
    scaled_image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)
    scaled_mask_inpaint = mask_inpaint.resize((new_width, new_height), Image.Resampling.NEAREST)
    scaled_mask_composite = mask_composite.resize((new_width, new_height), Image.Resampling.NEAREST)

    # Empty black canvases in the model's working size; the scaled content
    # is pasted into them below.
    padded_image = Image.new("RGB", (target_size, target_size), (0, 0, 0))
    padded_mask_inpaint = Image.new("L", (target_size, target_size), 0)
    padded_mask_composite = Image.new("L", (target_size, target_size), 0)

    # Centre the scaled content; the uncovered canvas is the padding.
    x_offset = (target_size - new_width) // 2
    y_offset = (target_size - new_height) // 2

    padded_image.paste(scaled_image, (x_offset, y_offset))
    padded_mask_inpaint.paste(scaled_mask_inpaint, (x_offset, y_offset))
    padded_mask_composite.paste(scaled_mask_composite, (x_offset, y_offset))

    # Transformed box = scaled box + padding offset, computed once here so
    # compositing can reuse exactly the same numbers.
    scaled_bbox = None
    if bbox_coords and all(c is not None for c in bbox_coords):
        x1, y1, x2, y2 = bbox_coords
        scaled_bbox = (
            int(x1 * scale) + x_offset,
            int(y1 * scale) + y_offset,
            int(x2 * scale) + x_offset,
            int(y2 * scale) + y_offset
        )
        print(f"📐 Skalierte BBox gespeichert: {scaled_bbox} (von {bbox_coords})")

    # Everything needed to map the result back onto the original image.
    padding_info = {
        'x_offset': x_offset,
        'y_offset': y_offset,
        'scaled_width': new_width,
        'scaled_height': new_height,
        'original_width': original_width,
        'original_height': original_height,
        'scale_factor': scale,
        'target_size': target_size,
        'original_bbox': bbox_coords,
        'scaled_bbox': scaled_bbox,
        'mode': mode
    }

    print(f"📦 Padding hinzugefügt: Offsets ({x_offset}, {y_offset})")
    print(f"BBox gespeicher: {bbox_coords}, Modus:{mode}")
    print(f"✅ 1 Bild + 2 Masken skaliert. Inpaint-Maske binär: {np.unique(np.array(padded_mask_inpaint))}")

    return padded_image, padded_mask_inpaint, padded_mask_composite, padding_info


# Compositing workflow that runs on the output of the ControlNet inpaint pipeline
def enhanced_composite_with_sam(original_image, inpaint_result, original_mask, 
                                padding_info, bbox_coords, mode):
    """
    Blend the padded inpaint result back into the full-size original image.

    Parameters:
    - original_image: PIL Image in original resolution
    - inpaint_result: PIL Image returned by the pipeline (padded square)
    - original_mask: PIL mask in original resolution (SAM/BBox mask).
      In both branches the mask is inverted before use, so generated content
      ends up where this mask is bright in "environment_change" and where it
      is dark in the other modes.
      NOTE(review): the caller must therefore pass a mask with mode-dependent
      polarity — confirm against the call site.
    - padding_info: dict produced by scale_image_and_mask_together(); the keys
      x_offset, y_offset, scaled_width, scaled_height, scale_factor,
      original_width and original_height are required, scaled_bbox is optional
    - bbox_coords: [x1, y1, x2, y2] in original coordinates, or None/incomplete
    - mode: "environment_change" selects the background branch, every other
      value the focus/face branch

    Returns:
    - inpaint_result unchanged when no scaling/padding was applied, otherwise
      an RGB PIL Image in original resolution
    """
    print(f"🎨 Verbessertes Compositing für Modus: {mode}")
    
    # Unpack the scaling/padding bookkeeping
    x_offset = padding_info['x_offset']
    y_offset = padding_info['y_offset']
    scaled_width = padding_info['scaled_width']
    scaled_height = padding_info['scaled_height']
    scale_factor = padding_info['scale_factor']
    original_width = padding_info['original_width']
    original_height = padding_info['original_height']
    
    # ==============================================
    # CASE 1: image already had the target size (no scaling, no padding)
    # ==============================================
    # The pipeline output is returned as-is; no mask-based blending happens here.
    if scale_factor == 1.0 and x_offset == 0 and y_offset == 0:
        print(f"✅ FALL 1: Bild 512×512 - kein Compositing nötig")
        return inpaint_result
    
    # ==============================================
    # CASE 2 & 3: image was scaled
    # ==============================================
    print(f"🔄 FALL 2/3: Bild skaliert - Compositing mit SAM-Maske")
    
    # 1. REMOVE THE PADDING from the square inpaint result
    downscaled_result = inpaint_result.crop(
        (x_offset, y_offset, x_offset + scaled_width, y_offset + scaled_height)
    )
    
    # 2. Start from a copy of the original; the focus/face branch pastes into it.
    #    (The environment branch replaces this variable with a new image.)
    final_image = original_image.copy()
    
    if mode == "environment_change":
        # ==============================================
        # MODE: CHANGE ENVIRONMENT (object stays original)
        # The BBox is not needed here: the original is cut out along the mask.
        # ==============================================
        print("🌳 Modus: Umwelt ändern mit SAM-Maske")
        
        # Upscale the whole generated image to the original resolution
        new_background = downscaled_result.resize(
            (original_width, original_height), 
            Image.Resampling.LANCZOS
        )
        
        # Copy of the original with an alpha channel added
        original_with_alpha = original_image.copy().convert("RGBA")
        
        # Invert the mask so that dark mask areas become opaque alpha,
        # i.e. the original is kept where original_mask is dark.
        mask_inverted = Image.eval(original_mask, lambda x: 255 - x)
        
        # Blur (radius 3) for a soft transition between kept and generated pixels
        soft_mask = mask_inverted.filter(ImageFilter.GaussianBlur(3))

        # Use the soft mask as alpha: 0 = original removed, 255 = original kept,
        # grey values blend.
        original_with_alpha.putalpha(soft_mask)
        
        # Compositing: the generated image is the base layer ...
        final_image = new_background.copy().convert("RGBA")

        # ... and the preserved part of the original is pasted on top of it.
        final_image.paste(original_with_alpha, (0, 0), original_with_alpha)
        
    else:
        # ==============================================
        # MODE: CHANGE FOCUS or FACE
        # The BBox is required here because the generated content is cut out
        # of the inpaint result along the box and refined with the mask.
        # ==============================================
        mode_name = "Focus" if mode == "focus_change" else "Gesicht"
        print(f"👤 Modus: {mode_name} ändern mit SAM-Maske")
        
        if not bbox_coords or not all(c is not None for c in bbox_coords):
            # No usable BBox: return the whole upscaled generated image
            final_image = downscaled_result.resize(
                (original_width, original_height), 
                Image.Resampling.LANCZOS
            )
            return final_image.convert("RGB")


        # Prefer the box stored by scale_image_and_mask_together() so both
        # steps use identical numbers.
        if 'scaled_bbox' in padding_info and padding_info['scaled_bbox'] is not None:
            bbox_in_512 = padding_info['scaled_bbox']  # stored, already scaled + offset box
            print(f"✅ Verwende gespeicherte BBox: {bbox_in_512}")
        else:
            # Fallback: transform the box from original coordinates into the
            # padded square — first scale ...
            bbox_scaled = (
                int(bbox_coords[0] * scale_factor),
                int(bbox_coords[1] * scale_factor),
                int(bbox_coords[2] * scale_factor),
                int(bbox_coords[3] * scale_factor)         
            )
            # ... then shift by the padding offsets
            bbox_in_512 = (
                bbox_scaled[0] + x_offset,
                bbox_scaled[1] + y_offset,
                bbox_scaled[2] + x_offset,
                bbox_scaled[3] + y_offset
            )

            
        print(f"🔍 [COMPOSIT] Original-BBox: {bbox_coords}")
        print(f"🔍 [COMPOSIT] Scale/Offset: {scale_factor}, ({x_offset},{y_offset})")
        print(f"🔍 [COMPOSIT] BBox in 512: {bbox_in_512}")
        print(f"🔍 [COMPOSIT] Inpaint Size: {inpaint_result.size}")
        

        # The box is given as top-left (x, y) and bottom-right (x, y).
        # Only continue when it has a positive width and height; otherwise
        # nothing is pasted and the untouched copy of the original is returned.
        if bbox_in_512[2] > bbox_in_512[0] and bbox_in_512[3] > bbox_in_512[1]:
            # Cut the edited region out of the padded result
            edited_region = inpaint_result.crop(bbox_in_512)

            print(f"🔍 [CROP] Ausgeschnitten: {edited_region.size}")    

            
            # Upscale the cut-out to the size of the box in original coordinates.
            # NOTE(review): assumes bbox_coords are ordered integers — resize and
            # paste need a positive integer size/position; verify against caller.
            original_bbox_size = (bbox_coords[2] - bbox_coords[0], 
                                  bbox_coords[3] - bbox_coords[1])
            edited_region_fullsize = edited_region.resize(
                original_bbox_size, 
                Image.Resampling.LANCZOS
            )

            print(f"🔍 [RESIZE] Original-BBox-Size: {original_bbox_size}")
            print(f"🔍 [RESIZE] Hochskaliert auf: {edited_region_fullsize.size}")
            
            # original_mask is in original resolution; cut the box area out of it
            mask_cropped = original_mask.crop(bbox_coords)
            print(f"🔍 [MASK] Mask-Crop Size: {mask_cropped.size}")
            
            # Soften the mask edges (blur radius 3) for smooth transitions
            soft_mask = mask_cropped.filter(ImageFilter.GaussianBlur(3))
            
            # Alpha compositing with the mask
            edited_rgba = edited_region_fullsize.convert("RGBA")
            # The inverted soft mask is the paste mask: generated pixels are
            # used where the cropped original_mask is dark, the original
            # background inside the box shows through where it is bright.
            mask_inverted = Image.eval(soft_mask, lambda x: 255 - x) # invert
            mask_rgba = mask_inverted.convert("L") # single-channel paste mask

            print(f"🔍 Alpha-Maske Werte: min={np.array(mask_rgba).min()}, max={np.array(mask_rgba).max()}")
            print(f"🔍 Generierte Person Alpha: {edited_rgba.getchannel('A').getextrema()}")

            # Fully transparent carrier image in original box size
            temp_image = Image.new("RGBA", original_bbox_size, (0, 0, 0, 0))
            # Paste the generated region onto it through the mask
            temp_image.paste(edited_rgba, (0, 0), mask_rgba)

            # Paste the carrier into the original at the box position; its
            # transparent areas keep the original pixels.
            final_image.paste(temp_image, (bbox_coords[0], bbox_coords[1]), temp_image)

    # Debug info
    print(f"🔍 DEBUG COMPOSITING:")
    print(f"   Original BBox: {bbox_coords}")
    print(f"   Scale Factor: {scale_factor}")
    print(f"   Offsets: ({x_offset}, {y_offset})")
    print(f"   Inpaint Size: {inpaint_result.size}")  
    print(f"✅ Korrektes Compositing abgeschlossen. Finale Größe: {final_image.size}")
    
    return final_image.convert("RGB")


def auto_detect_face_area(image):
    """Guess a face bounding box from the image dimensions alone.

    No detector is involved: the box is 40 % of the shorter image side wide,
    1.2 times that tall, horizontally centred and placed in the upper part
    of the image.

    Returns:
        ``[x1, y1, x2, y2]`` as integers, clamped to the image bounds.
    """
    img_w, img_h = image.size

    box_side = min(img_w, img_h) * 0.4
    left = (img_w - box_side) / 2
    top = (img_h - box_side) / 4
    right = left + box_side
    bottom = top + box_side * 1.2

    # Order the corners, truncate to integers and keep them inside the image.
    left = max(0, int(min(left, right)))
    top = max(0, int(min(top, bottom)))
    right = min(img_w, int(max(left, right)))
    bottom = min(img_h, int(max(top, bottom)))

    face_box = [left, top, right, bottom]
    print(f"Geschätzte Gesichtskoordinaten: [{left}, {top}, {right}, {bottom}] (Bild: {img_w}x{img_h})")
    return face_box

# === PIPELINES ===
# Module-level caches for the lazily loaded pipelines (None = not loaded yet).
pipe_txt2img = None            # text-to-image pipeline, set by load_txt2img()
current_pipe_model_id = None   # model id that pipe_txt2img was loaded from
pipe_img2img = None            # NOTE(review): not used in the visible part of the file
pipe_img2img_pose = None       # OpenPose + Canny inpaint pipeline, set by load_img2img()
pipe_img2img_depth = None      # Depth + Canny inpaint pipeline, set by load_img2img()


# Loading a model means reading the trained weights from disk and keeping them
# in memory (on `device`) so they are available for inference.
# load_txt2img() caches a single pipeline in the global `pipe_txt2img`: a call
# with the model id that is already loaded returns the cached instance, a call
# with a different id loads that model and replaces the cached one.
def load_txt2img(model_id):
    """Load (or reuse) the text-to-image pipeline for ``model_id``.

    Parameters:
    - model_id: Hugging Face model id. Ids that are missing from MODEL_CONFIGS
      use the configuration entry of "runwayml/stable-diffusion-v1-5", but
      ``model_id`` itself is still the model that gets loaded.

    Returns:
    - the pipeline moved to `device`. If loading ``model_id`` fails, a plain
      Stable Diffusion 1.5 pipeline is loaded and returned instead.

    Raises:
    - whatever the fallback load raises when it fails as well.
    """
    global pipe_txt2img, current_pipe_model_id
    
    # Cache hit: same model as last time
    if pipe_txt2img is not None and current_pipe_model_id == model_id:
        print(f"✅ Modell {model_id} bereits geladen")
        return pipe_txt2img
    
    print(f"🔄 Lade Modell: {model_id}")
    
    config = MODEL_CONFIGS.get(model_id, MODEL_CONFIGS["runwayml/stable-diffusion-v1-5"])
    print(f"📋 Modell-Konfiguration: {config['name']}")
    print(f"📝 Beschreibung: {config['description']}")
    
    try:
        # VAE handling based on the model configuration: models flagged with
        # "requires_vae" get an external VAE loaded and handed to the pipeline.
        vae = None
        if config.get("requires_vae", False):
            print(f"🔧 Lade externe VAE: {config['vae_model']}")
            try:
                vae = AutoencoderKL.from_pretrained(
                    config["vae_model"],
                    torch_dtype=torch_dtype
                ).to(device)
                print("✅ VAE erfolgreich geladen")
            except Exception as vae_error:
                # Best effort: continue without the external VAE
                print(f"⚠️ Fehler beim Laden der VAE: {vae_error}")
                print("ℹ️ Versuche ohne VAE weiter...")
                vae = None
        
        # NOTE(review): "add_watermarker" and (below) "allow_pickle" do not look
        # like documented StableDiffusionPipeline.from_pretrained arguments —
        # confirm whether they have any effect.
        model_params = {
            "torch_dtype": torch_dtype,
            "safety_checker": None,
            "requires_safety_checker": False,
            "add_watermarker": False,
            "cache_dir": "/tmp/models"   # For Hugging Face Spaces
        }

        # The models ship different weight formats: safetensors is requested
        # for every model except Realistic Vision, which is loaded from .bin files.
        if model_id == "SG161222/Realistic_Vision_V6.0_B1_noVAE":
            model_params["allow_pickle"] = False  # per original author: important for PyTorch 2.6
            model_params["use_safetensors"] = False
            print("⚠️ Realistic Vision Modell - Nutzt .bin-Dateien.")
        else:
            model_params["allow_pickle"] = True
            model_params["use_safetensors"] = True
            print("✅ Verwende SafeTensors für sicheres Laden.")

        
        # Request the fp16 weight variant only when the model config allows it
        # and we are actually running in half precision.
        if config.get("supports_fp16", False) and torch_dtype == torch.float16:
            model_params["variant"] = "fp16"
            print("ℹ️ Verwende FP16 Variante")
        else:
            print("ℹ️ Verwende Standard Variante (kein FP16)")
        
        if vae is not None:
            model_params["vae"] = vae
        
        print(f"📥 Lade Hauptmodell von Hugging Face...")
        pipe_txt2img = StableDiffusionPipeline.from_pretrained(
            model_id,
            **model_params
        ).to(device)


        # The scheduler (e.g. DPM-Solver++ or PNDM) is the algorithm that defines
        # the step schedule of the iterative denoising.
        print("⚙️ Konfiguriere Scheduler...")
        
        if pipe_txt2img.scheduler is None:
            print("⚠️ Scheduler ist None, setze Standard-Scheduler")
            pipe_txt2img.scheduler = PNDMScheduler.from_pretrained(
                model_id,
                subfolder="scheduler"
            )
        
        # Swap in DPM-Solver multistep; on failure the existing scheduler is kept.
        try:
            if hasattr(pipe_txt2img.scheduler, 'config'):
                scheduler_config = pipe_txt2img.scheduler.config
            else:
                # Hard-coded fallback configuration when the scheduler exposes none
                scheduler_config = {
                    "beta_start": 0.00085,
                    "beta_end": 0.012,
                    "beta_schedule": "scaled_linear",
                    "num_train_timesteps": 1000,
                    "prediction_type": "epsilon",
                    "steps_offset": 1
                }
                print("⚠️ Keine Scheduler-Konfig gefunden, verwende Standard")
            
            pipe_txt2img.scheduler = DPMSolverMultistepScheduler.from_config(
                scheduler_config,
                use_karras_sigmas=True,
                algorithm_type="sde-dpmsolver++"
            )
            print("✅ DPM-Solver Multistep Scheduler konfiguriert")
            
        except Exception as scheduler_error:
            print(f"⚠️ Konnte DPM-Scheduler nicht setzen: {scheduler_error}")
            print("ℹ️ Verwende Standard-Scheduler weiter")
        
        # Memory optimisation: attention slicing
        pipe_txt2img.enable_attention_slicing()
        print("✅ Attention Slicing aktiviert")

        # Memory optimisation: VAE slicing (best effort, only if a VAE is present)
        if hasattr(pipe_txt2img, 'vae') and pipe_txt2img.vae is not None:
            try:
                pipe_txt2img.enable_vae_slicing()
                if hasattr(pipe_txt2img.vae, 'enable_slicing'):
                    pipe_txt2img.vae.enable_slicing()
                print("✅ VAE Slicing aktiviert")
            except Exception as vae_slice_error:
                print(f"⚠️ VAE Slicing nicht möglich: {vae_slice_error}")
        
        # Remember which model the cached pipeline belongs to
        current_pipe_model_id = model_id
        print(f"✅ {config['name']} erfolgreich geladen")
        print(f"📊 Modell-Dtype: {pipe_txt2img.dtype}")
        print(f"📊 Scheduler: {type(pipe_txt2img.scheduler).__name__}")
        print(f"⚙️ Empfohlene Einstellungen: Steps={config['recommended_steps']}, CFG={config['recommended_cfg']}")
        
        return pipe_txt2img
        
    except Exception as e:
        # Any failure above ends up here: log it and fall back to SD 1.5.
        print(f"❌ Fehler beim Laden von {model_id}: {str(e)[:200]}...")
        import traceback
        traceback.print_exc()
        print("🔄 Fallback auf SD 1.5...")
        
        # NOTE(review): unlike the main path, the fallback passes neither
        # safety_checker=None nor cache_dir and keeps the default scheduler.
        try:
            pipe_txt2img = StableDiffusionPipeline.from_pretrained(
                "runwayml/stable-diffusion-v1-5",
                torch_dtype=torch_dtype,
                use_safetensors=True,
            ).to(device)
            pipe_txt2img.enable_attention_slicing()
            current_pipe_model_id = "runwayml/stable-diffusion-v1-5"
            print("✅ Fallback auf SD 1.5 erfolgreich")
            
            return pipe_txt2img
        except Exception as fallback_error:
            print(f"❌ Auch Fallback fehlgeschlagen: {fallback_error}")
            raise


def _load_controlnet_inpaint_pipeline(primary_controlnet_id, primary_label):
    """Build one Multi-ControlNet inpaint pipeline: [<primary>, Canny].

    Parameters:
    - primary_controlnet_id: Hugging Face id of the first ControlNet
    - primary_label: short name of the first ControlNet, used in log messages

    Returns:
    - the configured pipeline, moved to `device`

    Raises:
    - re-raises any exception from loading or configuring, after logging it
    """
    print(f"🔄 Lade Multi-ControlNet-Inpainting-Modell ({primary_label} + Canny)...")
    try:
        controlnet_primary = ControlNetModel.from_pretrained(
            primary_controlnet_id,
            torch_dtype=torch_dtype
        )
        controlnet_canny = ControlNetModel.from_pretrained(
            "lllyasviel/sd-controlnet-canny",
            torch_dtype=torch_dtype
        )

        # IMPORTANT: the ControlNet order must match the order of the control
        # images prepared elsewhere (per the original note: prepare_controlnet_maps):
        # primary first, Canny second.
        pipe = StableDiffusionControlNetInpaintPipeline.from_pretrained(
            "runwayml/stable-diffusion-v1-5",
            controlnet=[controlnet_primary, controlnet_canny],
            torch_dtype=torch_dtype,
            safety_checker=None,
            requires_safety_checker=False,
            cache_dir="/tmp/models",
            use_safetensors=True
        ).to(device)

        # Configure the scheduler
        pipe.scheduler = DPMSolverMultistepScheduler.from_config(
            pipe.scheduler.config,
            algorithm_type="sde-dpmsolver++",
            use_karras_sigmas=True,
            timestep_spacing="trailing"
        )

        # Memory optimisation
        pipe.enable_attention_slicing()
        print(f"✅ Multi-ControlNet-Inpainting-Pipeline geladen ({primary_label} + Canny)")
        return pipe

    except Exception as e:
        print(f"❌ Fehler beim Laden der {primary_label}+Canny Pipeline: {e}")
        raise


def load_img2img(keep_environment=False):
    """Return the cached ControlNet inpaint pipeline, loading it on first use.

    Parameters:
    - keep_environment: True selects the Depth + Canny pipeline, False the
      OpenPose + Canny pipeline.

    Returns:
    - the selected pipeline. Each variant is loaded once and cached in a
      module-level global.

    Raises:
    - whatever loading the pipeline raises. The cache is only filled after
      the pipeline has been fully configured, so a failed load is retried on
      the next call instead of returning a half-configured pipeline.
    """
    global pipe_img2img_pose, pipe_img2img_depth

    if keep_environment:
        # ===== MODE: Depth + Canny =====
        if pipe_img2img_depth is None:
            pipe_img2img_depth = _load_controlnet_inpaint_pipeline(
                "lllyasviel/sd-controlnet-depth", "Depth"
            )
        return pipe_img2img_depth

    # ===== MODE: OpenPose + Canny =====
    if pipe_img2img_pose is None:
        pipe_img2img_pose = _load_controlnet_inpaint_pipeline(
            "lllyasviel/sd-controlnet-openpose", "OpenPose"
        )
    return pipe_img2img_pose


# The pipeline invokes the callback after every processing step and passes it information
# such as the current step and timestep. The progress-bar callbacks use this to compute and display progress.
# === PROGRESS CALLBACK FUNCTIONS ===
class TextToImageProgressCallback:
    """Per-step pipeline callback that forwards text-to-image progress to a reporter.

    Meant to be handed to the pipeline as ``callback_on_step_end``. Each call
    converts the step index into a completed fraction and passes it on to the
    ``progress`` callable.
    """

    def __init__(self, progress, total_steps):
        """Remember the reporter and the planned number of steps.

        Parameters:
        - progress: callable invoked as ``progress(fraction, desc=...)``
        - total_steps: number of inference steps requested for the run
        """
        self.progress = progress
        self.total_steps = total_steps
        self.current_step = 0

    def __call__(self, pipe, step, timestep, callback_kwargs):
        """Report the completed fraction and return ``callback_kwargs`` unchanged.

        Parameters:
        - pipe: the calling pipeline (unused)
        - step: zero-based index of the step that has just finished
        - timestep: scheduler timestep of that step (unused)
        - callback_kwargs: dict handed in by the pipeline; returned as-is
        """
        self.current_step = step + 1
        # The callback runs after a step has finished, so the completed count
        # is step + 1; using the raw index would never reach 100 %.
        # max(1, ...) avoids a division by zero, min(1.0, ...) keeps the
        # reported fraction inside [0, 1].
        total = max(1, self.total_steps)
        fraction = min(1.0, self.current_step / total)
        self.progress(fraction, desc="Generierung läuft...")
        return callback_kwargs

class ImageToImageProgressCallback:
    """Per-step pipeline callback that reports image-to-image progress.

    The number of steps used for the progress fraction is derived on the
    first call as ``int(total_steps * strength)`` (at least 1) and stored in
    ``actual_total_steps``.
    """

    def __init__(self, progress, total_steps, strength):
        """Remember the reporter, the requested step count and the strength.

        Parameters:
        - progress: callable invoked as ``progress(fraction, desc=...)``
        - total_steps: number of inference steps requested for the run
        - strength: denoising strength used to scale the step count
        """
        self.progress = progress
        self.total_steps = total_steps
        self.current_step = 0
        self.strength = strength
        # Computed lazily on the first callback invocation.
        self.actual_total_steps = None

    def __call__(self, pipe, step, timestep, callback_kwargs):
        """Report the completed fraction and return ``callback_kwargs`` unchanged.

        Parameters:
        - pipe: the calling pipeline (unused)
        - step: zero-based index of the step that has just finished
        - timestep: scheduler timestep of that step (unused)
        - callback_kwargs: dict handed in by the pipeline; returned as-is
        """
        self.current_step = step + 1

        if self.actual_total_steps is None:
            # int() truncates, so few steps combined with a low strength can
            # yield 0; clamp to 1 so the division below cannot fail.
            self.actual_total_steps = max(1, int(self.total_steps * self.strength))

            print(f"🎯 Steps: {self.total_steps} × {self.strength} → {self.actual_total_steps} tatsächliche Denoising-Schritte")

        # Completed steps (step + 1) over the effective total, capped at 100 %.
        fraction = min(1.0, self.current_step / self.actual_total_steps)
        self.progress(fraction, desc="Generierung läuft...")
        return callback_kwargs
        

# === NEUE FUNKTIONEN FÜR DIE FEATURES (ANGEPASST FÜR 3 MODI) ===
def create_preview_image(image, bbox_coords, mode):
    """
    Build a preview copy of the image with a mode-coloured frame and a labelled bounding box.

    Parameters:
    - image: PIL Image (None is passed through as None)
    - bbox_coords: [x1, y1, x2, y2]; the box is skipped when this is empty or contains None
    - mode: "environment_change", "focus_change" or "face_only_change";
      any other value falls back to a grey "unknown mode" style

    Returns:
    - a copy of the image with frame, box and label drawn on it, or None
    """
    if image is None:
        return None

    canvas = image.copy()
    painter = ImageDraw.Draw(canvas)

    # One entry per mode: (frame colour, label text, box colour, label background)
    mode_styles = (
        ("environment_change", (
            (0, 255, 0, 180),      # green frame: environment
            "UMGEBUNG ÄNDERN (Bereich geschützt)",
            (255, 255, 0, 200),    # yellow box: protected area
            (0, 128, 0, 160),      # dark green
        )),
        ("focus_change", (
            (255, 165, 0, 180),    # orange frame: focus
            "FOCUS VERÄNDERN (Bereich+Körper)",
            (255, 0, 0, 200),      # red box: area to change
            (255, 140, 0, 160),    # dark orange
        )),
        ("face_only_change", (
            (255, 0, 0, 180),      # red frame: face only
            "NUR BEREICH VERÄNDERN",
            (255, 0, 0, 200),      # red box: area to change
            (128, 0, 0, 160),      # dark red
        )),
    )
    unknown_style = (
        (128, 128, 128, 180),
        "UNBEKANNTER MODUS",
        (128, 128, 128, 200),
        (64, 64, 64, 160),
    )
    frame_color, label, rect_color, label_bg = next(
        (style for key, style in mode_styles if mode == key),
        unknown_style,
    )

    last_x = canvas.width - 1
    last_y = canvas.height - 1

    # Frame thickness grows with the image width so it stays visible on large images.
    frame_width = max(8, image.width // 200)
    painter.rectangle([0, 0, last_x, last_y], outline=frame_color, width=frame_width)

    # No usable box: the framed copy is the whole preview.
    if not bbox_coords or not all(value is not None for value in bbox_coords):
        return canvas

    x1, y1, x2, y2 = sort_coordinates(*bbox_coords)

    # Clamp every corner into the image area.
    left = max(0, min(x1, last_x))
    top = max(0, min(y1, last_y))
    right = max(0, min(x2, last_x))
    bottom = max(0, min(y2, last_y))

    # Degenerate boxes are not drawn.
    if not (right > left and bottom > top):
        return canvas

    rect_width = max(3, image.width // 400)
    painter.rectangle([left, top, right, bottom], outline=rect_color, width=rect_width)

    # Label sits 25 px above the box, but never above the top edge of the image.
    label_pos = (left, max(0, top - 25))
    text_left, text_top, text_right, text_bottom = painter.textbbox(label_pos, label)
    painter.rectangle(
        [text_left - 5, text_top - 2, text_right + 5, text_bottom + 2],
        fill=label_bg,
    )
    painter.text(label_pos, label, fill=(255, 255, 255))

    return canvas

def update_live_preview(image, bbox_x1, bbox_y1, bbox_x2, bbox_y2, mode):
    """
    Re-render the live preview after the bounding-box coordinates changed.

    The four coordinates are passed through sort_coordinates and handed,
    together with the mode string, to create_preview_image.
    Returns None when there is no image.
    """
    if image is not None:
        # The sliders carry original image coordinates; bring them into order first.
        ordered_box = sort_coordinates(bbox_x1, bbox_y1, bbox_x2, bbox_y2)
        return create_preview_image(image, ordered_box, mode)

    return None
    

def process_image_upload(image):
    """Handle an image upload and return (preview, x1, y1, x2, y2).

    Without an image, five None values are returned so the caller does not
    crash. Otherwise the box comes from auto_detect_face_area, is passed
    through sort_coordinates and is drawn onto a preview in
    "environment_change" mode.
    """
    if image is None:
        return (None,) * 5

    img_w, img_h = image.size

    # Bounding box derived from the actual image.
    detected_box = auto_detect_face_area(image)
    left, top, right, bottom = sort_coordinates(*detected_box)

    # The preview is drawn with the original (unscaled) coordinates.
    preview = create_preview_image(image, [left, top, right, bottom], "environment_change")

    # Slider values are original coordinates (no 512 scaling).
    print(f"Bild {img_w}x{img_h} -> Slider-Originalwerte: [{left}, {top}, {right}, {bottom}]")

    return preview, left, top, right, bottom
    

# === FUNKTION FÜR SLIDER-UPDATE ===
def update_slider_for_image(image):
    """Return four slider updates (x1, y1, x2, y2) whose maxima fit the image.

    Without an image every maximum is MAX_IMAGE_SIZE; otherwise the x sliders
    are capped at the image width and the y sliders at the image height, both
    limited to MAX_IMAGE_SIZE.
    """
    if image is None:
        return tuple(gr.update(maximum=MAX_IMAGE_SIZE) for _ in range(4))

    width, height = image.size

    # Never exceed MAX_IMAGE_SIZE, even for larger images.
    limit_x = min(width, MAX_IMAGE_SIZE)
    limit_y = min(height, MAX_IMAGE_SIZE)

    print(f"Slider-Maxima gesetzt auf: {limit_x}x{limit_y}")

    # Order matches the sliders: x1, y1, x2, y2.
    return tuple(
        gr.update(maximum=limit)
        for limit in (limit_x, limit_y, limit_x, limit_y)
    )


def _with_quality_prefix(prompt):
    """Return the prompt, prefixed with quality tags unless the user already set some.

    The prompt is left untouched when it contains one of the known quality
    words (case-insensitive substring match) or uses weight syntax such as
    ``(word:1.5)`` or ``[word]``. Each decision is logged via print.
    """
    quality_keywords = (
        'masterpiece', 'best quality', 'high quality', 'highly detailed',
        'exquisite', 'ultra detailed', 'professional',
        'perfect', 'excellent', 'amazing', 'stunning', 'beautiful',
    )
    # Weight syntax: (word:1.5) and [word]
    weight_patterns = (r'\([^)]+:\d+(\.\d+)?\)', r'\[[^\]]+\]')

    lowered = prompt.lower()

    # First quality word found in the prompt, if any.
    matched_keyword = next((kw for kw in quality_keywords if kw in lowered), None)
    if matched_keyword is not None:
        print(f"✓ Benutzer verwendet bereits Qualitätswort: {matched_keyword}")

    uses_weights = any(re.search(pattern, prompt) for pattern in weight_patterns)
    if uses_weights:
        print("✓ Benutzer verwendet bereits Gewichte im Prompt")

    if matched_keyword is not None or uses_weights:
        print("✓ Benutzerprompt wird unverändert verwendet")
        return prompt

    boosted = f"masterpiece, best quality, {prompt}"
    print(f"🔄 Verbesserter Prompt: {boosted}")
    return boosted


def text_to_image(prompt, model_id, steps, guidance_scale, progress=gr.Progress()):
    """Generate one 512x512 image from a text prompt.

    Parameters:
    - prompt: user prompt; empty or whitespace-only input is rejected
    - model_id: model identifier passed to load_txt2img and looked up in MODEL_CONFIGS
    - steps: number of inference steps (cast to int for the pipeline)
    - guidance_scale: CFG value passed to the pipeline
    - progress: progress reporter, called as progress(fraction, desc=...)

    Returns:
    - (image, status message) on success, (None, message) on empty prompt or error.
      Exceptions are caught, logged with a traceback and reported in the message.
    """
    try:
        if not (prompt and prompt.strip()):
            return None, "Bitte einen Prompt eingeben"

        separator = "\n" + "="*80
        print(separator)
        print(f"🚀 Starte Generierung mit Modell: {model_id}")
        print(separator)
        print(f"📝 Prompt: {prompt}")

        # Negative prompts are derived automatically from the positive prompt.
        auto_negatives = auto_negative_prompt(prompt)
        print(f"🤖 Automatisch generierte Negative Prompts: {auto_negatives}")

        started_at = time.time()

        final_prompt = _with_quality_prefix(prompt)
        print(f"Finaler Prompt für Generation: {final_prompt}")

        progress(0, desc="Lade Modell...")
        pipe = load_txt2img(model_id)

        # Fresh random seed per call; printed so a run can be identified in the log.
        seed = random.randint(0, 2**32 - 1)
        generator = torch.Generator(device=device).manual_seed(seed)
        print(f"🌱 Seed: {seed}")

        step_callback = TextToImageProgressCallback(progress, steps)

        print(f"⚙️ Einstellungen: Steps={steps}, CFG={guidance_scale}")

        result = pipe(
            prompt=final_prompt,
            negative_prompt=auto_negatives,
            height=512,
            width=512,
            num_inference_steps=int(steps),
            guidance_scale=guidance_scale,
            generator=generator,
            callback_on_step_end=step_callback,
            callback_on_step_end_tensor_inputs=[],
        )
        image = result.images[0]

        duration = time.time() - started_at
        print(f"✅ Bild generiert in {duration:.2f} Sekunden")

        # Unknown model ids fall back to the SD 1.5 entry for the display name.
        config = MODEL_CONFIGS.get(model_id, MODEL_CONFIGS["runwayml/stable-diffusion-v1-5"])
        return image, f"✅ Generiert mit {config['name']} in {duration:.1f}s"

    except Exception as exc:
        failure_msg = f"❌ Fehler: {str(exc)}"
        print(f"❌ Fehler in text_to_image: {exc}")
        import traceback
        traceback.print_exc()
        return None, failure_msg
        

def img_to_image(image, prompt, neg_prompt, strength, steps, guidance_scale,
                 mode, bbox_x1, bbox_y1, bbox_x2, bbox_y2,
                 progress=gr.Progress()):
    """
    KORRIGIERTE HAUPTFUNKTION FÜR CONTROLNET-GESTEUERTES INPAINTING
    """
    try:
        if image is None:
            return None, None, None, None, None

        import time, random
        start_time = time.time()

        print("\n" + "="*80)
        print(f"🚀 Img2Img Start → Modus: {mode}")
        print("\n" + "="*80)
        print(f"📊 Einstellungen: Strength: {strength}, Steps: {steps}, Guidance: {guidance_scale}")
        print(f"📝 Prompt: {prompt}")
        print(f"🚫 Negativ-Prompt: {neg_prompt}")

        final_image = None # Variable wird initiiert!
        
        # ===== AUTOMATISCHEN NEGATIV-PROMPT GENERIEREN =====
        auto_negatives = auto_negative_prompt(prompt)
        print(f"🤖 Automatisch generierter Negativ-Prompt: {auto_negatives}")
        
        # ===== KOMBINIERE MANUELLEN UND AUTOMATISCHEN PROMPT =====
        combined_negative_prompt = ""
        
        if neg_prompt and neg_prompt.strip():
            user_neg = neg_prompt.strip()
            print(f"👤 Benutzer Negativ-Prompt: {user_neg}")
            
            user_words = [word.strip().lower() for word in user_neg.split(",")]
            auto_words = [word.strip().lower() for word in auto_negatives.split(",")]
            
            combined_words = user_words.copy()
            
            for auto_word in auto_words:
                if auto_word and auto_word not in user_words:
                    combined_words.append(auto_word)
            
            unique_words = []
            seen_words = set()
            for word in combined_words:
                if word and word not in seen_words:
                    unique_words.append(word)
                    seen_words.add(word)
            
            combined_negative_prompt = ", ".join(unique_words)
        else:
            combined_negative_prompt = auto_negatives
            print(f"ℹ️ Kein manueller Negativ-Prompt, verwende nur automatischen: {combined_negative_prompt}")
        
        print(f"✅ Finaler kombinierter Negativ-Prompt: {combined_negative_prompt}")

        # ===== PROMPT-BOOSTER FÜR DREI MODI =====
        if mode == "face_only_change":

            prompt_lower = prompt.lower()
             
            front_face_keywords = [
                "portrait", "face", "eyes", "smile", "lips", "nose", "expression",
                "looking at camera", "frontal view", "headshot", "selfie", "close-up",
                "profile", "side view", "front", "frontal", "facing camera", "jawline"
            ]
    
            back_head_keywords = [
                "back of head", "from behind", "rear view", "looking away",
                "turned away", "back view", "backside", "back", "rear",
                "hair only", "ponytail", "hairstyle", "hair", "back hair"
            ]
    
            # Bestimme ob Gesicht vorne oder Hinterkopf vorne
            is_front_face = any(keyword in prompt_lower for keyword in front_face_keywords)
            is_back_head = any(keyword in prompt_lower for keyword in back_head_keywords)
    
            # Fallback: Wenn keine spezifischen Keywords, annehmen es ist Gesicht
            if not is_front_face and not is_back_head:
                is_front_face = True  # Standard: Gesicht vorne
                print("   ℹ️ Keine Gesicht/Hinterkopf-Keywords → Standard: Gesicht vorne")
    
            print(f"   🎯 Gesichtserkenner für Boosters: Vorne={is_front_face}, Hinten={is_back_head}")
    
            # NUR für frontale Gesichter Gesichts-Booster hinzufügen
            if is_front_face and not is_back_head:
                face_boosters = "(perfect face:1.2), (symmetrical face:1.1), realistic shaded perfect face, "
        
                if not any(keyword in prompt_lower for keyword in 
                           ["perfect face", "symmetrical", "realistic face", "shaded face"]):
                    enhanced_prompt = face_boosters + prompt
                    print(f"👤 Gesichts-Booster hinzugefügt: {face_boosters}")
                else:
                    enhanced_prompt = prompt
                    print(f"👤 Benutzer hat bereits Gesichts-Booster im Prompt")
            else:
                # Keine Gesichts-Booster für Hinterkopf oder unklare Fälle
                enhanced_prompt = prompt
                
                if is_back_head:
                    print(f"💇 Hinterkopf erkannt → Keine Gesichts-Booster")
                else:
                    print(f"👤 Keine Gesichts-Booster (unspezifischer Prompt)")

            
            #face_boosters = "(perfect face:1.2), (symmetrical face:1.1), realistic shaded perfect face, "
            
            #if not any(keyword in prompt.lower() for keyword in 
            #           ["perfect face", "symmetrical", "realistic face", "shaded face"]):
            #    enhanced_prompt = face_boosters + prompt
            #    print(f"👤 Gesichts-Booster hinzugefügt: {face_boosters}")
            #else:
            #    enhanced_prompt = prompt
            #    print(f"👤 Benutzer hat bereits Gesichts-Booster im Prompt")
                
        elif mode == "focus_change":
            focus_boosters = "(sharp focus:1.2), (detailed subject:1.1), (clear foreground:1.1), "
            
            if not any(keyword in prompt.lower() for keyword in 
                       ["sharp focus", "detailed subject", "clear foreground", "well-defined"]):
                enhanced_prompt = focus_boosters + prompt
                print(f"🎯 Focus-Booster hinzugefügt: {focus_boosters}")
            else:
                enhanced_prompt = prompt
                print(f"🎯 Benutzer hat bereits Focus-Booster im Prompt")
                
        elif mode == "environment_change":
            background_boosters = "complete scene, full background, entire environment, "
            
            if not any(keyword in prompt.lower() for keyword in 
                       ["complete scene", "full background", "entire environment", "whole setting"]):
                enhanced_prompt = background_boosters + prompt
                print(f"🌳 Hintergrund-Booster hinzugefügt: {background_boosters}")
            else:
                enhanced_prompt = prompt
                print(f"🌳 Benutzer hat bereits Hintergrund-Booster im Prompt")
        else:
            enhanced_prompt = prompt

        print(f"🎯 Finaler Prompt für {mode}: {enhanced_prompt}")

        progress(0, desc="Starte Generierung...")

        ####################################################################################################################################
        # ===== OPTIMIERTE MODUS-SPEZIFISCHE EINSTELLUNGEN FÜR CONTROLNET =====
        # Je radikaler die Veränderung, desto weniger ControlNet braucht man! 
        # Radikale Veränderung (Struktur komplett anders-Mensch/Auto):controlnet_strength = 0.3
        # STRUKTURELLER WECHSEL (Anatomie ändert sich-Mensch/Tier): controlnet_strength = 0.4-0.5
        # Detailveränderung (gleiche Struktur-Mensch/Hexe):controlnet_strength = 0.5-0.6
        # Je spezifischer ich weiß, WAS genau gleich bleiben soll, desto gezielter wähle ich die entsprechende ControlNet-Map.
        # Demnach ist ControlNet für den Erhalt der gewünschten Bildobjekte. Canny-erhält Kanten, Pose-Köperhalten, Depth-Tiefeninformationen
        ####################################################################################################################################
        if mode == "focus_change":
            # Optimale UI-Werte (für alle focus_change-Fälle):
		    # Strength: 0.55 – 0.6 → ideal: 0.58
		    # Guidance (CFG): 7.5 – 8 → ideal: 8
		    # Steps: 32 – 36
            keep_environment = False
    
            # adj_strength ist die Denoising-Stärke.0.1-0.3: Leichte Veränderung (behält Original)
            # 0.4-0.6: Mittlere Veränderung, 0.7-0.9: Starke Veränderung
            adj_strength = min(0.6, strength)
    
            # CONTROLNET-STÄRKE=Anteil der Controlnet-Kontrolle die an Inpaint weitergegeben werden soll
            # 0.3-0.5: Wenig Kontrolle → Inpaint hat mehr Freiheit, 0.6-0.8: Mittlere Kontrolle → Balance
            # 0.9: Starke Kontrolle → Inpaint folgt streng ControlNet
            controlnet_strength = 0.5 # Stärkere ControlNet-Kontrolle für Inpaint

            # Standard-Ratio (Controlnet gesteuertes Inpainting) - Ratio entscheidet über Anatomie und Stilfreiheit
            pose_ratio = 0.7     # 70%
            canny_ratio = 0.3    # 30%
    
            # Konvertiert den gesamten Prompt in Kleinbuchstaben um ggf. bei den keywords zu mappen
            prompt_lower = prompt.lower()
    
            # Keyword-Gruppen
            humanoid_keywords = [
                "anime", "cartoon", "manga", "witch", "wizard", "sorcerer",
                "alien", "elf", "fairy", "character", "fantasy", "superhero",
                "cyborg", "robot", "android", "santa", "person", "woman", "man",
                "girl", "boy", "child", "business", "suit", "professional",
                "sports", "athlete", "runner", "dancer", "portrait", "face"
            ]
    
            object_keywords = [
                "car", "vehicle", "automobile", "chair", "table", "desk",
                "statue", "sculpture", "monument", "lamp", "bottle", "vase",
                "product", "object", "furniture", "device", "tool", "item",
                "building", "house", "tree", "plant", "rock", "stone"
            ]
    
            animal_keywords = [
                "dog", "cat", "wolf", "lion", "tiger", "bear", "rabbit",
                "horse", "bird", "animal", "creature", "beast", "monkey",
                "elephant", "giraffe", "zebra", "deer", "fox", "pet"
            ]
    
  
            # Anpassung für Humanoid → Humanoid
            #if any(keyword in prompt_lower for keyword in humanoid_keywords):
            #    adj_strength = 0.5  # wie stark entrauscht wird. Wenn Bereiche transparent oft nicht genug entrauscht. Strukturveränderung ist nicht sehr hoch-niedriger Wert!
            #    controlnet_strength = 0.8   # controlnet_strength runter: Reduziert global und gleichmäßig den Einfluss beider Maps. Man kann auch beide Maps einzeln heruntersetzen. Ist das Gleiche!
            #    pose_ratio = 0.60  #   95%Pose, 5%Canny - wenn Pose gehalten und kaum Detailveränderung-
            #    canny_ratio = 0.10      # wenn Pose gehalten und mehr Detailveränderung -empfohlen 0.85/0.15, canny schlecht für Anime
            #    Achtung: Hoher strength-Wert im UI (0.85) instabiler. Zwar radikale Änderung aber ggf verdrehte Körper, fehlende Gliedmaße, extra Glieder, verzerrte Proportionen

            if any(keyword in prompt_lower for keyword in humanoid_keywords):
                # Der Parameter 'strength' ist die UI-Veränderungsstärke d.h. die Einstellungen
                # für Controlnet werden anhand von strength berechnet. Nun muß über Prompt Finetuning erfolgen!
                ui_strength = strength  


                # Smoothstep-Hilfsfunktion sorgt für weiche Übergänge zwischen den Posen. Die Smoothstep-Funktion sorgt dafür
                # dass am Anfang und Ende des UI-Strength-Wertes nicht viel passiert hauptsächlich in der Mitte
                def smoothstep(min_val, max_val, x):
                    x = max(0, min(1, (x - min_val) / (max_val - min_val)))
                    return x * x * (3 - 2 * x)
              
    
                # 1. Basierend auf der UI-Stärke (strength) berechnen
                adj_strength = 0.15 + 0.8 * ui_strength


                if ui_strength <= 0.7:
                    # AB UI=0.7: Höhere ControlNet-Stärke für Stabilität
                    controlnet_strength = 0.85 - 0.83 * ui_strength  # 0.85 → 0.265 bei UI=0.9
                else:
                    # BIS UI=0.7: controlnet wird für mehr Freiheit reduziert
                    t = (ui_strength - 0.7) / 0.2
                    controlnet_strength = 0.269 + 0.081 * t  # 0.85 → 0.27 bei UI=0.7  


                # 2. DYNAMISCHE POSE-ANPASSUNG: Mehr Pose bei hoher Stärke für Stabilität!
                # Standard: 0.85 → 0.45 (wie bisher)
                base_pose = 0.85 - 0.4 * smoothstep(0.4, 0.8, ui_strength)


                # Canny-Reduktion bei hoher Stärke für Farbfreiheit
                if ui_strength > 0.6:
                    # Je höher die Stärke, desto weniger Canny (für Farbänderungen)
                    canny_reduction = smoothstep(0.6, 0.9, ui_strength) * 0.15
                    pose_ratio = min(0.60, base_pose + canny_reduction)  # Pose erhöhen = Canny reduzieren
                else:
                    pose_ratio = base_pose
                                    
                canny_ratio = 1.0 - pose_ratio
         
                
                # 2. Werte auf sinnvolle Bereiche begrenzen (Clipping)
                adj_strength = max(0.15, min(adj_strength, 0.95))
                controlnet_strength = max(0.12, min(controlnet_strength, 0.85))
                pose_ratio = max(0.45, min(pose_ratio, 0.60))
                canny_ratio = max(0.40, min(canny_ratio, 0.55))

                conditioning_scale = [
                    controlnet_strength * pose_ratio,  # Depth-Gewichtung
                    controlnet_strength * canny_ratio   # Canny-Gewichtung
                ]
    
                print(f"👤 Humanoid → Humanoid (UI-Stärke: {ui_strength})")
                print(f"   adj_strength: {adj_strength:.2f}, controlnet: {controlnet_strength:.2f}")
                print(f"   Verhältnis: Pose {pose_ratio*100:.0f}% : Canny {canny_ratio*100:.0f}%")
                print(f"   Scale: [{conditioning_scale[0]:.3f}, {conditioning_scale[1]:.3f}]")
                # Debug:
                print(f"DEBUG UI={ui_strength}: smoothstep={smoothstep(0.4, 0.8, ui_strength):.3f}")
                print(f"DEBUG Pose vor Clipping: {0.85 - 0.4 * smoothstep(0.4, 0.8, ui_strength):.3f}")
                print(f"DEBUG Pose nach Clipping: {pose_ratio:.3f}")
                
    
            # Anpassung für Gegenstand → Gegenstand
            elif any(keyword in prompt_lower for keyword in object_keywords):
                adj_strength = min(0.7, strength * 1.15)
                controlnet_strength = 0.5                  
                pose_ratio = 0.10   # 10% Pose
                canny_ratio = 0.90  # 90% Canny

                conditioning_scale = [
                    controlnet_strength * pose_ratio,  # Depth-Gewichtung
                    controlnet_strength * canny_ratio   # Canny-Gewichtung
                ]
                print("📦 Gegenstand → Gegenstand → Ratio 25:75 (Pose:Canny)")
    
            # Anpassung für Mensch → Tier
            elif any(keyword in prompt_lower for keyword in animal_keywords):
                adj_strength = min(0.6, strength * 1.1)
                controlnet_strength = 0.5  
                pose_ratio = 0.5   # 50% Pose - empfohlen 0.45/0.55
                canny_ratio = 0.5  # 50% Canny
                print("🐾 Mensch → Tier → Ratio 50:50 (Pose:Canny)")
    
                # CONDITIONING SCALE BERECHNEN (genau wie environment_change)
                conditioning_scale = [
                    controlnet_strength * pose_ratio,  # OpenPose
                    controlnet_strength * canny_ratio  # Canny
                ]

            else:  #Standard
                # CLIPPING
                adj_strength = max(0.4, min(adj_strength, 0.8))
                controlnet_strength = max(0.3, min(controlnet_strength, 0.7))
                pose_ratio = max(0.5, min(pose_ratio, 0.8))
                canny_ratio = max(0.2, min(canny_ratio, 0.5))
                
                # CONDITIONING_SCALE FEHLT HIER!
                conditioning_scale = [
                    controlnet_strength * pose_ratio,  # OpenPose
                    controlnet_strength * canny_ratio  # Canny
                ]
            
    
            print(f"🎯 MODUS: Focus verändern")
            print(f"   Strength: {adj_strength}, ControlNet: {controlnet_strength}")
            print(f"   OpenPose: {pose_ratio*100}%, Canny: {canny_ratio*100}%")
            print(f"   Conditioning Scale: [{conditioning_scale[0]:.3f}, {conditioning_scale[1]:.3f}]")     
        

        elif mode == "environment_change":
            # optimale UI-Werte:
		    # Strength: 0.72 – 0.78 → ideal: 0.75
		    # guidance (CFG): 8.5 – 9.5 → ideal: 9
		    # Steps: 34 – 38 → ideal: 35
            keep_environment = True

            ui_strength = strength  # Veränderungsstärke 0.1-0.9

            #wandelt den Prompt in Kleinbuchstaben um (Keywords!)
            prompt_lower = prompt.lower()

            #Standardfall:wird genutzt wenn Prompt nicht eines der keywords unten beinhaltet
            # Denoising: starke Neugenerierung
            adj_strength = 0.75  # Leicht runter auf Realismus
    
            # CONTROLNET-STÄRKE 
            controlnet_strength = 0.55  #Inpaint kann bei Neugenerierung nicht so viel Kontrolle vertragen     

            #Ratios: Controlnet gesteuertes Inpainting
            depth_ratio = 0.50     # 35%
            canny_ratio = 0.12     # 10%

            
            # Heuristik für Naturszenen vs. Innenräume
            nature_keywords = ["beach", "forest", "mountain", "ocean", "sky", "field", "landscape", "nature", "outdoor", "desert", "snow", "arctic"]
            interior_keywords = ["office", "room", "interior", "kitchen", "bedroom", "living room", "indoor", "wall", "furniture"]
            
    
            # Anpassung für Innenräume (mehr Kantenerhalt)
            if any(keyword in prompt_lower for keyword in interior_keywords):
                # Ob Formel korrekt? kein Test!
                adj_strength = 0.2 + (ui_strength * 0.5)
                controlnet_strength = 0.7 + (ui_strength * 0.2)
                canny_ratio = 0.8 + (ui_strength * 0.1)
                depth_ratio = 1.0 - canny_ratio

                # Clipping: extreme Werte, Instabilität, Für exakten Objektschutz (Büro→Küche, Zimmer→Garten)
                adj_strength = max(0.15, min(adj_strength, 0.7))   # Max 70% Denoising
                controlnet_strength = max(0.6, min(controlnet_strength, 0.95)) # Min 60% ControlNet
                canny_ratio = max(0.7, min(canny_ratio, 0.95))     # Canny mind. 70% für Kanten
                depth_ratio = max(0.05, min(depth_ratio, 0.3))     # Depth max 30%
        
                conditioning_scale = [
                    controlnet_strength * depth_ratio,
                    controlnet_strength * canny_ratio
                ]
        
                print(f"🏠 INNENRÄUME: UI={ui_strength:.2f}")
                print(f"   Strength: {adj_strength}, ControlNet: {controlnet_strength}")
                print(f"   Depth: {depth_ratio*100:.0f}% (Maßstab), Canny: {canny_ratio*100:.0f}%") 
                print(f"   Conditioning Scale: [{conditioning_scale[0]:.3f}, {conditioning_scale[1]:.3f}]")
                
    
            # Anpassung für Naturszenen (maximale Flexibilität) - die optimalen UI-Werte: strength:0,72 , steps: 35 , guidance: 9-9,5
            elif any(keyword in prompt_lower for keyword in nature_keywords):

                # DENOISING: Radikaler bei Naturszenen (Wald→Wüste)
                adj_strength = 0.15 + (ui_strength * 0.75)  # (0.15-0.9 linear) wie stark das bestehende Bild überschrieben wird im kompletten Denoising-Prozess-also Strukturveränderung
        
                # CONTROLNET: WENIGER bei Naturszenen (mehr Freiheit)
                controlnet_strength = 0.5 + (ui_strength * 0.25)  # 0.3-0.6
        
                # RATIOS: WENIG Canny (Kanten stören bei Naturveränderung)
                # MEHR Depth (Maßstab/Tiefe erhalten)
                depth_ratio = 0.9 - (ui_strength * 0.3)  # Depth-Wert hält Maßstab und Boden (Emi groß, Liege klein - Emi im Meer)
                canny_ratio = 1.0 - depth_ratio          # erzwingt Kanten - hält dadurch an alter Umgebung fest - schlecht bei Umgebungswechsel

                # Clipping verhindert extreme Werte, Controlnet hat immer etwas Einfluß sonst ist Pipline instabil
                adj_strength = max(0.15, min(adj_strength, 0.95))
                controlnet_strength = max(0.2, min(controlnet_strength, 0.6))
                depth_ratio = max(0.5, min(depth_ratio, 0.95))      # Depth nicht unter 30%
                canny_ratio = max(0.05, min(canny_ratio, 0.5))      # Canny nicht über 70%
        
                conditioning_scale = [
                    controlnet_strength * depth_ratio,
                    controlnet_strength * canny_ratio
                ]
        
                print(f"🌳 NATURSZENE: UI={ui_strength:.2f}")
                print(f"   Strength: {adj_strength}, ControlNet: {controlnet_strength}")
                print(f"   Depth: {depth_ratio*100:.0f}% (Maßstab), Canny: {canny_ratio*100:.0f}%")
                print(f"   Conditioning Scale: [{conditioning_scale[0]:.3f}, {conditioning_scale[1]:.3f}]")
           
            else: #Standard

                # Clipping:
                adj_strength = max(0.15, min(adj_strength, 0.7))   # Max 70% Denoising
                controlnet_strength = max(0.6, min(controlnet_strength, 0.95)) # Min 60% ControlNet
                canny_ratio = max(0.7, min(canny_ratio, 0.95))     # Canny mind. 70% für Kanten
                depth_ratio = max(0.05, min(depth_ratio, 0.3))     # Depth max 30%


                conditioning_scale = [
                    controlnet_strength * depth_ratio,  # Depth-Gewichtung
                    controlnet_strength * canny_ratio   # Canny-Gewichtung
                ]
    
                print(f"🎯 STANDARD MODUS: Umgebung ändern")
                print(f"   Strength: {adj_strength}, ControlNet: {controlnet_strength}")
                print(f"   Depth: {depth_ratio*100}%, Canny: {canny_ratio*100}%")
                print(f"   Conditioning Scale: [{conditioning_scale[0]:.3f}, {conditioning_scale[1]:.3f}]")
        
                               
        else:  # face_only_change 
            keep_environment = True

            ui_strength = strength  # 0.1-0.9 vom User
            prompt_lower = prompt.lower()

            #Standard für alle Gesichter
            adj_strength = 0.15 + (ui_strength * 0.75)  # 0.15-0.9 Bereich
    
            # ControlNet-Stärke (nimmt mit UI-Strength ab)
            controlnet_strength = 0.8 - (ui_strength * 0.6)  # 0.8 → 0.2 linear
    
            # Depth vs. Canny Basis-Ratio
            depth_ratio = 0.8 - (ui_strength * 0.4)  # 0.8 → 0.4
            canny_ratio = 0.2 + (ui_strength * 0.3)  # 0.2 → 0.5
 

            # Realistic/Photo-Stile  
            #realistic_keywords = ["photorealistic", "photography", "photo", "realistic", "portrait", "studio", "cinematic"]

            # Zeichnungen/Illustrationen (kein Anime)
            drawing_keywords = ["drawing", "illustration", "sketch", "painting", "artwork", "watercolor"]
            

            # Anime-Stile
            anime_keywords = ["anime", "manga", "cartoon", "character", "chibi", "cel-shading", "lineart"]

            #front_face_keywords = [
            #    "portrait", "face", "eyes", "smile", "lips", "nose", "expression",
            #    "looking at camera", "frontal view", "headshot", "selfie", "close-up",
            #    "profile", "side view", "front", "frontal", "facing camera", "jawline"
            #]
    
            #back_head_keywords = [
            #    "back of head", "from behind", "rear view", "looking away",
            #    "turned away", "back view", "backside", "back", "rear",
            #    "hair only", "ponytail", "hairstyle", "hair", "back hair"
            #]
    
            # Bestimme ob Gesicht vorne oder Hinterkopf vorne
            #is_front_face = any(keyword in prompt_lower for keyword in front_face_keywords)
            #is_back_head = any(keyword in prompt_lower for keyword in back_head_keywords)
    
            # Fallback: Wenn keine spezifischen Keywords, annehmen es ist Gesicht
            #if not is_front_face and not is_back_head:
            #    is_front_face = True  # Standard: Gesicht vorne
            #    print("   ℹ️ Keine Gesicht/Hinterkopf-Keywords → Standard: Gesicht vorne")
    
            print(f"   🎯 Gesichtserkennung: Vorne={is_front_face}, Hinten={is_back_head}")
    
            if any(keyword in prompt_lower for keyword in anime_keywords):
                print("🎨 ANIME-TRANSFORM-MODUS")
        
                def smoothstep(min_val, max_val, x):
                    x = max(0, min(1, (x - min_val) / (max_val - min_val)))
                    return x * x * (3 - 2 * x)
        
                # Basiseinstellungen für Anime
                adj_strength = 0.30 + 0.55 * smoothstep(0.35, 0.9, ui_strength)
                adj_strength = max(0.3, min(adj_strength, 0.85))
        
                controlnet_strength = 0.30 + 0.52 * smoothstep(0.65, 0.9, ui_strength)
                controlnet_strength = max(0.25, min(controlnet_strength, 0.85))
        
                # ANPASSUNG BASIEREND AUF GESICHT/HINTERKOPF
                if is_front_face:
                    # Anime-GESICHT vorne
                    depth_ratio = 0.65 + 0.15 * smoothstep(0.5, 0.9, ui_strength)  # Höher für Gesichtsstruktur
                    canny_ratio = 1.0 - depth_ratio
                    print("   👤 Anime-Gesicht (vorne): Mehr Depth für 3D-Struktur")
            
                elif is_back_head:
                    # Anime-HINTERKOPF
                    depth_ratio = 0.65 + 0.20 * smoothstep(0.5, 0.9, ui_strength)  # Niedriger
                    canny_ratio = 1.0 - depth_ratio
                  
                    # 2. CONTROLNET-BOOST AB 0.7 (neue Logik)
                    if ui_strength <= 0.7:
                        # Bis 0.7: normale Steigerung (wie getestet und gut)
                        controlnet_strength = 0.30 + 0.52 * smoothstep(0.65, 0.9, ui_strength)
                    else:
                        # Ab 0.7: DEUTLICH MEHR ControlNet für Stabilität
                        # Von 0.7 (≈0.5) auf 0.9 (≈0.85) linear steigern
                        boost_factor = (ui_strength - 0.7) / 0.2  # 0.0 → 1.0
                        controlnet_strength = 0.5 + (0.35 * boost_factor)  # 0.5 → 0.85
    
                    # 3. CLIPPING (sicherheitshalber)
                    controlnet_strength = max(0.3, min(controlnet_strength, 0.9))
    
                    print(f"   💇 Anime-Hinterkopf: Depth={depth_ratio:.2f}, ControlNet={controlnet_strength:.2f}")
                    if ui_strength > 0.7:
                        print("   ⚡ BOOST: ControlNet erhöht für bessere Strukturerhaltung")
                    
                else:
                    # Standard-Anime (Fallback)
                    depth_ratio = 0.55 + 0.15 * smoothstep(0.5, 0.9, ui_strength)
                    canny_ratio = 1.0 - depth_ratio
        
                conditioning_scale = [
                    controlnet_strength * depth_ratio,
                    controlnet_strength * canny_ratio
                ]

                print(f"UI Strength: {ui_strength}")
                print(f"adj_strength: {adj_strength:.3f}")
                print(f"controlnet_strength: {controlnet_strength:.3f}")
                print(f"Depth: {depth_ratio*100:.1f}%, Canny: {canny_ratio*100:.1f}%")
                print(f"conditioning_scale: {conditioning_scale}")

             
            elif any(keyword in prompt_lower for keyword in drawing_keywords):       
                # Weniger Denoising für anatomische Korrektheit (nicht getestet)
                adj_strength = max(0.3, adj_strength * 0.9) # Konservativer
        
                # Mehr ControlNet für Strukturerhalt
                controlnet_strength = min(0.9, controlnet_strength * 1.2) # Mehr Kontrolle
        
                # Mehr Depth, weniger Canny
                depth_ratio = min(0.9, depth_ratio * 1.2)
                canny_ratio = max(0.1, canny_ratio * 0.8)  # Weniger Canny

                #Clipping
                adj_strength = max(0.15, min(adj_strength, 0.95))
                controlnet_strength = max(0.1, min(controlnet_strength, 0.9))
                depth_ratio = max(0.1, min(depth_ratio, 0.9))
                canny_ratio = max(0.1, min(canny_ratio, 0.9))
                

                conditioning_scale = [
                    controlnet_strength * depth_ratio,
                    controlnet_strength * canny_ratio
                ]
        
                print("   📸 Drawing-Modus: Mehr Strukturerhalt")
                print(f"   Strength: {adj_strength}, ControlNet: {controlnet_strength}")
                print(f"   Depth: {depth_ratio*100}%, Canny: {canny_ratio*100}%")
                print(f"   Conditioning Scale: [{conditioning_scale[0]:.3f}, {conditioning_scale[1]:.3f}]")
    
            else: #Standard
                #Clipping
                adj_strength = max(0.15, min(adj_strength, 0.95))
                controlnet_strength = max(0.1, min(controlnet_strength, 0.9))
                depth_ratio = max(0.1, min(depth_ratio, 0.9))
                canny_ratio = max(0.1, min(canny_ratio, 0.9))


                conditioning_scale = [
                    controlnet_strength * depth_ratio,
                    controlnet_strength * canny_ratio
                ]
    
                print("   📸 Standard-Modus: Mehr Strukturerhalt")
                print(f"   Strength: {adj_strength}, ControlNet: {controlnet_strength}")
                print(f"   Depth: {depth_ratio*100}%, Canny: {canny_ratio*100}%")
                print(f"   Conditioning Scale: [{conditioning_scale[0]:.3f}, {conditioning_scale[1]:.3f}]")
                          

        #################################################################################################
        # Controlnet-Einstellungen ENDE
        #################################################################################################


        # ===== WICHTIG: VARIABLEN FÜR KOMPLETTEN WORKFLOW =====
        original_mask = None
        padding_info = None
        scaled_image = None
        scaled_mask = None

        
        if bbox_x1 is not None and bbox_y1 is not None and bbox_x2 is not None and bbox_y2 is not None:
            print(f"🎯 BBox Koordinaten erhalten: [{bbox_x1}, {bbox_y1}, {bbox_x2}, {bbox_y2}]")
            
            # === WICHTIGE ÄNDERUNG: SAM 2 STATT create_face_mask ===
            # 1. MASKE mit SAM 2 erzeugen (transparent für Benutzer)         
            processed_mask, raw_mask, binary_mask = controlnet_processor.create_sam_mask(
                image=image,
                bbox_coords=(bbox_x1, bbox_y1, bbox_x2, bbox_y2),
                mode=mode,
                is_front_face=is_front_face, 
                is_back_head=is_back_head 
            )

            original_mask = processed_mask
            
            # 2. BILD UND MASKE GEMEINSAM SKALIEREN (mit Padding)
            scaled_image, scaled_mask_inpaint, scaled_mask_composite, padding_info = scale_image_and_mask_together(
                image.convert("RGB"),  # Originalbild
                binary_mask,             # SAM 2 Binärmaske ohne Blur
                original_mask,          # SAM 2 Maske geglättet (oder Fallback)
                target_size=IMG_SIZE,
                bbox_coords=(bbox_x1, bbox_y1, bbox_x2, bbox_y2),
                mode=mode
            )
            

            print(f"✅ Gemeinsame Skalierung abgeschlossen")
            print(f"   Original: {image.size} → Skaliert: {scaled_image.size}")
        else:
            # Keine BBox: Normales Img2Img (ohne Maske)
            print(f"ℹ️ Keine BBox angegeben → normales Img2Img (ohne Maske)")
            scaled_image = image.convert("RGB").resize((IMG_SIZE, IMG_SIZE), Image.Resampling.LANCZOS)
            scaled_mask = Image.new("L", (IMG_SIZE, IMG_SIZE), 255)  # Volle Maske

            padding_info = None


        progress(0.1, desc="ControlNet läuft...")

        # ===== CONTROLNET: MAPS ERSTELLEN =====
        print(f"📊 ControlNet Input Größe: {scaled_image.size}")
        
        controlnet_maps, debug_maps = controlnet_processor.prepare_controlnet_maps(
            image=scaled_image,
            keep_environment=keep_environment
        )

        print(f"✅ ControlNet Maps erstellt: {len(controlnet_maps)} Maps")

        progress(0.3, desc="ControlNet abgeschlossen – starte Inpaint...")

        # ===== CONTROLNET-INPAINTING PIPELINE ===== Laden der Pipeline!
        pipe = load_img2img(keep_environment=keep_environment)
        
        # ===== SEED UND GENERATOR =====
        adj_guidance = min(guidance_scale, 12.0)
        seed = random.randint(0, 2**32 - 1)
        generator = torch.Generator(device=device).manual_seed(seed)
        print(f"🌱 Inpaint Seed: {seed}")

        # ===== FORTSCHRITTS-CALLBACK =====
        callback = ImageToImageProgressCallback(progress, int(steps), adj_strength)

        # ===== CONTROLNET-GESTEUERTES INPAINTING DURCHFÜHREN =====
        print(f"🔄 Führe ControlNet-gesteuertes Inpainting durch...")
           

        result = pipe(
            prompt=enhanced_prompt,
            negative_prompt=combined_negative_prompt,
            image=scaled_image,
            mask_image=scaled_mask_inpaint,
            #mask_image=scaled_mask,
            control_image=controlnet_maps,
            controlnet_conditioning_scale=conditioning_scale,  # DYNAMISCHE Liste
            strength=adj_strength,
            num_inference_steps=int(steps),
            guidance_scale=adj_guidance,
            generator=generator,
            callback_on_step_end=callback,
            callback_on_step_end_tensor_inputs=[],
        )
    
        print("✅ ControlNet-Inpainting abgeschlossen")

        # ===== KORREKTES COMPOSITING =====
        generated_image = result.images[0]

              
        if original_mask is not None and padding_info is not None:
            # KORREKTER WORKFLOW: Nur bearbeiteten Bereich in Originalbild einfügen
            final_image = enhanced_composite_with_sam(
                original_image=image.convert("RGB"),
                inpaint_result=generated_image,
                original_mask=original_mask,
                padding_info=padding_info,
                bbox_coords=(bbox_x1, bbox_y1, bbox_x2, bbox_y2),
                mode=mode
            )            
            print(f"✅ Korrektes Compositing durchgeführt")
        else:
            # Keine Maske: Einfach das generierte Bild zurückgeben
            final_image = generated_image
            mask_preview = Image.new("RGB", (512, 512), color="gray")
            raw_sam_mask_display = Image.new("RGB", (512, 512), color="gray")
            controlnet_map1 = Image.new("RGB", (512, 512), color="gray")
            controlnet_map2 = Image.new("RGB", (512, 512), color="gray")           
            print(f"ℹ️ Keine Maske → Direkte Rückgabe des Bildes")
            

        end_time = time.time()
        duration = end_time - start_time
        
        print(f"✅ Transformation abgeschlossen in {duration:.2f} Sekunden")
        print(f"🎯 Verwendeter Modus: {mode}")
        print(f"⚙️ ControlNet: {'Depth+Canny' if keep_environment else 'OpenPose+Canny'}")
        print(f"📊 Finale Bildgröße: {final_image.size}")

        # 1. Maske in RGB für die Anzeige konvertieren
        mask_preview = original_mask.convert("RGB")
        raw_sam_mask_display = raw_mask.convert("RGB")

    
        if "pose" in debug_maps:
            controlnet_map1 = debug_maps["pose"]
            map1_label = "🎭 Pose Map"
        else:
            controlnet_map1 = debug_maps["depth"]
            map1_label = "🏔️ Depth Map"

        controlnet_map2 = debug_maps["canny"]

        # Return 5 Werte:
        return final_image, raw_sam_mask_display, mask_preview, controlnet_map1, controlnet_map2


    except Exception as e:
        print(f"❌ Fehler in img_to_image: {e}")
        import traceback
        traceback.print_exc()

        # Fallback: Return das Originalbild oder ein leeres Bild
        if image is not None:
            fallback_image = image.copy()
        else:
            fallback_image = Image.new("RGB", (512, 512), color="gray")
        
        return final_image, None, None, None, None


def update_bbox_from_image(image):
    """Return bounding-box coordinates for a newly uploaded image.

    Args:
        image: The uploaded image, or None when nothing is loaded.

    Returns:
        A 4-tuple built from the first four entries of
        ``auto_detect_face_area(image)`` (presumably x1, y1, x2, y2 -- confirm
        against that helper). When ``image`` is None, all four entries are None.
    """
    if image is None:
        # Nothing uploaded: hand back one empty value per coordinate.
        return (None,) * 4

    detected = auto_detect_face_area(image)
    # Pick the entries by index so exactly four values are returned.
    return tuple(detected[idx] for idx in range(4))

def update_model_settings(model_id):
    """Return the recommended settings for the selected model.

    Args:
        model_id: Key into ``MODEL_CONFIGS``. Unknown ids fall back to the
            "runwayml/stable-diffusion-v1-5" entry.

    Returns:
        A 3-tuple ``(steps, guidance_scale, summary)`` holding the config's
        ``recommended_steps``, its ``recommended_cfg``, and a display string
        that mentions both values.
    """
    # Resolve the fallback first so a missing default entry fails loudly.
    fallback = MODEL_CONFIGS["runwayml/stable-diffusion-v1-5"]
    settings = MODEL_CONFIGS.get(model_id, fallback)

    steps = settings["recommended_steps"]
    cfg = settings["recommended_cfg"]
    summary = f"📊 Empfohlene Einstellungen: {steps} Steps, CFG {cfg}"

    return steps, cfg, summary

def main_ui():
    """
    HAUPT-UI (ANGEPASST FÜR 3 MODI)
    """
    with gr.Blocks(
        title="AI Image Generator", 
        theme=gr.themes.Base(),
        css="""
        /* ===== INFO-BOXEN über Textboxen ===== */
        .info-box {
            background: #f8fafc;
            padding: 8px 12px;
            border-radius: 6px;
            border: 2px solid #e2e8f0;
            margin-bottom: 6px;
            font-size: 12px;
            line-height: 1.3;
            min-height: 50px !important;
            height: 50px !important;
            display: flex !important;
            align-items: center;
            justify-content: flex-start !important;
            text-align: left;
            padding-left: 15px;   
            overflow: hidden !important;      /* KEIN Scroll */
            border: none !important;
        }

        /* Linke Box (Prompt) - Blau */
        .gr-column:first-child .info-box {
            border-left: 4px solid #3b82f6;
            background: #eff6ff;
        }

        /* Rechte Box (Negativ) - Rot */
        .gr-column:last-child .info-box {
            border-left: 4px solid #ef4444;
            background: #fef2f2;
        }

        /* Code in Info-Boxen */
        .info-box code {
            background: white;
            padding: 3px 3px;
            border-radius: 4px;
            font-family: monospace;
            font-size: 12px;
            border: 1px solid #e2e8f0;
            display: inline-block;
            margin: 3px 0;
        }


        /* ===== TEXTBOXEN ===== */
        .prompt-box textarea {
            min-height: 90px !important;
            border-radius: 6px !important;
            border: 2px solid #e2e8f0 !important;
            padding: 10px !important;
            font-size: 14px !important;
        }

        /* Focus-State */
        .prompt-box textarea:focus {
            border-color: #3b82f6 !important;
            outline: none !important;
            box-shadow: 0 0 0 2px rgba(59, 130, 246, 0.1) !important;
        }

        /* Platzhalter */
        .prompt-box textarea::placeholder {
            color: #94a3b8 !important;
        }
        
       
        .clickable-file {
           color: #1976d2;
           cursor: pointer;
           text-decoration: none;
           font-family: 'Monaco', 'Consolas', monospace;
           background: #e3f2fd;
           padding: 2px 6px;
           border-radius: 4px;
           border: 1px solid #bbdefb;
         }
        .clickable-file:hover {
           background: #bbdefb;
           text-decoration: underline;
         }
        .model-info-box {
            background: #e8f4fd;
            padding: 12px;
            border-radius: 6px;
            margin: 10px 0;
            border-left: 4px solid #2196f3;
            font-size: 14px;
        }
        #generate-button {
            background-color: #0080FF !important;
            border: none !important;
            margin: 20px auto !important;
            display: block !important;
            font-weight: 600;
            width: 280px;
        }
        #generate-button:hover {
            background-color: #0066CC !important;
        }
        .hint-box {
            margin-top: 20px;
        }
        .custom-text {
            font-size: 25px !important;
        }
        .image-upload .svelte-1p4f8co {
            display: block !important;
        }
        .preview-box {
            border: 2px dashed #ccc;
            padding: 10px;
            border-radius: 8px;
            margin: 10px 0;
        }
        .mode-red {
            border: 3px solid #ff4444 !important;
        }
        .mode-green {
            border: 3px solid #44ff44 !important;
        }
        .coordinate-sliders {
            background: #f8f9fa;
            padding: 15px;
            border-radius: 8px;
            margin: 10px 0;
        }
        .gr-checkbox .wrap .text-gray {
            font-size: 14px !important;
            font-weight: 600 !important;
            line-height: 1.4 !important;
        }
        .status-message {
            padding: 10px;
            border-radius: 5px;
            margin: 10px 0;
            text-align: center;
            font-weight: 500;
        }
        .status-success {
            background-color: #d4edda;
            color: #155724;
            border: 1px solid #c3e6cb;
        }
        .status-error {
            background-color: #f8d7da;
            color: #721c24;
            border: 1px solid #f5c6cb;
        }
        .radio-group {
            background: #f8f9fa;
            padding: 15px;
            border-radius: 8px;
            margin: 10px 0;
            border: 2px solid #e9ecef;
        }
        .radio-item {
            padding: 8px 12px;
            margin: 5px 0;
            border-radius: 4px;
            transition: background 0.3s;
        }
        .radio-item:hover {
            background: #e9ecef;
        }
        .radio-label {
            font-weight: 600;
            font-size: 14px;
        }
        .radio-description {
            font-size: 12px;
            color: #6c757d;
            margin-left: 24px;
        }
        """
    ) as demo:

        with gr.Column(visible=True) as content_area:
            with gr.Tab("Text zu Bild"):
                gr.Markdown("## 🎨 Text zu Bild Generator")
                
                with gr.Row():
                    with gr.Column(scale=2):
                        # Modellauswahl Dropdown (NUR 2 MODELLE)
                        model_dropdown = gr.Dropdown(
                            choices=[
                                (config["name"], model_id) 
                                for model_id, config in MODEL_CONFIGS.items()
                            ],
                            value="runwayml/stable-diffusion-v1-5",
                            label="📁 Modellauswahl",
                            info="🏠 Universal vs 👤 Portraits"
                        )
                        
                        # Modellinformationen Box
                        model_info_box = gr.Markdown(
                            value="<div class='model-info-box'>"
                                  "**🏠 Stable Diffusion 1.5 (Universal)**<br>"
                                  "Universal model, good all-rounder, reliable results<br>"
                                  "Empfohlene Einstellungen: 35 Steps, CFG 7.5"
                                  "</div>",
                            label="Modellinformationen"
                        )
                        
                    with gr.Column(scale=3):
                        txt_input = gr.Textbox(
                            placeholder="z.B. ultra realistic mountain landscape at sunrise, soft mist over the valley, detailed foliage, crisp textures, depth of field, sunlight rays through clouds, shot on medium format camera, 8k, HDR, hyper-detailed, natural lighting, masterpiece",
                            lines=3,
                            label="🎯 Prompt (Englisch)",
                            info="Beschreibe detailliert, was du sehen möchtest. Negative Prompts werden automatisch generiert."
                        )
                
                with gr.Row():
                    with gr.Column():
                        txt_steps = gr.Slider(
                            minimum=10, maximum=100, value=35, step=1,
                            label="⚙️ Inferenz-Schritte",
                            info="Mehr Schritte = bessere Qualität, aber langsamer (20-50 empfohlen)"
                        )
                    with gr.Column():
                        txt_guidance = gr.Slider(
                            minimum=1.0, maximum=20.0, value=7.5, step=0.5,
                            label="🎛️ Prompt-Stärke (CFG Scale)",
                            info="Wie stark der Prompt befolgt wird (7-12 für gute Balance)"
                        )
                
                # Status-Nachricht
                status_output = gr.Markdown(
                    value="",
                    elem_classes="status-message"
                )
                
                generate_btn = gr.Button("🚀 Bild generieren", variant="primary", elem_id="generate-button")
                
                with gr.Row():
                    txt_output = gr.Image(
                        label="🖼️ Generiertes Bild", 
                        show_download_button=True,
                        type="pil",
                        height=400
                    )
                
                # Event-Handler für Modelländerung
                def update_model_info(model_id):
                    config = MODEL_CONFIGS.get(model_id, MODEL_CONFIGS["runwayml/stable-diffusion-v1-5"])
                    info_html = f"""
                    <div class='model-info-box'>
                    <strong>{config['name']}</strong><br>
                    {config['description']}<br>
                    <em>Empfohlene Einstellungen: {config['recommended_steps']} Steps, CFG {config['recommended_cfg']}</em>
                    </div>
                    """
                    return info_html, config["recommended_steps"], config["recommended_cfg"]
                
                model_dropdown.change(
                    fn=update_model_info,
                    inputs=[model_dropdown],
                    outputs=[model_info_box, txt_steps, txt_guidance]
                )
                
                generate_btn.click(
                    fn=text_to_image,
                    inputs=[txt_input, model_dropdown, txt_steps, txt_guidance],
                    outputs=[txt_output, status_output],
                    concurrency_limit=1
                )

            with gr.Tab("Bild zu Bild"):
                gr.Markdown("## 🖼️ Bild zu Bild Transformation (3 MODI)")
                
                with gr.Row():
                    with gr.Column():
                        img_input = gr.Image(
                            type="pil", 
                            label="📤 Eingabebild",
                            height=300,
                            sources=["upload"],
                            elem_id="image-upload"
                        )
                    with gr.Column():
                        preview_output = gr.Image(
                            label="🎯 Live-Vorschau mit Maske",
                            height=300,
                            interactive=False,
                            show_download_button=False
                        )
                
                # ===== NEUE RADIO-BUTTONS STATT CHECKBOX =====
                with gr.Row():
                    with gr.Column():
                        gr.Markdown("### 🎛️ Transformations-Modus")
                        
                        # NEU: 3 Radio-Buttons statt 1 Checkbox
                        mode_radio = gr.Radio(
                            choices=[
                                ("🌳 Umgebung ändern", "environment_change"),
                                ("🎯 Focus verändern", "focus_change"),
                                ("👤 Ausschließlich Gesicht", "face_only_change")
                            ],
                            value="environment_change",  # Standardmodus
                            label="Wähle den Transformationsmodus:",
                            info="Steuert, welcher Teil des Bildes verändert wird",
                            elem_classes="radio-group"
                        )
                        
                        # Detailierte Erklärungen
                        gr.Markdown("""
                        <div style="font-size: 12px; color: #666; margin-top: 10px;">
                        <strong>Modus-Erklärungen:</strong><br>
                        • <strong>🌳 Umgebung ändern:</strong> Ändert alles AUSSER dem markierten Bereich (Depth+Canny)<br>
                        • <strong>🎯 Focus verändern:</strong> Ändert markierten Bereich+Körper (OpenPose+Canny)<br>
                        • <strong>👤 Ausschließlich Gesicht:</strong> Ändert NUR den markierten Bereich (Depth+Canny)
                        </div>
                        """)
                
                with gr.Row():
                    gr.Markdown("### 📐 Bildelementbereich anpassen")
                
                # SLIDER MIT DYNAMISCHEM MAXIMUM (4096 für große Bilder)
                with gr.Row():
                    with gr.Column():
                        bbox_x1 = gr.Slider(
                            label="← Links (x1)", 
                            minimum=0, maximum=MAX_IMAGE_SIZE, value=100, step=1,
                            info="Linke Kante des Bildelementbereichs"
                        )
                    with gr.Column():
                        bbox_y1 = gr.Slider(
                            label="↑ Oben (y1)", 
                            minimum=0, maximum=MAX_IMAGE_SIZE, value=100, step=1,
                            info="Obere Kante des Bildelementbereichs"
                        )
                with gr.Row():
                    with gr.Column():
                        bbox_x2 = gr.Slider(
                            label="→ Rechts (x2)", 
                            minimum=0, maximum=MAX_IMAGE_SIZE, value=300, step=1,
                            info="Rechte Kante des Bildelementbereichs"
                        )
                    with gr.Column():
                        bbox_y2 = gr.Slider(
                            label="↓ Unten (y2)", 
                            minimum=0, maximum=MAX_IMAGE_SIZE, value=300, step=1,
                            info="Untere Kante des Bildelementbereichs"
                        )

                
                with gr.Row():
                    with gr.Column():
                        # Info-Block über Positiv-Prompt
                        pos_info = gr.Markdown(
                            value="`[STIL-MOTIV], [UMGEBUNG], [VOR/HINTERGRUND], [DETAILS], [QUALITÄT], [BELEUCHTUNG]`",
                            elem_classes=["info-box"]
                        )
                        img_prompt = gr.Textbox(
                            placeholder="photorealistic coastal beach, keep person unchanged, high detailed, diffused light",
                            lines=2,
                            label="🎯 Transformations-Prompt (Englisch)",
                            #info="Was soll verändert werden? Sei spezifisch."
                            elem_classes=["prompt-box"]
                        )
                    with gr.Column():
                        # Info_Block über Negativ-Prompt
                        neg_info = gr.Markdown(
                            value="`[GESICHTER/ANATOMIE], [FEHLER], [QUALITÄT], [UNERWÜNSCHTES]`",
                            elem_classes=["info-box"]
                        )        
                        img_neg_prompt = gr.Textbox(
                            placeholder="blurry face, deformed anatomy, ugly, extra limbs, poorly drawn hands",
                            lines=2,
                            label="🚫 Negativ-Prompt (Englisch)",
                            #info="Was soll vermieden werden? Unerwünschte Elemente auflisten."
                            elem_classes=["prompt-box"]
                        )
                        
                
                with gr.Row():
                    with gr.Column():
                        strength_slider = gr.Slider(
                            minimum=0.1, maximum=0.9, value=0.4, step=0.05,
                            label="💪 Veränderungs-Stärke (strength)",
                            info="0.1-0.3: Leichte Anpassungen, 0.4-0.6: Mittlere Veränderungen, 0.7-0.9: Starke Umgestaltung"
                        )
                    with gr.Column():
                        img_steps = gr.Slider(
                            minimum=10, maximum=45, value=35, step=1,
                            label="⚙️ Inferenz-Schritte",
                            info="Anzahl der Verarbeitungsschritte (25-45 für gute Ergebnisse)"
                        )
                    with gr.Column():
                        img_guidance = gr.Slider(
                            minimum=1.0, maximum=15.0, value=7.5, step=0.5,
                            label="🎛️ Prompt-Stärke (guidance)",
                            info="Einfluss des Prompts auf das Ergebnis (6-10 für natürliche Ergebnisse)"
                        )

                with gr.Row():
                   gr.Markdown(
                     "### 📋 Hinweise:\n"
                     "• **🆕 3 Transformations-Modi** für präzise Kontrolle\n"
                     "• **🆕 Unterstützt Bilder bis 4096×4096 Pixel**\n"
                     "• **🆕 Automatische Bildelementerkennung** setzt Koordinaten beim Upload\n"  
                     "• **🆕 Live-Vorschau** zeigt farbige Rahmen je nach Modus\n"
                     "• **🆕 Dynamische Koordinaten-Schieberegler** passen sich an Bildgröße an\n"
                     "• **ControlNet-Technologie** für konsistente Ergebnisse\n"
                     "• **Automatische Negative Prompts** für bessere Qualität\n"
                     "• **KORREKTER COMPOSITING-WORKFLOW** – nur bearbeiteter Bereich wird eingefügt\n"
                     "• **Ausgabe in Eingabebildgröße**"
                    )
                
                transform_btn = gr.Button("🔄 Bild transformieren", variant="primary")


                with gr.Row():
                    img_output = gr.Image(
                        label="✨ Transformiertes Bild",
                        show_download_button=True,
                        type="pil",
                        height=400
                    )
    
                with gr.Row():
                    sam_raw_mask_output = gr.Image(
                        label="🔍 SAM-Rohmaske (Vor Nachbearbeitung)",
                        type="pil",
                        height=300,
                        show_download_button=False
                    )
                    processed_mask_output = gr.Image(
                        label="🛠️ Nachbearbeitete Maske (Für Inpainting)",
                        type="pil",
                        height=300,
                        show_download_button=False
                    )

                with gr.Row():                 
                    pose_map_output = gr.Image(
                        label="🎭 Pose/Depth Map", 
                        type="pil", 
                        height=300,
                        show_download_button=False
                    )
                    canny_map_output = gr.Image(
                        label="📐 Canny Edge Map", 
                        type="pil", 
                        height=300,
                        show_download_button=False
                    )
                     

                # EVENT-HANDLER FÜR DYNAMISCHE BILDGRÖßEN
                img_input.upload(
                    fn=process_image_upload,
                    inputs=[img_input],
                    outputs=[preview_output, bbox_x1, bbox_y1, bbox_x2, bbox_y2]
                ).then(
                    fn=update_slider_for_image,
                    inputs=[img_input],
                    outputs=[bbox_x1, bbox_y1, bbox_x2, bbox_y2]
                )
                
                # NEUE Input-Liste mit mode_radio statt face_preserve
                coordinate_inputs = [img_input, bbox_x1, bbox_y1, bbox_x2, bbox_y2, mode_radio]
                
                # Live-Vorschau Updates für alle Steuerelemente
                for slider in [bbox_x1, bbox_y1, bbox_x2, bbox_y2]:
                    slider.release(
                        fn=update_live_preview,
                        inputs=coordinate_inputs,
                        outputs=preview_output
                    )


                # 2. PLATZHALTER-FUNKTION
                def update_info(mode):
                    """Return the prompt-structure hints matching the selected mode.

                    The result is a 2-tuple ``(positive_hint, negative_hint)`` of
                    Markdown strings; it is wired to the ``pos_info`` and
                    ``neg_info`` components via ``mode_radio.change``.

                    Args:
                        mode: Selected transformation mode. "environment_change"
                            and "focus_change" have dedicated hints; every other
                            value falls back to the face-only hints.

                    Returns:
                        Tuple of (positive prompt hint, negative prompt hint).
                    """
                    environment_hints = (
                        "`[STIL-MOTIV], [UMGEBUNG], [VOR/HINTERGRUND], [DETAILS], [QUALITÄT], [BELEUCHTUNG]`",
                        "`[GESICHTER/ANATOMIE], [FEHLER], [QUALITÄT], [UNERWÜNSCHTES]`",
                    )
                    focus_hints = (
                        "`[GESICHTSBESCHREIBUNG], [KLEIDUNG], [POSITION], [DETAILS], [STIL]`",
                        "`[DEFORMIERT], [UNSCHÄRFE], [ANATOMIEFEHLER], [UNERWÜNSCHTES]`",
                    )
                    # Fallback, used for "face_only_change" and any unrecognised mode.
                    face_only_hints = (
                        "`[HAARFARBE], [AUGEN], [GESICHTSAUSDRUCK], [DETAILS], [BELEUCHTUNG]`",
                        "`[UNREALISTISCH], [ASYMETRISCH], [FEHLER], [UNERWÜNSCHTES]`",
                    )

                    if mode == "environment_change":
                        return environment_hints
                    if mode == "focus_change":
                        return focus_hints
                    return face_only_hints
              

                # 3. EVENT HANDLER: when the mode changes, refresh the prompt hint boxes (pos_info / neg_info) via update_info
                mode_radio.change(
                    fn=update_info,
                    inputs=[mode_radio],
                    outputs=[pos_info, neg_info]
                )


                # Mode-Radio-Button ändert auch Live-Vorschau
                mode_radio.change(
                    fn=update_live_preview,
                    inputs=coordinate_inputs,
                    outputs=preview_output
                )
                
                # NEU: Transform-Button mit mode_radio statt face_preserve
                transform_btn.click(
                    fn=img_to_image,
                    inputs=[
                        img_input, img_prompt, img_neg_prompt, 
                        strength_slider, img_steps, img_guidance, 
                        mode_radio, bbox_x1, bbox_y1, bbox_x2, bbox_y2
                    ],
                    outputs=[img_output, sam_raw_mask_output, processed_mask_output, pose_map_output, canny_map_output],
                    concurrency_limit=1
                )

    return demo