Astridkraft's picture
Update app.py
f6176b5 verified
raw
history blame
39.6 kB
import gradio as gr
from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline
from diffusers import StableDiffusionInpaintPipeline, AutoencoderKL
from diffusers import DPMSolverMultistepScheduler, PNDMScheduler
from controlnet_module import controlnet_processor
import torch
from PIL import Image, ImageDraw
import time
import os
import tempfile
import random
import threading
from queue import Queue, Empty
import warnings
# === OPTIMIZED SETTINGS ===
# Pick GPU when available; fp16 weights only make sense on CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if device == "cuda" else torch.float32
IMG_SIZE = 512  # working resolution used throughout the app
print(f"Running on: {device}")
# === MODEL CONFIGURATION (ONLY 2 MODELS) ===
# Per-model metadata: UI display name/description, whether an external VAE
# must be attached, and the recommended sampler settings shown in the UI.
MODEL_CONFIGS = {
    "runwayml/stable-diffusion-v1-5": {
        "name": "🏠 Stable Diffusion 1.5 (Universal)",
        "description": "Universal model, good all-rounder, reliable results",
        "requires_vae": False,
        "recommended_steps": 35,
        "recommended_cfg": 7.5,
        "supports_fp16": True
    },
    "SG161222/Realistic_Vision_V6.0_B1_noVAE": {
        "name": "👤 Realistic Vision V6.0 (Portraits)",
        "description": "Best for photorealistic faces, skin details, human portraits",
        "requires_vae": True,
        "vae_model": "stabilityai/sd-vae-ft-mse",
        "recommended_steps": 40,
        "recommended_cfg": 7.0,
        "supports_fp16": False
    }
}
# === SAFETENSORS CONFIGURATION ===
# Models in this list are loaded with use_safetensors=True; all others fall
# back to classic .bin weights.
SAFETENSORS_MODELS = ["runwayml/stable-diffusion-v1-5"]
# === GLOBAL MODEL CACHE ===
# Loaded pipelines keyed by model id, guarded by _model_cache_lock.
_model_cache = {}
_model_cache_lock = threading.Lock()
# Id of the model currently being loaded (or None), guarded by _loading_lock,
# used to prevent two threads from loading the same model in parallel.
_current_loading_model = None
_loading_lock = threading.Lock()
# === AUTOMATIC NEGATIVE PROMPT GENERATION ===
def auto_negative_prompt(positive_prompt):
    """Derive a negative prompt from keywords found in *positive_prompt*.

    A base set of quality negatives is always included; category-specific
    negatives (people, business, product, landscape, logo, architecture)
    are appended when a matching trigger word occurs in the prompt.
    """
    text = positive_prompt.lower()
    # (trigger words, negatives) rules, evaluated in a fixed order so the
    # result is deterministic for a given prompt.
    rules = (
        # People / portraits
        (("person", "man", "woman", "face", "portrait", "team", "employee",
          "people", "crowd", "character", "figure", "human", "child", "baby",
          "girl", "boy", "lady", "gentleman", "fairy", "elf", "dwarf", "orc",
          "mermaid", "angel", "demon", "witch", "wizard", "creature", "being",
          "model", "actor", "actress", "celebrity", "avatar", "persona"),
         "bad anatomy, malformed hands, extra fingers, uneven eyes, distorted face, "
         "unrealistic skin, mutated, deformed, ugly, disfigured, poorly drawn face, "
         "missing limbs, extra limbs, fused fingers, too many fingers, bad teeth, "
         "mutated hands, long neck, extra wings, multiple wings"),
        # Business / corporate
        (("office", "business", "team", "meeting", "corporate", "company", "workplace"),
         "overexposed, oversaturated, harsh lighting, watermark, text, logo, brand"),
        # Product / CGI
        (("product", "packshot", "mockup", "render", "3d", "cgi", "packaging"),
         "plastic texture, noisy, overly reflective surfaces, watermark, text, low poly"),
        # Landscape / environment
        (("landscape", "nature", "mountain", "forest", "outdoor", "beach", "sky"),
         "blurry, oversaturated, unnatural colors, distorted horizon, floating objects"),
        # Logos / symbols
        (("logo", "symbol", "icon", "typography", "badge", "emblem"),
         "watermark, signature, username, text, writing, scribble, messy"),
        # Architecture / buildings
        (("building", "architecture", "house", "interior", "room", "facade"),
         "deformed, distorted perspective, floating objects, collapsing structure"),
    )
    extras = [negatives for words, negatives in rules
              if any(word in text for word in words)]
    base_negatives = "low quality, worst quality, blurry, jpeg artifacts, ugly, deformed"
    if not extras:
        return base_negatives
    return base_negatives + ", " + ", ".join(extras)
# === FACE MASK FUNCTIONS ===
def create_face_mask(image, bbox_coords, face_preserve):
    """Build an inpainting mask — WHITE areas get CHANGED, BLACK areas are kept.

    With no usable coordinates the mask stays fully black (everything kept).
    """
    mask = Image.new("L", image.size, 0)  # start fully black: everything protected
    if not bbox_coords or any(coord is None for coord in bbox_coords):
        return mask
    x1, y1, x2, y2 = bbox_coords
    draw = ImageDraw.Draw(mask)
    if face_preserve:
        # Preserve the face: paint everything white (changeable), then
        # black out the face rectangle so it stays untouched.
        draw.rectangle([0, 0, image.size[0], image.size[1]], fill=255)
        draw.rectangle([x1, y1, x2, y2], fill=0)
        print("Gesicht wird GESCHÜTZT - Umgebung wird verändert (rechteckige Maske)")
    else:
        # Change only the face: just the face rectangle becomes white.
        draw.rectangle([x1, y1, x2, y2], fill=255)
        print("Nur Gesicht wird verändert - Umgebung bleibt erhalten (rechteckige Maske)")
    return mask
def auto_detect_face_area(image):
    """Heuristically estimate a face bounding box without external detectors.

    Returns ``[x1, y1, x2, y2]`` — a box 40% of the shorter side wide,
    horizontally centered, positioned in the upper quarter and stretched
    vertically by 20% to suit oval faces; clamped to the image bounds.
    """
    width, height = image.size
    side = min(width, height) * 0.4
    left = (width - side) / 2
    top = (height - side) / 4       # upper quarter covers forehead and chin better
    right = left + side
    bottom = top + side * 1.2       # slightly elongated for oval faces
    # Clamp to the image area after truncating to ints.
    left, top = max(0, int(left)), max(0, int(top))
    right, bottom = min(width, int(right)), min(height, int(bottom))
    x1, y1, x2, y2 = left, top, right, bottom
    print(f"Geschätzte Gesichtskoordinaten: [{x1}, {y1}, {x2}, {y2}]")
    return [x1, y1, x2, y2]
# === MODEL LOADING WITH CACHING AND LOAD BALANCING ===
def load_model_with_cache(model_id, force_reload=False):
    """Load a text-to-image pipeline with caching and thread safety.

    Returns a cached pipeline when available; otherwise loads the model
    (optionally with an external VAE), swaps in a DPM-Solver scheduler,
    enables memory optimizations, caches the result and returns it.
    Falls back to SD 1.5 when the requested model fails to load.
    """
    global _model_cache, _current_loading_model
    # Check the cache first (fast path).
    with _model_cache_lock:
        if model_id in _model_cache and not force_reload:
            print(f"✅ Modell {model_id} aus Cache geladen")
            return _model_cache[model_id]
    # Prevent parallel loading of the same model.
    with _loading_lock:
        if _current_loading_model == model_id:
            print(f"⏳ Modell {model_id} wird bereits geladen, warte...")
            # NOTE(review): busy-wait with no timeout — if the other thread's
            # load fails this loops forever; confirm and consider a deadline.
            while model_id not in _model_cache:
                time.sleep(0.1)
            return _model_cache.get(model_id)
        _current_loading_model = model_id
    try:
        print(f"🔄 Lade Modell: {model_id}")
        # Unknown ids fall back to the SD 1.5 config entry.
        config = MODEL_CONFIGS.get(model_id, MODEL_CONFIGS["runwayml/stable-diffusion-v1-5"])
        print(f"📋 Modell-Konfiguration: {config['name']}")
        # VAE handling: some checkpoints ship without a VAE and need an
        # external one attached for correct decoding.
        vae = None
        if config.get("requires_vae", False):
            print(f"🔧 Lade externe VAE: {config['vae_model']}")
            try:
                vae = AutoencoderKL.from_pretrained(
                    config["vae_model"],
                    torch_dtype=torch_dtype
                ).to(device)
                print("✅ VAE erfolgreich geladen")
            except Exception as vae_error:
                # Best effort: continue without the external VAE.
                print(f"⚠️ Fehler beim Laden der VAE: {vae_error}")
                vae = None
        # Common from_pretrained keyword arguments.
        model_params = {
            "torch_dtype": torch_dtype,
            "safety_checker": None,
            "requires_safety_checker": False,
            "add_watermarker": False,
            "allow_pickle": True,
        }
        # SAFETENSORS logic: prefer safetensors only for models known to ship them.
        if model_id in SAFETENSORS_MODELS:
            model_params["use_safetensors"] = True
            print(f"ℹ️ Verwende safetensors für {model_id}")
        else:
            model_params["use_safetensors"] = False
            print(f"ℹ️ Verwende .bin weights für {model_id}")
        # Request the fp16 weight variant only when supported and running in fp16.
        if config.get("supports_fp16", False) and torch_dtype == torch.float16:
            model_params["variant"] = "fp16"
            print("ℹ️ Verwende FP16 Variante")
        # Attach the external VAE when one was loaded.
        if vae is not None:
            model_params["vae"] = vae
        print(f"📥 Lade Hauptmodell...")
        pipe = StableDiffusionPipeline.from_pretrained(
            model_id,
            **model_params
        ).to(device)
        # Scheduler configuration: guard against a missing scheduler.
        if pipe.scheduler is None:
            print("⚠️ Scheduler ist None, setze Standard-Scheduler")
            pipe.scheduler = PNDMScheduler.from_pretrained(
                model_id,
                subfolder="scheduler"
            )
        # Optimizations: try to switch to DPM-Solver Multistep (usually
        # better quality per step); keep the existing scheduler on failure.
        try:
            scheduler_config = pipe.scheduler.config if hasattr(pipe.scheduler, 'config') else {
                "beta_start": 0.00085,
                "beta_end": 0.012,
                "beta_schedule": "scaled_linear",
                "num_train_timesteps": 1000,
                "prediction_type": "epsilon",
                "steps_offset": 1
            }
            pipe.scheduler = DPMSolverMultistepScheduler.from_config(
                scheduler_config,
                use_karras_sigmas=True,
                algorithm_type="sde-dpmsolver++"
            )
            print("✅ DPM-Solver Multistep Scheduler konfiguriert")
        except Exception:
            print("ℹ️ Verwende Standard-Scheduler")
        pipe.enable_attention_slicing()
        if hasattr(pipe, 'vae') and pipe.vae is not None:
            try:
                pipe.enable_vae_slicing()
                print("✅ VAE Slicing aktiviert")
            except Exception:
                pass
        # Store in the cache so concurrent waiters and later calls reuse it.
        with _model_cache_lock:
            _model_cache[model_id] = pipe
        print(f"✅ {config['name']} erfolgreich geladen und gecached")
        return pipe
    except Exception as e:
        print(f"❌ Fehler beim Laden von {model_id}: {str(e)[:200]}")
        import traceback
        traceback.print_exc()
        # Fall back to SD 1.5 so the app keeps working.
        try:
            print("🔄 Fallback auf SD 1.5...")
            pipe = StableDiffusionPipeline.from_pretrained(
                "runwayml/stable-diffusion-v1-5",
                torch_dtype=torch_dtype,
                safety_checker=None,
            ).to(device)
            pipe.enable_attention_slicing()
            with _model_cache_lock:
                _model_cache["runwayml/stable-diffusion-v1-5"] = pipe
            return pipe
        except Exception as fallback_error:
            print(f"❌ Auch Fallback fehlgeschlagen: {fallback_error}")
            raise
    finally:
        # Always clear the in-progress marker, even on failure.
        with _loading_lock:
            _current_loading_model = None
# === LAZY LOADING FOR IMG2IMG ===
# The inpainting pipeline is loaded on first use; guarded by _img2img_lock.
_img2img_pipe = None
_img2img_lock = threading.Lock()
def get_img2img_pipe():
    """Lazily load the inpainting pipeline (thread-safe, loaded once).

    Uses double-checked locking: a lock-free fast path when already
    loaded, then a second check under the lock before loading.
    """
    global _img2img_pipe
    if _img2img_pipe is not None:
        return _img2img_pipe
    with _img2img_lock:
        if _img2img_pipe is not None:  # double-check under the lock
            return _img2img_pipe
        print("🔄 Lade Inpainting-Modell...")
        try:
            _img2img_pipe = StableDiffusionInpaintPipeline.from_pretrained(
                "runwayml/stable-diffusion-inpainting",
                torch_dtype=torch_dtype,
                safety_checker=None,
            ).to(device)
            # Memory optimizations for attention and VAE decoding.
            _img2img_pipe.enable_attention_slicing()
            _img2img_pipe.enable_vae_tiling()
            print("✅ Inpainting-Modell geladen")
        except Exception as e:
            print(f"❌ Fehler beim Laden des Inpainting-Modells: {e}")
            raise
        return _img2img_pipe
# === OPTIMIZED PIPELINE FUNCTIONS ===
def load_txt2img(model_id):
    """Return the text-to-image pipeline for *model_id* (cached or freshly loaded)."""
    return load_model_with_cache(model_id)
def load_img2img():
    """Return the lazily-loaded inpainting pipeline."""
    return get_img2img_pipe()
# === ASYNCHRONOUS MODEL PRELOADING ON TAB SWITCH ===
class ModelPreloader:
    """Preload models asynchronously when a tab is activated.

    A daemon worker thread drains a queue of model ids and loads each
    through load_model_with_cache so later user requests hit the cache.
    """
    def __init__(self):
        self.queue = Queue()          # pending model ids to preload
        self.worker_thread = None
        self.stop_flag = False        # cooperative shutdown signal
    def start(self):
        """Start the daemon worker thread."""
        self.worker_thread = threading.Thread(target=self._worker, daemon=True)
        self.worker_thread.start()
        print("✅ ModelPreloader gestartet")
    def stop(self):
        """Signal the worker to stop and wait briefly for it to finish."""
        self.stop_flag = True
        if self.worker_thread:
            self.worker_thread.join(timeout=1.0)
    def schedule_preload(self, model_id):
        """Queue *model_id* for preloading unless it is already cached."""
        # NOTE(review): reads _model_cache without its lock — benign race
        # (worst case the id is queued once too often), confirm acceptable.
        if model_id not in _model_cache:
            self.queue.put(model_id)
    def _worker(self):
        """Worker loop: load queued models until stop() is called."""
        while not self.stop_flag:
            try:
                # Short timeout so stop_flag is re-checked regularly.
                model_id = self.queue.get(timeout=0.5)
                if model_id:
                    try:
                        print(f"⚡ Vorlade Modell: {model_id}")
                        load_model_with_cache(model_id)
                    except Exception as e:
                        # Preloading is best-effort; failures are only logged.
                        print(f"⚠️ Vorladen von {model_id} fehlgeschlagen: {e}")
            except Empty:
                continue
            except Exception as e:
                print(f"⚠️ Fehler im Preloader: {e}")
# Initialize the preloader at import time so tab handlers can schedule loads.
model_preloader = ModelPreloader()
model_preloader.start()
# === PROGRESS CALLBACK CLASSES ===
class TextToImageProgressCallback:
    """Diffusers ``callback_on_step_end`` adapter for the txt2img pipeline.

    Forwards denoising progress to a Gradio progress callable.
    """
    def __init__(self, progress, total_steps):
        # progress: gr.Progress-like callable taking (fraction, desc=...)
        self.progress = progress
        self.total_steps = total_steps
        self.current_step = 0
    def __call__(self, pipe, step, timestep, callback_kwargs):
        self.current_step = step + 1
        # Use the completed-step count so the bar reaches 100% on the final
        # step (the previous step/total formula always stalled one step short);
        # clamp defensively in case the pipeline reports extra steps.
        fraction = min(1.0, self.current_step / self.total_steps)
        self.progress(fraction, desc="Generierung läuft...")
        return callback_kwargs
class ImageToImageProgressCallback:
    """Diffusers ``callback_on_step_end`` adapter for img2img pipelines.

    With ``strength < 1.0`` the pipeline only runs ``total_steps * strength``
    denoising steps, so progress is scaled against that effective count
    (computed lazily on the first callback).
    """
    def __init__(self, progress, total_steps, strength):
        # progress: gr.Progress-like callable taking (fraction, desc=...)
        self.progress = progress
        self.total_steps = total_steps
        self.current_step = 0
        self.strength = strength
        self.actual_total_steps = None  # resolved on first __call__
    def __call__(self, pipe, step, timestep, callback_kwargs):
        self.current_step = step + 1
        if self.actual_total_steps is None:
            if self.strength < 1.0:
                # Floor at 1: int(total * strength) can be 0 for small
                # strength/step combinations, which previously raised
                # ZeroDivisionError below.
                self.actual_total_steps = max(1, int(self.total_steps * self.strength))
            else:
                self.actual_total_steps = self.total_steps
            print(f"🎯 INTERNE STEP-AUSGABE: Strength {self.strength} → {self.actual_total_steps} tatsächliche Denoising-Schritte")
        # Completed-step count so the bar can reach 100%; clamped because the
        # estimate may undershoot the steps the pipeline actually runs.
        fraction = min(1.0, self.current_step / self.actual_total_steps)
        self.progress(fraction, desc="Generierung läuft...")
        return callback_kwargs
# === FEATURE HELPER FUNCTIONS ===
def create_preview_image(image, bbox_coords, face_preserve, mode_color):
    """Render a preview copy of *image* with a mode-colored frame.

    A red frame means only the boxed element gets changed, a green frame
    means the boxed element is kept.  When valid coordinates are given the
    box itself is outlined in yellow with a caption above it.
    """
    if image is None:
        return None
    annotated = image.copy()
    draw = ImageDraw.Draw(annotated)
    if mode_color == "red":
        frame_rgba = (255, 0, 0, 180)
        caption = "NUR BILDELEMENT VERÄNDERN"
    else:
        frame_rgba = (0, 255, 0, 180)
        caption = "BILDELEMENT BEIBEHALTEN"
    # Outer frame signalling the active mode.
    draw.rectangle([0, 0, annotated.width - 1, annotated.height - 1],
                   outline=frame_rgba, width=8)
    if bbox_coords and all(c is not None for c in bbox_coords):
        x1, y1, x2, y2 = bbox_coords
        draw.rectangle([x1, y1, x2, y2], outline=(255, 255, 0, 200), width=3)
        # Dark label background sized from the rendered text extents.
        tb = draw.textbbox((x1, y1 - 25), caption)
        draw.rectangle([tb[0] - 5, tb[1] - 2, tb[2] + 5, tb[3] + 2],
                       fill=(0, 0, 0, 160))
        draw.text((x1, y1 - 25), caption, fill=(255, 255, 255))
    return annotated
def update_live_preview(image, bbox_x1, bbox_y1, bbox_x2, bbox_y2, face_preserve):
    """Re-render the annotated preview whenever a coordinate or mode changes."""
    if image is None:
        return None
    color = "green" if face_preserve else "red"
    coords = [bbox_x1, bbox_y1, bbox_x2, bbox_y2]
    return create_preview_image(image, coords, face_preserve, color)
def process_image_upload(image):
    """Handle a fresh upload: normalize to 512x512, guess a face box, build a preview.

    Returns (preview image, x1, y1, x2, y2) or five Nones when no image given.
    """
    if image is None:
        return None, None, None, None, None
    # Work at the fixed pipeline resolution.
    if image.size != (512, 512):
        image = image.resize((512, 512), Image.LANCZOS)
        print(f"Bild auf 512x512 skaliert")
    x1, y1, x2, y2 = auto_detect_face_area(image)
    preview = create_preview_image(image, [x1, y1, x2, y2], True, "green")
    return preview, x1, y1, x2, y2
# === MAIN FUNCTIONS ===
def text_to_image(prompt, model_id, steps, guidance_scale, progress=gr.Progress()):
    """Generate a 512x512 image from *prompt* with the selected model.

    Negative prompts are derived automatically; a fresh random seed is used
    per call.  Returns ``(image, status message)`` or ``(None, error text)``.
    """
    try:
        if not prompt or not prompt.strip():
            return None, "Bitte einen Prompt eingeben"
        print(f"🚀 Starte Generierung mit Modell: {model_id}")
        print(f"📝 Prompt: {prompt}")
        # Derive negative prompts automatically from the prompt content.
        auto_negatives = auto_negative_prompt(prompt)
        print(f"🤖 Automatisch generierte Negative Prompts: {auto_negatives}")
        start_time = time.time()
        progress(0, desc="Lade Modell...")
        pipe = load_txt2img(model_id)
        # Fresh random seed per request, logged for reproducibility.
        seed = random.randint(0, 2**32 - 1)
        generator = torch.Generator(device=device).manual_seed(seed)
        print(f"🌱 Seed: {seed}")
        callback = TextToImageProgressCallback(progress, steps)
        print(f"⚙️ Einstellungen: Steps={steps}, CFG={guidance_scale}")
        image = pipe(
            prompt=prompt,
            negative_prompt=auto_negatives,
            height=512,
            width=512,
            num_inference_steps=int(steps),
            guidance_scale=guidance_scale,
            generator=generator,
            callback_on_step_end=callback,
            callback_on_step_end_tensor_inputs=[],
        ).images[0]
        end_time = time.time()
        duration = end_time - start_time
        print(f"✅ Bild generiert in {duration:.2f} Sekunden")
        config = MODEL_CONFIGS.get(model_id, MODEL_CONFIGS["runwayml/stable-diffusion-v1-5"])
        status_msg = f"✅ Generiert mit {config['name']} in {duration:.1f}s"
        return image, status_msg
    except Exception as e:
        # Surface the error in the UI status field instead of crashing.
        error_msg = f"❌ Fehler: {str(e)}"
        print(f"❌ Fehler in text_to_image: {e}")
        import traceback
        traceback.print_exc()
        return None, error_msg
def img_to_image(image, prompt, neg_prompt, strength, steps, guidance_scale,
                 face_preserve, bbox_x1, bbox_y1, bbox_x2, bbox_y2,
                 progress=gr.Progress()):
    """ControlNet-guided image-to-image transformation with optional face mask.

    Runs a ControlNet pass first, then an inpainting pass on its output.
    The bbox sliders (scaled from the original image size to 512x512) define
    a rectangular mask; *face_preserve* decides whether the box is protected
    or the only region that changes.  Returns the generated image or ``None``.
    """
    try:
        if image is None:
            return None
        # time/random are already imported at module level.
        start_time = time.time()
        print(f"Img2Img Start → Strength: {strength}, Steps: {steps}, Guidance: {guidance_scale}")
        print(f"Prompt: {prompt}")
        print(f"Negativ-Prompt: {neg_prompt}")
        print(f"Gesicht beibehalten: {face_preserve}")
        progress(0, desc="Starte Generierung mit ControlNet...")
        # Boost the requested strength slightly but cap it to keep structure.
        adj_strength = min(0.85, strength * 1.25)
        if face_preserve:
            controlnet_strength = adj_strength * 0.8
            print(f"🎯 ControlNet Modus: Umgebung beibehalten (Strength = {controlnet_strength:.3f})")
        else:
            controlnet_strength = adj_strength * 0.5
            print(f"🎯 ControlNet Modus: Person beibehalten (Strength = {controlnet_strength:.3f})")
        # ControlNet pass uses fewer steps than the final inpaint pass.
        controlnet_steps = min(25, int(steps * 0.8))
        print(f"🎯 Steps={steps}, ControlNet-Steps={controlnet_steps}, Strength={controlnet_strength:.3f}")
        progress(0.05, desc="Erstelle ControlNet Maps...")
        controlnet_output, inpaint_input = controlnet_processor.generate_with_controlnet(
            image=image,
            prompt=prompt,
            negative_prompt=neg_prompt,
            steps=controlnet_steps,
            guidance_scale=guidance_scale,
            controlnet_strength=controlnet_strength,
            progress=progress,
            keep_environment=face_preserve
        )
        print(f"✅ ControlNet Output erhalten: {type(controlnet_output)}")
        print(f"✅ Inpaint Input erhalten: {type(inpaint_input)}")
        progress(0.3, desc="ControlNet abgeschlossen – starte Inpaint...")
        pipe = load_img2img()
        img_resized = inpaint_input.convert("RGB").resize((512, 512))
        adj_guidance = min(guidance_scale, 12.0)
        seed = random.randint(0, 2**32 - 1)
        generator = torch.Generator(device=device).manual_seed(seed)
        print(f"Using seed: {seed}")
        mask = None
        bbox_values = [bbox_x1, bbox_y1, bbox_x2, bbox_y2]
        # BUG FIX: a coordinate of 0 is perfectly valid (box touching an
        # edge); the old truthiness check (`if bbox_x1 and ...`) silently
        # skipped mask creation in that case.  Check for None explicitly.
        if all(coord is not None for coord in bbox_values):
            orig_w, orig_h = image.size
            # Scale slider coordinates from the original size to 512x512.
            scale_x, scale_y = 512 / orig_w, 512 / orig_h
            bbox_coords = [
                int(bbox_x1 * scale_x),
                int(bbox_y1 * scale_y),
                int(bbox_x2 * scale_x),
                int(bbox_y2 * scale_y)
            ]
            print(f"Skalierte Koordinaten: {bbox_coords}")
            mask = create_face_mask(img_resized, bbox_coords, face_preserve)
            if mask:
                print("✅ Maske erfolgreich erstellt")
        else:
            print("⚠️ Keine gültigen Koordinaten – keine Maske")
        # Inpainting works well with an ancestral sampler; swap only if needed.
        from diffusers import EulerAncestralDiscreteScheduler
        if not isinstance(pipe.scheduler, EulerAncestralDiscreteScheduler):
            pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
        callback = ImageToImageProgressCallback(progress, int(steps), adj_strength)
        result = pipe(
            prompt=prompt,
            negative_prompt=neg_prompt,
            image=img_resized,
            mask_image=mask,
            strength=adj_strength,
            num_inference_steps=int(steps),
            guidance_scale=adj_guidance,
            generator=generator,
            callback_on_step_end=callback,
            callback_on_step_end_tensor_inputs=[],
        )
        end_time = time.time()
        print(f"🕒 Dauer: {end_time - start_time:.2f} Sekunden")
        generated_image = result.images[0]
        return generated_image
    except Exception as e:
        # Log and return None so the UI shows an empty result, not a crash.
        print(f"❌ Fehler in img_to_image: {e}")
        import traceback
        traceback.print_exc()
        return None
def update_bbox_from_image(image):
    """Refresh the bounding-box slider values after an image upload."""
    if image is None:
        return None, None, None, None
    x1, y1, x2, y2 = auto_detect_face_area(image)
    return x1, y1, x2, y2
def update_model_settings(model_id):
    """Return (steps, cfg, info text) recommended for the selected model."""
    fallback = MODEL_CONFIGS["runwayml/stable-diffusion-v1-5"]
    config = MODEL_CONFIGS.get(model_id, fallback)
    steps = config["recommended_steps"]
    cfg = config["recommended_cfg"]
    info = f"📊 Empfohlene Einstellungen: {steps} Steps, CFG {cfg}"
    return steps, cfg, info
# === TAB SWITCH HANDLER ===
def on_tab_change(tab_name):
    """Kick off background model loading for the newly selected tab."""
    print(f"📌 Tab gewechselt zu: {tab_name}")
    if tab_name == "Text zu Bild":
        # Warm the default txt2img model.
        model_preloader.schedule_preload("runwayml/stable-diffusion-v1-5")
    elif tab_name == "Bild zu Bild":
        # Load the inpainting pipeline in the background.
        threading.Thread(target=get_img2img_pipe, daemon=True).start()
    return tab_name
def main_ui():
    """Assemble the two-tab Gradio interface (text-to-image, image-to-image).

    Wires the widgets to the generation functions, live-preview helpers and
    model preloader, configures the request queue and returns the Blocks app.
    """
    with gr.Blocks(
        title="AI Image Generator",
        theme=gr.themes.Base(),
        css="""
        .info-box {
            background-color: #f8f4f0;
            padding: 15px;
            border-radius: 8px;
            border-left: 4px solid #8B7355;
            margin: 20px 0;
        }
        .clickable-file {
            color: #1976d2;
            cursor: pointer;
            text-decoration: none;
            font-family: 'Monaco', 'Consolas', monospace;
            background: #e3f2fd;
            padding: 2px 6px;
            border-radius: 4px;
            border: 1px solid #bbdefb;
        }
        .clickable-file:hover {
            background: #bbdefb;
            text-decoration: underline;
        }
        .model-info-box {
            background: #e8f4fd;
            padding: 12px;
            border-radius: 6px;
            margin: 10px 0;
            border-left: 4px solid #2196f3;
            font-size: 14px;
        }
        #generate-button {
            background-color: #0080FF !important;
            border: none !important;
            margin: 20px auto !important;
            display: block !important;
            font-weight: 600;
            width: 280px;
        }
        #generate-button:hover {
            background-color: #0066CC !important;
        }
        .hint-box {
            margin-top: 20px;
        }
        .custom-text {
            font-size: 25px !important;
        }
        .image-upload .svelte-1p4f8co {
            display: block !important;
        }
        .preview-box {
            border: 2px dashed #ccc;
            padding: 10px;
            border-radius: 8px;
            margin: 10px 0;
        }
        .mode-red {
            border: 3px solid #ff4444 !important;
        }
        .mode-green {
            border: 3px solid #44ff44 !important;
        }
        .coordinate-sliders {
            background: #f8f9fa;
            padding: 15px;
            border-radius: 8px;
            margin: 10px 0;
        }
        .gr-checkbox .wrap .text-gray {
            font-size: 14px !important;
            font-weight: 600 !important;
            line-height: 1.4 !important;
        }
        .status-message {
            padding: 10px;
            border-radius: 5px;
            margin: 10px 0;
            text-align: center;
            font-weight: 500;
        }
        .status-success {
            background-color: #d4edda;
            color: #155724;
            border: 1px solid #c3e6cb;
        }
        .status-error {
            background-color: #f8d7da;
            color: #721c24;
            border: 1px solid #f5c6cb;
        }
        .tab-nav {
            padding: 10px 0;
        }
        .tab-nav button {
            transition: all 0.3s ease;
        }
        .tab-nav button:hover {
            transform: translateY(-2px);
        }
        """
    ) as demo:
        # Tab status tracking.
        # NOTE(review): current_tab is never updated by any handler below —
        # presumably intended to feed on_tab_change; confirm before relying on it.
        current_tab = gr.State(value="Text zu Bild")
        with gr.Tab("Text zu Bild") as txt_tab:
            gr.Markdown("## 🎨 Text zu Bild Generator")
            with gr.Row():
                with gr.Column(scale=2):
                    # Model selection dropdown (only 2 models).
                    model_dropdown = gr.Dropdown(
                        choices=[
                            (config["name"], model_id)
                            for model_id, config in MODEL_CONFIGS.items()
                        ],
                        value="runwayml/stable-diffusion-v1-5",
                        label="📁 Modellauswahl",
                        info="🏠 Universal vs 👤 Portraits"
                    )
                    # Model information box.
                    model_info_box = gr.Markdown(
                        value="<div class='model-info-box'>"
                              "**🏠 Stable Diffusion 1.5 (Universal)**<br>"
                              "Universal model, good all-rounder, reliable results<br>"
                              "Empfohlene Einstellungen: 35 Steps, CFG 7.5"
                              "</div>",
                        label="Modellinformationen"
                    )
                with gr.Column(scale=3):
                    txt_input = gr.Textbox(
                        placeholder="z.B. ultra realistic mountain landscape at sunrise, soft mist over the valley, detailed foliage, crisp textures, depth of field, sunlight rays through clouds, shot on medium format camera, 8k, HDR, hyper-detailed, natural lighting, masterpiece",
                        lines=3,
                        label="🎯 Prompt (Englisch)",
                        info="Beschreibe detailliert, was du sehen möchtest. Negative Prompts werden automatisch generiert."
                    )
            with gr.Row():
                with gr.Column():
                    txt_steps = gr.Slider(
                        minimum=10, maximum=100, value=35, step=1,
                        label="⚙️ Inferenz-Schritte",
                        info="Mehr Schritte = bessere Qualität, aber langsamer (20-50 empfohlen)"
                    )
                with gr.Column():
                    txt_guidance = gr.Slider(
                        minimum=1.0, maximum=20.0, value=7.5, step=0.5,
                        label="🎛️ Prompt-Stärke (CFG Scale)",
                        info="Wie stark der Prompt befolgt wird (7-12 für gute Balance)"
                    )
            # Status message shown below the settings.
            status_output = gr.Markdown(
                value="",
                elem_classes="status-message"
            )
            generate_btn = gr.Button("🚀 Bild generieren", variant="primary", elem_id="generate-button")
            with gr.Row():
                txt_output = gr.Image(
                    label="🖼️ Generiertes Bild",
                    show_download_button=True,
                    type="pil",
                    height=400
                )
            # Event handler for model change, with background preloading.
            def on_model_select(model_id):
                """Preload the chosen model and refresh info box and sliders."""
                # Preload the selected model in the background.
                model_preloader.schedule_preload(model_id)
                config = MODEL_CONFIGS.get(model_id, MODEL_CONFIGS["runwayml/stable-diffusion-v1-5"])
                info_html = f"""
                <div class='model-info-box'>
                <strong>{config['name']}</strong><br>
                {config['description']}<br>
                <em>Empfohlene Einstellungen: {config['recommended_steps']} Steps, CFG {config['recommended_cfg']}</em>
                </div>
                """
                return info_html, config["recommended_steps"], config["recommended_cfg"]
            model_dropdown.change(
                fn=on_model_select,
                inputs=[model_dropdown],
                outputs=[model_info_box, txt_steps, txt_guidance],
                queue=False  # Important: no queue for this event
            )
            generate_btn.click(
                fn=text_to_image,
                inputs=[txt_input, model_dropdown, txt_steps, txt_guidance],
                outputs=[txt_output, status_output],
                concurrency_limit=1
            )
        with gr.Tab("Bild zu Bild") as img_tab:
            gr.Markdown("## 🖼️ Bild zu Bild Transformation")
            with gr.Row():
                with gr.Column():
                    img_input = gr.Image(
                        type="pil",
                        label="📤 Eingabebild",
                        height=300,
                        sources=["upload"],
                        elem_id="image-upload"
                    )
                with gr.Column():
                    preview_output = gr.Image(
                        label="🎯 Live-Vorschau mit Maske",
                        height=300,
                        interactive=False,
                        show_download_button=False
                    )
            with gr.Row():
                face_preserve = gr.Checkbox(
                    label="🛡️ Schutzmodus",
                    value=True,
                    info="🟢 AN: Alles AUSSERHALB des gelben Rahmens verändern | 🔴 AUS: Nur INNERHALB des gelben Rahmens verändern"
                )
            with gr.Row():
                gr.Markdown("### 📐 Bildelementbereich anpassen")
            with gr.Row():
                with gr.Column():
                    bbox_x1 = gr.Slider(
                        label="← Links (x1)",
                        minimum=0, maximum=512, value=100, step=1,
                        info="Linke Kante des Bildelementbereichs"
                    )
                with gr.Column():
                    bbox_y1 = gr.Slider(
                        label="↑ Oben (y1)",
                        minimum=0, maximum=512, value=100, step=1,
                        info="Obere Kante des Bildelementbereichs"
                    )
            with gr.Row():
                with gr.Column():
                    bbox_x2 = gr.Slider(
                        label="→ Rechts (x2)",
                        minimum=0, maximum=512, value=300, step=1,
                        info="Rechte Kante des Bildelementbereichs"
                    )
                with gr.Column():
                    bbox_y2 = gr.Slider(
                        label="↓ Unten (y2)",
                        minimum=0, maximum=512, value=300, step=1,
                        info="Untere Kante des Bildelementbereichs"
                    )
            with gr.Row():
                with gr.Column():
                    img_prompt = gr.Textbox(
                        placeholder="change background to beach with palm trees, keep person unchanged, sunny day",
                        lines=2,
                        label="🎯 Transformations-Prompt (Englisch)",
                        info="Was soll verändert werden? Sei spezifisch."
                    )
                with gr.Column():
                    img_neg_prompt = gr.Textbox(
                        placeholder="blurry, deformed, ugly, bad anatomy, extra limbs, poorly drawn hands",
                        lines=2,
                        label="🚫 Negativ-Prompt (Englisch)",
                        info="Was soll vermieden werden? Unerwünschte Elemente auflisten."
                    )
            with gr.Row():
                with gr.Column():
                    strength_slider = gr.Slider(
                        minimum=0.1, maximum=0.9, value=0.4, step=0.05,
                        label="💪 Veränderungs-Stärke",
                        info="0.1-0.3: Leichte Anpassungen, 0.4-0.6: Mittlere Veränderungen, 0.7-0.9: Starke Umgestaltung"
                    )
                with gr.Column():
                    img_steps = gr.Slider(
                        minimum=10, maximum=100, value=35, step=1,
                        label="⚙️ Inferenz-Schritte",
                        info="Anzahl der Verarbeitungsschritte (25-45 für gute Ergebnisse)"
                    )
                with gr.Column():
                    img_guidance = gr.Slider(
                        minimum=1.0, maximum=20.0, value=7.5, step=0.5,
                        label="🎛️ Prompt-Stärke",
                        info="Einfluss des Prompts auf das Ergebnis (6-10 für natürliche Ergebnisse)"
                    )
            with gr.Row():
                gr.Markdown(
                    "### 📋 Hinweise:\n"
                    "• **🆕 Automatische Bildelementerkennung** setzt Koordinaten beim Upload\n"
                    "• **🆕 Live-Vorschau** zeigt farbige Rahmen je nach Modus (🔴 Rot / 🟢 Grün)\n"
                    "• **🆕 Koordinaten-Schieberegler** für präzise Anpassung mit Live-Update\n"
                    "• **Koordinaten nur bei erkennbaren Verzerrungen anpassen** (Bereiche leicht verschieben)"
                )
            transform_btn = gr.Button("🔄 Bild transformieren", variant="primary")
            with gr.Row():
                img_output = gr.Image(
                    label="✨ Transformiertes Bild",
                    show_download_button=True,
                    type="pil",
                    height=400
                )
            # Upload resets the preview and the bbox sliders.
            img_input.change(
                fn=process_image_upload,
                inputs=[img_input],
                outputs=[preview_output, bbox_x1, bbox_y1, bbox_x2, bbox_y2]
            )
            # Every coordinate slider and the mode checkbox refresh the preview.
            coordinate_inputs = [img_input, bbox_x1, bbox_y1, bbox_x2, bbox_y2, face_preserve]
            for slider in [bbox_x1, bbox_y1, bbox_x2, bbox_y2]:
                slider.change(
                    fn=update_live_preview,
                    inputs=coordinate_inputs,
                    outputs=preview_output
                )
            face_preserve.change(
                fn=update_live_preview,
                inputs=coordinate_inputs,
                outputs=preview_output
            )
            transform_btn.click(
                fn=img_to_image,
                inputs=[
                    img_input, img_prompt, img_neg_prompt,
                    strength_slider, img_steps, img_guidance,
                    face_preserve, bbox_x1, bbox_y1, bbox_x2, bbox_y2
                ],
                outputs=img_output,
                concurrency_limit=1
            )
        def handle_tab_switch():
            """Empty function, only registered so the tab switch fires an event."""
            # NOTE(review): these handlers do nothing — presumably the
            # module-level on_tab_change/preloading was meant here; confirm.
            return
        # Register the tab-switch handlers:
        txt_tab.select(fn=handle_tab_switch, queue=False)
        img_tab.select(fn=handle_tab_switch, queue=False)
        # === END TAB SWITCH OPTIMIZATION ===
    # Configure the request queue with load balancing (one job at a time).
    demo.queue(max_size=2, default_concurrency_limit=1, api_open=False)
    return demo
if __name__ == "__main__":
    import atexit

    # Cleanup handler: stop background threads and free GPU memory on exit.
    @atexit.register
    def cleanup():
        """Release resources when the interpreter shuts down."""
        model_preloader.stop()
        # Idiomatic statement instead of the original
        # `expr if cond else None` expression-as-statement.
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        print("🧹 Cleanup durchgeführt")

    demo = main_ui()
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        max_file_size="10MB",
        show_error=True,
        share=False,
        ssl_verify=False
    )