| | import gradio as gr |
| | from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline |
| | from diffusers import StableDiffusionInpaintPipeline, AutoencoderKL |
| | from diffusers import DPMSolverMultistepScheduler, PNDMScheduler |
| | from controlnet_module import controlnet_processor |
| | from diffusers import StableDiffusionControlNetInpaintPipeline, ControlNetModel |
| | import torch |
| | from PIL import Image, ImageDraw |
| | import time |
| | import os |
| | import tempfile |
| | import random |
| | import re |
| | from PIL import ImageFilter |
| | import numpy as np |
| |
|
| |
|
| |
|
| | |
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Half precision is only selected together with CUDA; the CPU path uses float32.
torch_dtype = torch.float16 if device == "cuda" else torch.float32
# NOTE(review): presumably the working resolution of the diffusion models;
# not referenced in the visible part of the file (512 is hard-coded there).
IMG_SIZE = 512
# Upper bound for the bounding-box slider maxima (see update_slider_for_image).
MAX_IMAGE_SIZE = 4096

print(f"Running on: {device}")
| |
|
| | |
# Per-model settings keyed by Hugging Face repository id.
# load_txt2img() reads "name", "description", "requires_vae", "vae_model",
# "supports_fp16" and prints "recommended_steps" / "recommended_cfg".
# Unknown model ids fall back to the "runwayml/stable-diffusion-v1-5" entry.
MODEL_CONFIGS = {
    "runwayml/stable-diffusion-v1-5": {
        "name": "🏠 Stable Diffusion 1.5 (Universal)",
        "description": "Universal model, good all-rounder, reliable results",
        # The external VAE is only loaded when "requires_vae" is True.
        "requires_vae": False,
        "vae_model": "stabilityai/sd-vae-ft-mse",
        "recommended_steps": 35,
        "recommended_cfg": 7.5,
        # When True (and the dtype is float16) the "fp16" weight variant is requested.
        "supports_fp16": True
    },
    "SG161222/Realistic_Vision_V6.0_B1_noVAE": {
        "name": "👤 Realistic Vision V6.0 (Portraits)",
        "description": "Best for photorealistic faces, skin details, human portraits",
        "requires_vae": True,
        "vae_model": "stabilityai/sd-vae-ft-mse",
        "recommended_steps": 40,
        "recommended_cfg": 7.0,
        "supports_fp16": False
    }
}

# NOTE(review): not referenced in the visible part of the file; presumably
# the list of models that ship safetensors weights - confirm usage.
SAFETENSORS_MODELS = ["runwayml/stable-diffusion-v1-5"]

# NOTE(review): not referenced in the visible part of the file; presumably
# the model selected by default in the UI - confirm usage.
current_model_id = "runwayml/stable-diffusion-v1-5"
| |
|
| | |
def auto_negative_prompt(positive_prompt):
    """Build a negative prompt from keywords found in the positive prompt.

    The prompt is lower-cased and scanned for topic keywords (people,
    business, product, landscape, logo, architecture). Matching is a plain
    substring test, so e.g. "woman" also matches "man". For every topic
    that matches, a topic-specific list of negative terms is appended to a
    fixed base list.

    Parameters:
    - positive_prompt: the user's prompt; None or "" is treated as an
      empty prompt and yields only the base negatives.

    Returns:
    - A ", "-separated string of negative terms. Terms are stripped of
      surrounding whitespace and de-duplicated (first occurrence wins), so
      a term shared by several topics appears only once.
    """
    prompt_text = (positive_prompt or "").lower()

    base_negatives = "low quality, worst quality, blurry, jpeg artifacts, ugly, deformed"

    # (keywords, negative terms) per topic; order defines the output order.
    topic_rules = (
        (
            ("person", "man", "woman", "face", "portrait", "team", "employee",
             "people", "crowd", "character", "figure", "human", "child", "baby",
             "girl", "boy", "lady", "gentleman", "fairy", "elf", "dwarf", "santa claus",
             "mermaid", "angel", "demon", "witch", "wizard", "creature", "being",
             "model", "actor", "actress", "celebrity", "avatar", "group"),
            "blurry face, lowres face, deformed pupils, bad anatomy, malformed hands, extra fingers, uneven eyes, distorted face, "
            "unrealistic skin, mutated, ugly, disfigured, poorly drawn face, "
            "missing limbs, extra limbs, fused fingers, too many fingers, bad teeth, "
            "mutated hands, long neck, extra wings, multiple wings, grainy face, noisy face, "
            "compression artifacts, rendering artifacts, digital artifacts, overprocessed face, oversmoothed face",
        ),
        (
            ("office", "business", "team", "meeting", "corporate", "company", "workplace"),
            "overexposed, oversaturated, harsh lighting, watermark, text, logo, brand",
        ),
        (
            ("product", "packshot", "mockup", "render", "3d", "cgi", "packaging"),
            "plastic texture, noisy, overly reflective surfaces, watermark, text, low poly",
        ),
        (
            ("landscape", "nature", "mountain", "forest", "outdoor", "beach", "sky"),
            "blurry, oversaturated, unnatural colors, distorted horizon, floating objects",
        ),
        (
            ("logo", "symbol", "icon", "typography", "badge", "emblem"),
            "watermark, signature, username, text, writing, scribble, messy",
        ),
        (
            ("building", "architecture", "house", "interior", "room", "facade"),
            "deformed, distorted perspective, floating objects, collapsing structure",
        ),
    )

    fragments = [base_negatives]
    fragments.extend(
        negatives
        for keywords, negatives in topic_rules
        if any(word in prompt_text for word in keywords)
    )

    # Flatten to single terms so that repeated terms ("blurry", "watermark",
    # "text", ...) are emitted only once and spacing is normalized.
    seen = set()
    terms = []
    for fragment in fragments:
        for raw_term in fragment.split(","):
            term = raw_term.strip()
            if term and term not in seen:
                seen.add(term)
                terms.append(term)

    return ", ".join(terms)
| |
|
| | |
def sort_coordinates(x1, y1, x2, y2):
    """Order two corner points so that the result is (left, top, right, bottom)."""
    left = x2 if x2 < x1 else x1
    right = x2 if x2 > x1 else x1
    top = y2 if y2 < y1 else y1
    bottom = y2 if y2 > y1 else y1
    return left, top, right, bottom
| |
|
| | |
def create_face_mask(image, bbox_coords, mode):
    """
    Create an inpainting mask for one of the three editing modes.
    White pixels (255) are CHANGED, black pixels (0) are PRESERVED.

    Parameters:
    - image: PIL Image; only its size is used
    - bbox_coords: [x1, y1, x2, y2]; corner order does not matter
    - mode: "environment_change", "focus_change", "face_only_change"

    Returns:
    - PIL Image (mode "L"). The mask stays completely black when the box
      is missing/incomplete or the mode is not one of the three above.
    """
    mask = Image.new("L", image.size, 0)

    box_is_usable = bool(bbox_coords) and not any(c is None for c in bbox_coords)
    if not box_is_usable:
        return mask

    # Normalize the corner order, then clamp the box into the image.
    left, top, right, bottom = sort_coordinates(*bbox_coords)
    last_col = image.width-1
    last_row = image.height-1
    left = max(0, min(left, last_col))
    top = max(0, min(top, last_row))
    right = max(0, min(right, last_col))
    bottom = max(0, min(bottom, last_row))

    painter = ImageDraw.Draw(mask)
    box = [left, top, right, bottom]

    if mode == "environment_change":
        # Everything is editable except the protected box.
        painter.rectangle([0, 0, image.size[0], image.size[1]], fill=255)
        painter.rectangle(box, fill=0)
        print(f"🎯 MODUS: Umgebung ändern - Alles außer BBox wird verändert (BBox: {left},{top},{right},{bottom})")
    elif mode == "focus_change" or mode == "face_only_change":
        # Both modes edit only the inside of the box; they differ in the log line.
        painter.rectangle(box, fill=255)
        headline = "Focus verändern" if mode == "focus_change" else "Ausschließlich Gesicht"
        print(f"🎯 MODUS: {headline} - Nur innerhalb der BBox wird verändert (BBox: {left},{top},{right},{bottom})")

    return mask
| |
|
| | |
def scale_image_and_mask_together(image, mask_inpaint, mask_composite, target_size=512, bbox_coords=None, mode=None):
    """
    Scale an image and its two masks together and letterbox them onto a
    square canvas. The aspect ratio is kept; the remaining area is padded
    with black (image) / 0 (masks) and the content is centred.

    Parameters:
    - image: PIL Image (pasted onto an RGB canvas)
    - mask_inpaint: PIL Image, mask handed to the inpainting step
    - mask_composite: PIL Image, mask kept for the later compositing step
    - target_size: edge length of the square output (default 512)
    - bbox_coords: optional [x1, y1, x2, y2] in original pixel coordinates
    - mode: opaque value stored in padding_info['mode']

    Returns:
    - padded_image: scaled image with padding (RGB)
    - padded_mask_inpaint: scaled inpaint mask with padding (L)
    - padded_mask_composite: scaled composite mask with padding (L)
    - padding_info: dict with offsets, scaled/original sizes, scale factor,
      target size, original and scaled bbox, and mode

    Raises:
    - ValueError: if any input is None or the three sizes differ
    """
    if image is None or mask_inpaint is None or mask_composite is None:
        raise ValueError("Bild oder Maske ist None")

    if image.size != mask_inpaint.size or image.size != mask_composite.size:
        # f-string so that the actual sizes show up in the message.
        raise ValueError(
            f"Bild und Masken haben unterschiedliche Größen: "
            f"{image.size} vs {mask_inpaint.size} vs {mask_composite.size}"
        )

    original_width, original_height = image.size

    # The longer edge is mapped onto target_size.
    scale = target_size / max(original_width, original_height)

    # Never let the short edge collapse to 0 px for extreme aspect ratios;
    # resizing to a zero-sized image fails.
    new_width = max(1, int(original_width * scale))
    new_height = max(1, int(original_height * scale))

    print(f"📐 Gemeinsame Skalierung: {original_width}x{original_height} → {new_width}x{new_height} (Skalierung: {scale:.4f})")

    # Masks use NEAREST so that no intermediate grey values are introduced.
    scaled_image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)
    scaled_mask_inpaint = mask_inpaint.resize((new_width, new_height), Image.Resampling.NEAREST)
    scaled_mask_composite = mask_composite.resize((new_width, new_height), Image.Resampling.NEAREST)

    padded_image = Image.new("RGB", (target_size, target_size), (0, 0, 0))
    padded_mask_inpaint = Image.new("L", (target_size, target_size), 0)
    padded_mask_composite = Image.new("L", (target_size, target_size), 0)

    # Centre the scaled content on the square canvas.
    x_offset = (target_size - new_width) // 2
    y_offset = (target_size - new_height) // 2

    padded_image.paste(scaled_image, (x_offset, y_offset))
    padded_mask_inpaint.paste(scaled_mask_inpaint, (x_offset, y_offset))
    padded_mask_composite.paste(scaled_mask_composite, (x_offset, y_offset))

    # Map the bounding box into canvas coordinates with the same transform.
    scaled_bbox = None
    if bbox_coords and all(c is not None for c in bbox_coords):
        x1, y1, x2, y2 = bbox_coords
        scaled_bbox = (
            int(x1 * scale) + x_offset,
            int(y1 * scale) + y_offset,
            int(x2 * scale) + x_offset,
            int(y2 * scale) + y_offset
        )
        print(f"📐 Skalierte BBox gespeichert: {scaled_bbox} (von {bbox_coords})")

    padding_info = {
        'x_offset': x_offset,
        'y_offset': y_offset,
        'scaled_width': new_width,
        'scaled_height': new_height,
        'original_width': original_width,
        'original_height': original_height,
        'scale_factor': scale,
        'target_size': target_size,
        'original_bbox': bbox_coords,
        'scaled_bbox': scaled_bbox,
        'mode': mode
    }

    print(f"📦 Padding hinzugefügt: Offsets ({x_offset}, {y_offset})")
    print(f"BBox gespeicher: {bbox_coords}, Modus:{mode}")
    print(f"✅ 1 Bild + 2 Masken skaliert. Inpaint-Maske binär: {np.unique(np.array(padded_mask_inpaint))}")

    return padded_image, padded_mask_inpaint, padded_mask_composite, padding_info
| |
|
| |
|
| |
|
| | |
def enhanced_composite_with_sam(original_image, inpaint_result, original_mask,
                                padding_info, bbox_coords, mode):
    """
    Composite the inpainting result back onto the original image.

    Parameters:
    - original_image: PIL Image at original resolution
    - inpaint_result: PIL Image produced on the padded square canvas
    - original_mask: PIL Image at original resolution; used as the blend mask
      (NOTE(review): presumably the SAM mask named in the log lines - confirm)
    - padding_info: dict; the keys 'x_offset', 'y_offset', 'scaled_width',
      'scaled_height', 'scale_factor', 'original_width', 'original_height'
      are required, 'scaled_bbox' is optional
    - bbox_coords: [x1, y1, x2, y2] in original pixel coordinates
    - mode: "environment_change" selects the background branch; any other
      value selects the bounding-box branch

    Returns:
    - inpaint_result unchanged when no scaling/padding was applied,
      otherwise an RGB PIL Image at original resolution
    """
    print(f"🎨 Verbessertes Compositing für Modus: {mode}")

    x_offset = padding_info['x_offset']
    y_offset = padding_info['y_offset']
    scaled_width = padding_info['scaled_width']
    scaled_height = padding_info['scaled_height']
    scale_factor = padding_info['scale_factor']
    original_width = padding_info['original_width']
    original_height = padding_info['original_height']

    # Case 1: nothing was scaled or padded, the result is already final.
    if scale_factor == 1.0 and x_offset == 0 and y_offset == 0:
        print(f"✅ FALL 1: Bild 512×512 - kein Compositing nötig")
        return inpaint_result

    print(f"🔄 FALL 2/3: Bild skaliert - Compositing mit SAM-Maske")

    # Strip the padding from the square canvas.
    downscaled_result = inpaint_result.crop(
        (x_offset, y_offset, x_offset + scaled_width, y_offset + scaled_height)
    )

    final_image = original_image.copy()

    if mode == "environment_change":
        print("🌳 Modus: Umwelt ändern mit SAM-Maske")

        # The generated image, scaled back to original size, becomes the base.
        new_background = downscaled_result.resize(
            (original_width, original_height),
            Image.Resampling.LANCZOS
        )

        original_with_alpha = original_image.copy().convert("RGBA")

        # Inverted mask as alpha: original pixels stay opaque where
        # original_mask is dark.
        mask_inverted = Image.eval(original_mask, lambda x: 255 - x)

        # Blur the alpha so the seam between original and generated is soft.
        soft_mask = mask_inverted.filter(ImageFilter.GaussianBlur(3))

        original_with_alpha.putalpha(soft_mask)

        final_image = new_background.copy().convert("RGBA")

        final_image.paste(original_with_alpha, (0, 0), original_with_alpha)

    else:
        mode_name = "Focus" if mode == "focus_change" else "Gesicht"
        print(f"👤 Modus: {mode_name} ändern mit SAM-Maske")

        if not bbox_coords or not all(c is not None for c in bbox_coords):
            # Without a usable box the whole generated image is returned.
            final_image = downscaled_result.resize(
                (original_width, original_height),
                Image.Resampling.LANCZOS
            )
            return final_image.convert("RGB")

        # Prefer the canvas-space box stored in padding_info; otherwise
        # recompute it from scale factor and offsets.
        if 'scaled_bbox' in padding_info and padding_info['scaled_bbox'] is not None:
            bbox_in_512 = padding_info['scaled_bbox']
            print(f"✅ Verwende gespeicherte BBox: {bbox_in_512}")
        else:
            bbox_scaled = (
                int(bbox_coords[0] * scale_factor),
                int(bbox_coords[1] * scale_factor),
                int(bbox_coords[2] * scale_factor),
                int(bbox_coords[3] * scale_factor)
            )

            bbox_in_512 = (
                bbox_scaled[0] + x_offset,
                bbox_scaled[1] + y_offset,
                bbox_scaled[2] + x_offset,
                bbox_scaled[3] + y_offset
            )

        print(f"🔍 [COMPOSIT] Original-BBox: {bbox_coords}")
        print(f"🔍 [COMPOSIT] Scale/Offset: {scale_factor}, ({x_offset},{y_offset})")
        print(f"🔍 [COMPOSIT] BBox in 512: {bbox_in_512}")
        print(f"🔍 [COMPOSIT] Inpaint Size: {inpaint_result.size}")

        # A degenerate box is skipped; final_image then stays a copy of the original.
        if bbox_in_512[2] > bbox_in_512[0] and bbox_in_512[3] > bbox_in_512[1]:
            edited_region = inpaint_result.crop(bbox_in_512)

            print(f"🔍 [CROP] Ausgeschnitten: {edited_region.size}")

            # Scale the edited region back to the box size in the original image.
            # NOTE(review): assumes bbox_coords holds ordered integers
            # (x1 < x2, y1 < y2) - confirm against the caller.
            original_bbox_size = (bbox_coords[2] - bbox_coords[0],
                                  bbox_coords[3] - bbox_coords[1])
            edited_region_fullsize = edited_region.resize(
                original_bbox_size,
                Image.Resampling.LANCZOS
            )

            print(f"🔍 [RESIZE] Original-BBox-Size: {original_bbox_size}")
            print(f"🔍 [RESIZE] Hochskaliert auf: {edited_region_fullsize.size}")

            mask_cropped = original_mask.crop(bbox_coords)
            print(f"🔍 [MASK] Mask-Crop Size: {mask_cropped.size}")

            soft_mask = mask_cropped.filter(ImageFilter.GaussianBlur(3))

            edited_rgba = edited_region_fullsize.convert("RGBA")

            # The inverted mask is the paste alpha: edited pixels are written
            # where original_mask is dark inside the box.
            # NOTE(review): in the environment branch dark mask pixels keep
            # the ORIGINAL; here they take the EDITED pixels. Confirm that the
            # caller passes a mask with the matching polarity for this mode.
            mask_inverted = Image.eval(soft_mask, lambda x: 255 - x)
            mask_rgba = mask_inverted.convert("L")

            print(f"🔍 Alpha-Maske Werte: min={np.array(mask_rgba).min()}, max={np.array(mask_rgba).max()}")
            print(f"🔍 Generierte Person Alpha: {edited_rgba.getchannel('A').getextrema()}")

            # Build a transparent tile that carries only the masked edited pixels.
            temp_image = Image.new("RGBA", original_bbox_size, (0, 0, 0, 0))

            temp_image.paste(edited_rgba, (0, 0), mask_rgba)

            final_image.paste(temp_image, (bbox_coords[0], bbox_coords[1]), temp_image)

    print(f"🔍 DEBUG COMPOSITING:")
    print(f" Original BBox: {bbox_coords}")
    print(f" Scale Factor: {scale_factor}")
    print(f" Offsets: ({x_offset}, {y_offset})")
    print(f" Inpaint Size: {inpaint_result.size}")
    print(f"✅ Korrektes Compositing abgeschlossen. Finale Größe: {final_image.size}")

    return final_image.convert("RGB")
| |
|
| |
|
def auto_detect_face_area(image):
    """Suggest a face box from the image size alone (no detection library).

    The box is 40% of the shorter image edge wide, 1.2 times as tall,
    horizontally centred, and starts a quarter of the free height from the
    top. Returns [x1, y1, x2, y2] as integers clamped to the image.
    """
    width, height = image.size
    side = min(width, height) * 0.4

    raw_left = (width - side) / 2
    raw_top = (height - side) / 4
    raw_right = raw_left + side
    raw_bottom = raw_top + side * 1.2

    # Truncate to whole pixels and keep the box inside the image.
    left = max(0, int(raw_left))
    top = max(0, int(raw_top))
    right = min(width, int(raw_right))
    bottom = min(height, int(raw_bottom))

    box = [left, top, right, bottom]
    print(f"Geschätzte Gesichtskoordinaten: [{left}, {top}, {right}, {bottom}] (Bild: {width}x{height})")
    return box
| |
|
| | |
# Lazily filled pipeline caches (None until first use).
# Text-to-image pipeline and the model id it was loaded for (see load_txt2img).
pipe_txt2img = None
current_pipe_model_id = None
# NOTE(review): not referenced in the visible part of the file - confirm usage.
pipe_img2img = None
# ControlNet inpaint pipelines managed by load_img2img():
# OpenPose + Canny and Depth + Canny respectively.
pipe_img2img_pose = None
pipe_img2img_depth = None
| |
|
| |
|
| | |
| | |
| | |
| | |
def load_txt2img(model_id):
    """Load (or reuse) the text-to-image pipeline for the selected model.

    The pipeline is cached in the module globals pipe_txt2img /
    current_pipe_model_id and reused while the same model_id is requested.
    If loading the requested model fails, Stable Diffusion 1.5 is loaded
    as a fallback; if that fails as well, the fallback error is re-raised.

    Parameters:
    - model_id: Hugging Face repository id; ids missing from MODEL_CONFIGS
      use the SD 1.5 configuration entry

    Returns:
    - the loaded StableDiffusionPipeline
    """
    global pipe_txt2img, current_pipe_model_id

    # Reuse the cached pipeline when the model has not changed.
    if pipe_txt2img is not None and current_pipe_model_id == model_id:
        print(f"✅ Modell {model_id} bereits geladen")
        return pipe_txt2img

    print(f"🔄 Lade Modell: {model_id}")

    config = MODEL_CONFIGS.get(model_id, MODEL_CONFIGS["runwayml/stable-diffusion-v1-5"])
    print(f"📋 Modell-Konfiguration: {config['name']}")
    print(f"📝 Beschreibung: {config['description']}")

    try:
        # Optional external VAE; a failure here is tolerated and the
        # pipeline is loaded without it.
        vae = None
        if config.get("requires_vae", False):
            print(f"🔧 Lade externe VAE: {config['vae_model']}")
            try:
                vae = AutoencoderKL.from_pretrained(
                    config["vae_model"],
                    torch_dtype=torch_dtype
                ).to(device)
                print("✅ VAE erfolgreich geladen")
            except Exception as vae_error:
                print(f"⚠️ Fehler beim Laden der VAE: {vae_error}")
                print("ℹ️ Versuche ohne VAE weiter...")
                vae = None

        # NOTE(review): confirm that "add_watermarker" (and "allow_pickle"
        # below) are accepted by StableDiffusionPipeline.from_pretrained and
        # not merely ignored.
        model_params = {
            "torch_dtype": torch_dtype,
            "safety_checker": None,
            "requires_safety_checker": False,
            "add_watermarker": False,
            "cache_dir": "/tmp/models"
        }

        # NOTE(review): the .bin-based model gets allow_pickle=False while the
        # safetensors path gets allow_pickle=True - this looks inverted; verify.
        if model_id == "SG161222/Realistic_Vision_V6.0_B1_noVAE":
            model_params["allow_pickle"] = False
            model_params["use_safetensors"] = False
            print("⚠️ Realistic Vision Modell - Nutzt .bin-Dateien.")
        else:
            model_params["allow_pickle"] = True
            model_params["use_safetensors"] = True
            print("✅ Verwende SafeTensors für sicheres Laden.")

        # Request the fp16 weight variant only when the model config allows
        # it and half precision is in use.
        if config.get("supports_fp16", False) and torch_dtype == torch.float16:
            model_params["variant"] = "fp16"
            print("ℹ️ Verwende FP16 Variante")
        else:
            print("ℹ️ Verwende Standard Variante (kein FP16)")

        if vae is not None:
            model_params["vae"] = vae

        print(f"📥 Lade Hauptmodell von Hugging Face...")
        pipe_txt2img = StableDiffusionPipeline.from_pretrained(
            model_id,
            **model_params
        ).to(device)

        print("⚙️ Konfiguriere Scheduler...")

        if pipe_txt2img.scheduler is None:
            print("⚠️ Scheduler ist None, setze Standard-Scheduler")
            pipe_txt2img.scheduler = PNDMScheduler.from_pretrained(
                model_id,
                subfolder="scheduler"
            )

        # Swap in the DPM-Solver scheduler; on failure the existing
        # scheduler is kept.
        try:
            if hasattr(pipe_txt2img.scheduler, 'config'):
                scheduler_config = pipe_txt2img.scheduler.config
            else:
                # Hard-coded fallback configuration when none is available.
                scheduler_config = {
                    "beta_start": 0.00085,
                    "beta_end": 0.012,
                    "beta_schedule": "scaled_linear",
                    "num_train_timesteps": 1000,
                    "prediction_type": "epsilon",
                    "steps_offset": 1
                }
                print("⚠️ Keine Scheduler-Konfig gefunden, verwende Standard")

            pipe_txt2img.scheduler = DPMSolverMultistepScheduler.from_config(
                scheduler_config,
                use_karras_sigmas=True,
                algorithm_type="sde-dpmsolver++"
            )
            print("✅ DPM-Solver Multistep Scheduler konfiguriert")

        except Exception as scheduler_error:
            print(f"⚠️ Konnte DPM-Scheduler nicht setzen: {scheduler_error}")
            print("ℹ️ Verwende Standard-Scheduler weiter")

        pipe_txt2img.enable_attention_slicing()
        print("✅ Attention Slicing aktiviert")

        # VAE slicing is best-effort; errors are only logged.
        if hasattr(pipe_txt2img, 'vae') and pipe_txt2img.vae is not None:
            try:
                pipe_txt2img.enable_vae_slicing()
                if hasattr(pipe_txt2img.vae, 'enable_slicing'):
                    pipe_txt2img.vae.enable_slicing()
                print("✅ VAE Slicing aktiviert")
            except Exception as vae_slice_error:
                print(f"⚠️ VAE Slicing nicht möglich: {vae_slice_error}")

        # Remember which model the cached pipeline belongs to.
        current_pipe_model_id = model_id
        print(f"✅ {config['name']} erfolgreich geladen")
        print(f"📊 Modell-Dtype: {pipe_txt2img.dtype}")
        print(f"📊 Scheduler: {type(pipe_txt2img.scheduler).__name__}")
        print(f"⚙️ Empfohlene Einstellungen: Steps={config['recommended_steps']}, CFG={config['recommended_cfg']}")

        return pipe_txt2img

    except Exception as e:
        print(f"❌ Fehler beim Laden von {model_id}: {str(e)[:200]}...")
        import traceback
        traceback.print_exc()
        print("🔄 Fallback auf SD 1.5...")

        # Fallback: plain SD 1.5 with default scheduler. Unlike the main
        # path, no safety_checker / cache_dir arguments are passed here.
        try:
            pipe_txt2img = StableDiffusionPipeline.from_pretrained(
                "runwayml/stable-diffusion-v1-5",
                torch_dtype=torch_dtype,
                use_safetensors=True,
            ).to(device)
            pipe_txt2img.enable_attention_slicing()
            current_pipe_model_id = "runwayml/stable-diffusion-v1-5"
            print("✅ Fallback auf SD 1.5 erfolgreich")

            return pipe_txt2img
        except Exception as fallback_error:
            print(f"❌ Auch Fallback fehlgeschlagen: {fallback_error}")
            raise
| |
|
| |
|
def _load_controlnet_inpaint_pipeline(primary_controlnet_id, label):
    """Build an SD 1.5 inpaint pipeline driven by [primary ControlNet, Canny]."""
    print(f"🔄 Lade Multi-ControlNet-Inpainting-Modell ({label} + Canny)...")
    try:
        controlnet_primary = ControlNetModel.from_pretrained(
            primary_controlnet_id,
            torch_dtype=torch_dtype
        )
        controlnet_canny = ControlNetModel.from_pretrained(
            "lllyasviel/sd-controlnet-canny",
            torch_dtype=torch_dtype
        )

        # The order of the list defines the order of the control images
        # the caller has to pass: primary first, Canny second.
        pipe = StableDiffusionControlNetInpaintPipeline.from_pretrained(
            "runwayml/stable-diffusion-v1-5",
            controlnet=[controlnet_primary, controlnet_canny],
            torch_dtype=torch_dtype,
            safety_checker=None,
            requires_safety_checker=False,
            cache_dir="/tmp/models",
            use_safetensors=True
        ).to(device)

        pipe.scheduler = DPMSolverMultistepScheduler.from_config(
            pipe.scheduler.config,
            algorithm_type="sde-dpmsolver++",
            use_karras_sigmas=True,
            timestep_spacing="trailing"
        )

        pipe.enable_attention_slicing()
        print(f"✅ Multi-ControlNet-Inpainting-Pipeline geladen ({label} + Canny)")

    except Exception as e:
        print(f"❌ Fehler beim Laden der {label}+Canny Pipeline: {e}")
        raise

    return pipe


def load_img2img(keep_environment=False):
    """Return the cached ControlNet inpaint pipeline, loading it on first use.

    Parameters:
    - keep_environment: True selects the Depth + Canny pipeline, False
      (default) the OpenPose + Canny pipeline

    Returns:
    - the StableDiffusionControlNetInpaintPipeline for the chosen variant

    Raises:
    - whatever the model loading raises; the error is logged and re-raised.
      The module-level cache is only filled after the pipeline has been
      configured completely, so a failed attempt is retried on the next call
      instead of handing out a half-configured pipeline.
    """
    global pipe_img2img_pose, pipe_img2img_depth

    # globals().get() also covers the case that the cache variable has not
    # been created at module level yet.
    if keep_environment:
        if globals().get("pipe_img2img_depth") is None:
            pipe_img2img_depth = _load_controlnet_inpaint_pipeline(
                "lllyasviel/sd-controlnet-depth", "Depth"
            )
        return pipe_img2img_depth

    if globals().get("pipe_img2img_pose") is None:
        pipe_img2img_pose = _load_controlnet_inpaint_pipeline(
            "lllyasviel/sd-controlnet-openpose", "OpenPose"
        )
    return pipe_img2img_pose
| |
|
| |
|
| | |
| | |
| | |
| | |
class TextToImageProgressCallback:
    """Step-end callback that forwards denoising progress to a progress reporter.

    Parameters:
    - progress: callable taking (fraction, desc=...), e.g. a gr.Progress
    - total_steps: number of inference steps requested for the run
    """

    def __init__(self, progress, total_steps):
        self.progress = progress
        self.total_steps = total_steps
        self.current_step = 0

    def __call__(self, pipe, step, timestep, callback_kwargs):
        """Report progress after a step and hand callback_kwargs back unchanged.

        `step` is the zero-based index of the step that has just finished,
        so `step + 1` steps are done. The fraction is capped at 1.0 and a
        missing/zero step count is treated as one step to avoid a division
        by zero.
        """
        self.current_step = step + 1
        total = max(1, int(self.total_steps or 0))
        fraction = min(1.0, self.current_step / total)
        self.progress(fraction, desc="Generierung läuft...")
        return callback_kwargs
| |
|
class ImageToImageProgressCallback:
    """Step-end callback for strength-based runs (img2img / inpainting).

    Only `int(total_steps * strength)` denoising steps are assumed to run,
    so progress is measured against that number.

    Parameters:
    - progress: callable taking (fraction, desc=...), e.g. a gr.Progress
    - total_steps: number of inference steps requested
    - strength: denoising strength the pipeline is called with
    """

    def __init__(self, progress, total_steps, strength):
        self.progress = progress
        self.total_steps = total_steps
        self.current_step = 0
        self.strength = strength
        self.actual_total_steps = None

    def __call__(self, pipe, step, timestep, callback_kwargs):
        """Report progress after a step and hand callback_kwargs back unchanged."""
        self.current_step = step + 1

        if self.actual_total_steps is None:
            # At least one step: a small strength would otherwise round down
            # to 0 and make the division below fail.
            self.actual_total_steps = max(1, int(self.total_steps * self.strength))

            print(f"🎯 Steps: {self.total_steps} × {self.strength} → {self.actual_total_steps} tatsächliche Denoising-Schritte")

        # `step` is zero-based, so step + 1 steps are finished; cap at 100%.
        fraction = min(1.0, self.current_step / self.actual_total_steps)
        self.progress(fraction, desc="Generierung läuft...")
        return callback_kwargs
| | |
| |
|
| | |
def create_preview_image(image, bbox_coords, mode):
    """
    Draw a preview of the selection: a coloured frame around the whole
    image plus, when a valid box is given, the box outline with a label.

    Parameters:
    - image: PIL Image (a copy is drawn on; the input is not modified)
    - bbox_coords: [x1, y1, x2, y2]; corner order does not matter
    - mode: "environment_change", "focus_change", "face_only_change";
      any other value gets a grey "unknown mode" style

    Returns:
    - PIL Image with frame and label, or None when image is None
    """
    if image is None:
        return None

    preview = image.copy()
    draw = ImageDraw.Draw(preview)

    # (frame colour, label text, box colour, label background) per mode.
    mode_styles = (
        ("environment_change",
         ((0, 255, 0, 180), "UMGEBUNG ÄNDERN (Bereich geschützt)", (255, 255, 0, 200), (0, 128, 0, 160))),
        ("focus_change",
         ((255, 165, 0, 180), "FOCUS VERÄNDERN (Bereich+Körper)", (255, 0, 0, 200), (255, 140, 0, 160))),
        ("face_only_change",
         ((255, 0, 0, 180), "NUR BEREICH VERÄNDERN", (255, 0, 0, 200), (128, 0, 0, 160))),
    )
    unknown_style = ((128, 128, 128, 180), "UNBEKANNTER MODUS", (128, 128, 128, 200), (64, 64, 64, 160))
    border_color, mode_text, box_color, text_bg_color = next(
        (style for key, style in mode_styles if mode == key),
        unknown_style,
    )

    # Frame thickness grows with the image width, but is at least 8 px.
    frame_width = max(8, image.width // 200)
    draw.rectangle([0, 0, preview.width-1, preview.height-1],
                   outline=border_color, width=frame_width)

    if not bbox_coords or any(coord is None for coord in bbox_coords):
        return preview

    left, top, right, bottom = sort_coordinates(*bbox_coords)

    # Clamp the box into the preview.
    last_col = preview.width-1
    last_row = preview.height-1
    left = max(0, min(left, last_col))
    top = max(0, min(top, last_row))
    right = max(0, min(right, last_col))
    bottom = max(0, min(bottom, last_row))

    # Nothing more to draw for an empty box.
    if not (right > left and bottom > top):
        return preview

    outline_width = max(3, image.width // 400)
    draw.rectangle([left, top, right, bottom], outline=box_color, width=outline_width)

    text_color = (255, 255, 255)

    # Label sits just above the box, or at the top edge if there is no room.
    label_y = max(0, top - 25)
    label_box = draw.textbbox((left, label_y), mode_text)
    draw.rectangle([label_box[0]-5, label_box[1]-2, label_box[2]+5, label_box[3]+2],
                   fill=text_bg_color)

    draw.text((left, label_y), mode_text, fill=text_color)

    return preview
| |
|
def update_live_preview(image, bbox_x1, bbox_y1, bbox_x2, bbox_y2, mode):
    """
    Refresh the live preview when a coordinate or the mode changes.

    Parameters:
    - image: PIL Image or None
    - bbox_x1, bbox_y1, bbox_x2, bbox_y2: box corners; any of them may be
      None (e.g. an emptied input field)
    - mode: one of the three editing modes

    Returns:
    - the preview image, or None when no image is loaded. With an
      incomplete box only the mode frame is drawn.
    """
    if image is None:
        return None

    bbox_coords = [bbox_x1, bbox_y1, bbox_x2, bbox_y2]

    # Sorting compares the values and would raise TypeError on None;
    # create_preview_image skips the box itself when a coordinate is missing.
    if all(coord is not None for coord in bbox_coords):
        bbox_coords = sort_coordinates(*bbox_coords)

    return create_preview_image(image, bbox_coords, mode)
| | |
| |
|
def process_image_upload(image):
    """Handle an image upload.

    Returns (preview, x1, y1, x2, y2): the preview drawn in
    "environment_change" mode and the suggested face box. When no image
    was uploaded, five None values are returned instead of failing.
    """
    if image is None:
        return (None,) * 5

    width, height = image.size

    # Heuristic box suggestion, normalized to (left, top, right, bottom).
    left, top, right, bottom = sort_coordinates(*auto_detect_face_area(image))

    preview = create_preview_image(image, [left, top, right, bottom], "environment_change")

    print(f"Bild {width}x{height} -> Slider-Originalwerte: [{left}, {top}, {right}, {bottom}]")

    return preview, left, top, right, bottom
| | |
| |
|
| | |
def update_slider_for_image(image):
    """Return four slider updates (x1, y1, x2, y2) with maxima fitted to the image.

    Without an image every maximum is MAX_IMAGE_SIZE; otherwise the x
    sliders are limited to the image width and the y sliders to the image
    height, each capped at MAX_IMAGE_SIZE.
    """
    if image is None:
        max_width = max_height = MAX_IMAGE_SIZE
    else:
        width, height = image.size
        max_width = min(width, MAX_IMAGE_SIZE)
        max_height = min(height, MAX_IMAGE_SIZE)
        print(f"Slider-Maxima gesetzt auf: {max_width}x{max_height}")

    limits = (max_width, max_height, max_width, max_height)
    return tuple(gr.update(maximum=limit) for limit in limits)
| |
|
| |
|
| |
|
def text_to_image(prompt, model_id, steps, guidance_scale, progress=gr.Progress()):
    """Generate one 512x512 image from a text prompt.

    A negative prompt is derived automatically from the prompt. Unless the
    prompt already contains a quality keyword or a weighting syntax such as
    "(word:1.2)" / "[word]", it is prefixed with "masterpiece, best quality, ".

    Returns (image, status message). On an empty prompt or on any error
    the image is None and the message explains why.
    """
    try:
        if not prompt or not prompt.strip():
            return None, "Bitte einen Prompt eingeben"

        banner = "\n" + "="*80
        print(banner)
        print(f"🚀 Starte Generierung mit Modell: {model_id}")
        print(banner)
        print(f"📝 Prompt: {prompt}")

        auto_negatives = auto_negative_prompt(prompt)
        print(f"🤖 Automatisch generierte Negative Prompts: {auto_negatives}")

        start_time = time.time()

        quality_keywords = ('masterpiece', 'best quality', 'high quality', 'highly detailed',
                            'exquisite', 'ultra detailed', 'professional',
                            'perfect', 'excellent', 'amazing', 'stunning', 'beautiful')

        # First quality keyword the user already wrote, if any.
        lowered_prompt = prompt.lower()
        found_keyword = next((kw for kw in quality_keywords if kw in lowered_prompt), None)
        keep_prompt_as_is = found_keyword is not None
        if keep_prompt_as_is:
            print(f"✓ Benutzer verwendet bereits Qualitätswort: {found_keyword}")

        # Explicit weights also count as "the user tuned the prompt".
        weight_patterns = (r'\([^)]+:\d+(\.\d+)?\)', r'\[[^\]]+\]')
        if any(re.search(pattern, prompt) for pattern in weight_patterns):
            keep_prompt_as_is = True
            print("✓ Benutzer verwendet bereits Gewichte im Prompt")

        if keep_prompt_as_is:
            enhanced_prompt = prompt
            print("✓ Benutzerprompt wird unverändert verwendet")
        else:
            enhanced_prompt = f"masterpiece, best quality, {prompt}"
            print(f"🔄 Verbesserter Prompt: {enhanced_prompt}")

        print(f"Finaler Prompt für Generation: {enhanced_prompt}")

        progress(0, desc="Lade Modell...")
        pipe = load_txt2img(model_id)

        # Fresh random seed per call; printed so a result can be traced.
        seed = random.randint(0, 2**32 - 1)
        generator = torch.Generator(device=device).manual_seed(seed)
        print(f"🌱 Seed: {seed}")

        callback = TextToImageProgressCallback(progress, steps)

        print(f"⚙️ Einstellungen: Steps={steps}, CFG={guidance_scale}")

        result = pipe(
            prompt=enhanced_prompt,
            negative_prompt=auto_negatives,
            height=512,
            width=512,
            num_inference_steps=int(steps),
            guidance_scale=guidance_scale,
            generator=generator,
            callback_on_step_end=callback,
            callback_on_step_end_tensor_inputs=[],
        )
        image = result.images[0]

        duration = time.time() - start_time
        print(f"✅ Bild generiert in {duration:.2f} Sekunden")

        config = MODEL_CONFIGS.get(model_id, MODEL_CONFIGS["runwayml/stable-diffusion-v1-5"])
        return image, f"✅ Generiert mit {config['name']} in {duration:.1f}s"

    except Exception as e:
        # UI boundary: report the error as status text instead of raising.
        error_msg = f"❌ Fehler: {str(e)}"
        print(f"❌ Fehler in text_to_image: {e}")
        import traceback
        traceback.print_exc()
        return None, error_msg
| | |
| |
|
| |
|
def _i2i_smoothstep(min_val, max_val, x):
    """Return the cubic smoothstep of ``x``: 0 at/below ``min_val``, 1 at/above ``max_val``."""
    x = max(0, min(1, (x - min_val) / (max_val - min_val)))
    return x * x * (3 - 2 * x)


def _i2i_clamp(value, low, high):
    """Limit ``value`` to the closed interval ``[low, high]``."""
    return max(low, min(value, high))


def _i2i_conditioning_scale(controlnet_strength, structure_ratio, canny_ratio):
    """Split the overall ControlNet strength across the two control maps (structure map first, Canny second)."""
    return [controlnet_strength * structure_ratio, controlnet_strength * canny_ratio]


def _i2i_merge_negative_prompts(user_neg, auto_negatives):
    """Merge two comma-separated negative prompts into one, lower-cased and de-duplicated.

    User terms come first, automatic terms are appended; empty terms are dropped and the
    first occurrence of each term wins.
    """
    terms = [term.strip().lower() for term in user_neg.split(",")]
    terms += [term.strip().lower() for term in auto_negatives.split(",")]
    return ", ".join(dict.fromkeys(term for term in terms if term))


def _i2i_detect_face_orientation(prompt_lower):
    """Classify a lower-cased prompt as front-face and/or back-of-head by keyword search.

    Returns ``(is_front_face, is_back_head, defaulted)``. When no keyword of either
    group matches, the front view is assumed and ``defaulted`` is True.
    """
    front_face_keywords = [
        "portrait", "face", "eyes", "smile", "lips", "nose", "expression",
        "looking at camera", "frontal view", "headshot", "selfie", "close-up",
        "profile", "side view", "front", "frontal", "facing camera", "jawline"
    ]
    back_head_keywords = [
        "back of head", "from behind", "rear view", "looking away",
        "turned away", "back view", "backside", "back", "rear",
        "hair only", "ponytail", "hairstyle", "hair", "back hair"
    ]

    is_front_face = any(keyword in prompt_lower for keyword in front_face_keywords)
    is_back_head = any(keyword in prompt_lower for keyword in back_head_keywords)

    defaulted = not is_front_face and not is_back_head
    if defaulted:
        is_front_face = True

    return is_front_face, is_back_head, defaulted


def _i2i_enhance_prompt(mode, prompt, is_front_face, is_back_head):
    """Prepend the mode-specific quality boosters to ``prompt`` unless the user already wrote them."""
    prompt_lower = prompt.lower()

    if mode == "face_only_change":
        if is_front_face and not is_back_head:
            face_boosters = "(perfect face:1.2), (symmetrical face:1.1), realistic shaded perfect face, "
            already_boosted = any(
                keyword in prompt_lower
                for keyword in ["perfect face", "symmetrical", "realistic face", "shaded face"]
            )
            if not already_boosted:
                print(f"👤 Gesichts-Booster hinzugefügt: {face_boosters}")
                return face_boosters + prompt
            print(f"👤 Benutzer hat bereits Gesichts-Booster im Prompt")
            return prompt

        # Face boosters would fight a back-of-head prompt, so leave it untouched.
        if is_back_head:
            print(f"💇 Hinterkopf erkannt → Keine Gesichts-Booster")
        else:
            print(f"👤 Keine Gesichts-Booster (unspezifischer Prompt)")
        return prompt

    if mode == "focus_change":
        focus_boosters = "(sharp focus:1.2), (detailed subject:1.1), (clear foreground:1.1), "
        already_boosted = any(
            keyword in prompt_lower
            for keyword in ["sharp focus", "detailed subject", "clear foreground", "well-defined"]
        )
        if not already_boosted:
            print(f"🎯 Focus-Booster hinzugefügt: {focus_boosters}")
            return focus_boosters + prompt
        print(f"🎯 Benutzer hat bereits Focus-Booster im Prompt")
        return prompt

    if mode == "environment_change":
        background_boosters = "complete scene, full background, entire environment, "
        already_boosted = any(
            keyword in prompt_lower
            for keyword in ["complete scene", "full background", "entire environment", "whole setting"]
        )
        if not already_boosted:
            print(f"🌳 Hintergrund-Booster hinzugefügt: {background_boosters}")
            return background_boosters + prompt
        print(f"🌳 Benutzer hat bereits Hintergrund-Booster im Prompt")
        return prompt

    return prompt


def _i2i_focus_params(prompt_lower, strength):
    """Derive ``(adj_strength, conditioning_scale)`` for ``focus_change`` (scale order: pose, canny)."""
    # Defaults; used (after clamping) when no keyword group matches.
    adj_strength = min(0.6, strength)
    controlnet_strength = 0.5
    pose_ratio = 0.7
    canny_ratio = 0.3

    humanoid_keywords = [
        "anime", "cartoon", "manga", "witch", "wizard", "sorcerer",
        "alien", "elf", "fairy", "character", "fantasy", "superhero",
        "cyborg", "robot", "android", "santa", "person", "woman", "man",
        "girl", "boy", "child", "business", "suit", "professional",
        "sports", "athlete", "runner", "dancer", "portrait", "face"
    ]
    object_keywords = [
        "car", "vehicle", "automobile", "chair", "table", "desk",
        "statue", "sculpture", "monument", "lamp", "bottle", "vase",
        "product", "object", "furniture", "device", "tool", "item",
        "building", "house", "tree", "plant", "rock", "stone"
    ]
    animal_keywords = [
        "dog", "cat", "wolf", "lion", "tiger", "bear", "rabbit",
        "horse", "bird", "animal", "creature", "beast", "monkey",
        "elephant", "giraffe", "zebra", "deer", "fox", "pet"
    ]

    if any(keyword in prompt_lower for keyword in humanoid_keywords):
        ui_strength = strength

        adj_strength = 0.15 + 0.8 * ui_strength

        # ControlNet influence falls linearly up to 0.7 and rises slightly above it.
        if ui_strength <= 0.7:
            controlnet_strength = 0.85 - 0.83 * ui_strength
        else:
            t = (ui_strength - 0.7) / 0.2
            controlnet_strength = 0.269 + 0.081 * t

        pose_blend = _i2i_smoothstep(0.4, 0.8, ui_strength)
        base_pose = 0.85 - 0.4 * pose_blend

        if ui_strength > 0.6:
            canny_reduction = _i2i_smoothstep(0.6, 0.9, ui_strength) * 0.15
            pose_ratio = min(0.60, base_pose + canny_reduction)
        else:
            pose_ratio = base_pose

        canny_ratio = 1.0 - pose_ratio

        adj_strength = _i2i_clamp(adj_strength, 0.15, 0.95)
        controlnet_strength = _i2i_clamp(controlnet_strength, 0.12, 0.85)
        pose_ratio = _i2i_clamp(pose_ratio, 0.45, 0.60)
        canny_ratio = _i2i_clamp(canny_ratio, 0.40, 0.55)

        conditioning_scale = _i2i_conditioning_scale(controlnet_strength, pose_ratio, canny_ratio)

        print(f"👤 Humanoid → Humanoid (UI-Stärke: {ui_strength})")
        print(f" adj_strength: {adj_strength:.2f}, controlnet: {controlnet_strength:.2f}")
        print(f" Verhältnis: Pose {pose_ratio*100:.0f}% : Canny {canny_ratio*100:.0f}%")
        print(f" Scale: [{conditioning_scale[0]:.3f}, {conditioning_scale[1]:.3f}]")

        print(f"DEBUG UI={ui_strength}: smoothstep={pose_blend:.3f}")
        print(f"DEBUG Pose vor Clipping: {base_pose:.3f}")
        print(f"DEBUG Pose nach Clipping: {pose_ratio:.3f}")

    elif any(keyword in prompt_lower for keyword in object_keywords):
        adj_strength = min(0.7, strength * 1.15)
        controlnet_strength = 0.5
        pose_ratio = 0.10
        canny_ratio = 0.90
        conditioning_scale = _i2i_conditioning_scale(controlnet_strength, pose_ratio, canny_ratio)
        # The ratio in the message is derived from the values so the log cannot drift from them.
        print(f"📦 Gegenstand → Gegenstand → Ratio {pose_ratio*100:.0f}:{canny_ratio*100:.0f} (Pose:Canny)")

    elif any(keyword in prompt_lower for keyword in animal_keywords):
        adj_strength = min(0.6, strength * 1.1)
        controlnet_strength = 0.5
        pose_ratio = 0.5
        canny_ratio = 0.5
        print("🐾 Mensch → Tier → Ratio 50:50 (Pose:Canny)")
        conditioning_scale = _i2i_conditioning_scale(controlnet_strength, pose_ratio, canny_ratio)

    else:
        adj_strength = _i2i_clamp(adj_strength, 0.4, 0.8)
        controlnet_strength = _i2i_clamp(controlnet_strength, 0.3, 0.7)
        pose_ratio = _i2i_clamp(pose_ratio, 0.5, 0.8)
        canny_ratio = _i2i_clamp(canny_ratio, 0.2, 0.5)
        conditioning_scale = _i2i_conditioning_scale(controlnet_strength, pose_ratio, canny_ratio)

    print(f"🎯 MODUS: Focus verändern")
    print(f" Strength: {adj_strength}, ControlNet: {controlnet_strength}")
    print(f" OpenPose: {pose_ratio*100}%, Canny: {canny_ratio*100}%")
    print(f" Conditioning Scale: [{conditioning_scale[0]:.3f}, {conditioning_scale[1]:.3f}]")

    return adj_strength, conditioning_scale


def _i2i_environment_params(prompt_lower, strength):
    """Derive ``(adj_strength, conditioning_scale)`` for ``environment_change`` (scale order: depth, canny)."""
    ui_strength = strength

    # Defaults; only the fallback branch uses them, and its clamps override every one of them.
    adj_strength = 0.75
    controlnet_strength = 0.55
    depth_ratio = 0.50
    canny_ratio = 0.12

    nature_keywords = ["beach", "forest", "mountain", "ocean", "sky", "field", "landscape", "nature", "outdoor", "desert", "snow", "arctic"]
    interior_keywords = ["office", "room", "interior", "kitchen", "bedroom", "living room", "indoor", "wall", "furniture"]

    if any(keyword in prompt_lower for keyword in interior_keywords):
        adj_strength = 0.2 + (ui_strength * 0.5)
        controlnet_strength = 0.7 + (ui_strength * 0.2)
        canny_ratio = 0.8 + (ui_strength * 0.1)
        depth_ratio = 1.0 - canny_ratio

        adj_strength = _i2i_clamp(adj_strength, 0.15, 0.7)
        controlnet_strength = _i2i_clamp(controlnet_strength, 0.6, 0.95)
        canny_ratio = _i2i_clamp(canny_ratio, 0.7, 0.95)
        depth_ratio = _i2i_clamp(depth_ratio, 0.05, 0.3)

        conditioning_scale = _i2i_conditioning_scale(controlnet_strength, depth_ratio, canny_ratio)

        print(f"🏠 INNENRÄUME: UI={ui_strength:.2f}")
        print(f" Strength: {adj_strength}, ControlNet: {controlnet_strength}")
        print(f" Depth: {depth_ratio*100:.0f}% (Maßstab), Canny: {canny_ratio*100:.0f}%")
        print(f" Conditioning Scale: [{conditioning_scale[0]:.3f}, {conditioning_scale[1]:.3f}]")

    elif any(keyword in prompt_lower for keyword in nature_keywords):
        adj_strength = 0.15 + (ui_strength * 0.75)
        controlnet_strength = 0.5 + (ui_strength * 0.25)
        depth_ratio = 0.9 - (ui_strength * 0.3)
        canny_ratio = 1.0 - depth_ratio

        adj_strength = _i2i_clamp(adj_strength, 0.15, 0.95)
        controlnet_strength = _i2i_clamp(controlnet_strength, 0.2, 0.6)
        depth_ratio = _i2i_clamp(depth_ratio, 0.5, 0.95)
        canny_ratio = _i2i_clamp(canny_ratio, 0.05, 0.5)

        conditioning_scale = _i2i_conditioning_scale(controlnet_strength, depth_ratio, canny_ratio)

        print(f"🌳 NATURSZENE: UI={ui_strength:.2f}")
        print(f" Strength: {adj_strength}, ControlNet: {controlnet_strength}")
        print(f" Depth: {depth_ratio*100:.0f}% (Maßstab), Canny: {canny_ratio*100:.0f}%")
        print(f" Conditioning Scale: [{conditioning_scale[0]:.3f}, {conditioning_scale[1]:.3f}]")

    else:
        # NOTE(review): this branch ignores the UI strength entirely - confirm that is intended.
        adj_strength = _i2i_clamp(adj_strength, 0.15, 0.7)
        controlnet_strength = _i2i_clamp(controlnet_strength, 0.6, 0.95)
        canny_ratio = _i2i_clamp(canny_ratio, 0.7, 0.95)
        depth_ratio = _i2i_clamp(depth_ratio, 0.05, 0.3)

        conditioning_scale = _i2i_conditioning_scale(controlnet_strength, depth_ratio, canny_ratio)

        print(f"🎯 STANDARD MODUS: Umgebung ändern")
        print(f" Strength: {adj_strength}, ControlNet: {controlnet_strength}")
        print(f" Depth: {depth_ratio*100}%, Canny: {canny_ratio*100}%")
        print(f" Conditioning Scale: [{conditioning_scale[0]:.3f}, {conditioning_scale[1]:.3f}]")

    return adj_strength, conditioning_scale


def _i2i_face_params(prompt_lower, strength, is_front_face, is_back_head):
    """Derive ``(adj_strength, conditioning_scale)`` for the face-only mode (scale order: depth, canny)."""
    ui_strength = strength

    adj_strength = 0.15 + (ui_strength * 0.75)
    controlnet_strength = 0.8 - (ui_strength * 0.6)
    depth_ratio = 0.8 - (ui_strength * 0.4)
    canny_ratio = 0.2 + (ui_strength * 0.3)

    drawing_keywords = ["drawing", "illustration", "sketch", "painting", "artwork", "watercolor"]
    anime_keywords = ["anime", "manga", "cartoon", "character", "chibi", "cel-shading", "lineart"]

    print(f" 🎯 Gesichtserkennung: Vorne={is_front_face}, Hinten={is_back_head}")

    if any(keyword in prompt_lower for keyword in anime_keywords):
        print("🎨 ANIME-TRANSFORM-MODUS")

        adj_strength = 0.30 + 0.55 * _i2i_smoothstep(0.35, 0.9, ui_strength)
        adj_strength = _i2i_clamp(adj_strength, 0.3, 0.85)

        controlnet_strength = 0.30 + 0.52 * _i2i_smoothstep(0.65, 0.9, ui_strength)
        controlnet_strength = _i2i_clamp(controlnet_strength, 0.25, 0.85)

        if is_front_face:
            depth_ratio = 0.65 + 0.15 * _i2i_smoothstep(0.5, 0.9, ui_strength)
            canny_ratio = 1.0 - depth_ratio
            print(" 👤 Anime-Gesicht (vorne): Mehr Depth für 3D-Struktur")

        elif is_back_head:
            depth_ratio = 0.65 + 0.20 * _i2i_smoothstep(0.5, 0.9, ui_strength)
            canny_ratio = 1.0 - depth_ratio

            if ui_strength <= 0.7:
                controlnet_strength = 0.30 + 0.52 * _i2i_smoothstep(0.65, 0.9, ui_strength)
            else:
                # Above 0.7 the ControlNet weight is boosted linearly.
                boost_factor = (ui_strength - 0.7) / 0.2
                controlnet_strength = 0.5 + (0.35 * boost_factor)

            controlnet_strength = _i2i_clamp(controlnet_strength, 0.3, 0.9)

            print(f" 💇 Anime-Hinterkopf: Depth={depth_ratio:.2f}, ControlNet={controlnet_strength:.2f}")
            if ui_strength > 0.7:
                print(" ⚡ BOOST: ControlNet erhöht für bessere Strukturerhaltung")

        else:
            depth_ratio = 0.55 + 0.15 * _i2i_smoothstep(0.5, 0.9, ui_strength)
            canny_ratio = 1.0 - depth_ratio

        conditioning_scale = _i2i_conditioning_scale(controlnet_strength, depth_ratio, canny_ratio)

        print(f"UI Strength: {ui_strength}")
        print(f"adj_strength: {adj_strength:.3f}")
        print(f"controlnet_strength: {controlnet_strength:.3f}")
        print(f"Depth: {depth_ratio*100:.1f}%, Canny: {canny_ratio*100:.1f}%")
        print(f"conditioning_scale: {conditioning_scale}")

    elif any(keyword in prompt_lower for keyword in drawing_keywords):
        adj_strength = max(0.3, adj_strength * 0.9)
        controlnet_strength = min(0.9, controlnet_strength * 1.2)
        depth_ratio = min(0.9, depth_ratio * 1.2)
        canny_ratio = max(0.1, canny_ratio * 0.8)

        adj_strength = _i2i_clamp(adj_strength, 0.15, 0.95)
        controlnet_strength = _i2i_clamp(controlnet_strength, 0.1, 0.9)
        depth_ratio = _i2i_clamp(depth_ratio, 0.1, 0.9)
        canny_ratio = _i2i_clamp(canny_ratio, 0.1, 0.9)

        conditioning_scale = _i2i_conditioning_scale(controlnet_strength, depth_ratio, canny_ratio)

        print(" 📸 Drawing-Modus: Mehr Strukturerhalt")
        print(f" Strength: {adj_strength}, ControlNet: {controlnet_strength}")
        print(f" Depth: {depth_ratio*100}%, Canny: {canny_ratio*100}%")
        print(f" Conditioning Scale: [{conditioning_scale[0]:.3f}, {conditioning_scale[1]:.3f}]")

    else:
        adj_strength = _i2i_clamp(adj_strength, 0.15, 0.95)
        controlnet_strength = _i2i_clamp(controlnet_strength, 0.1, 0.9)
        depth_ratio = _i2i_clamp(depth_ratio, 0.1, 0.9)
        canny_ratio = _i2i_clamp(canny_ratio, 0.1, 0.9)

        conditioning_scale = _i2i_conditioning_scale(controlnet_strength, depth_ratio, canny_ratio)

        print(" 📸 Standard-Modus: Mehr Strukturerhalt")
        print(f" Strength: {adj_strength}, ControlNet: {controlnet_strength}")
        print(f" Depth: {depth_ratio*100}%, Canny: {canny_ratio*100}%")
        print(f" Conditioning Scale: [{conditioning_scale[0]:.3f}, {conditioning_scale[1]:.3f}]")

    return adj_strength, conditioning_scale


def img_to_image(image, prompt, neg_prompt, strength, steps, guidance_scale,
                 mode, bbox_x1, bbox_y1, bbox_x2, bbox_y2,
                 progress=gr.Progress()):
    """Run ControlNet-guided inpainting on ``image`` according to ``mode``.

    Args:
        image: Input PIL image, or None.
        prompt: Positive prompt; mode-specific boosters may be prepended.
        neg_prompt: Optional user negative prompt, merged with the automatic one.
        strength: UI change strength; mapped per mode to the pipeline strength
            and the ControlNet conditioning scales.
        steps: Number of inference steps (converted with ``int``).
        guidance_scale: CFG scale, capped at 12.0 before it reaches the pipeline.
        mode: ``"focus_change"``, ``"environment_change"`` or anything else
            (handled like ``"face_only_change"``).
        bbox_x1, bbox_y1, bbox_x2, bbox_y2: Bounding box in original image
            coordinates; if any is None the whole image is processed without a mask.
        progress: Gradio progress tracker.

    Returns:
        A 5-tuple ``(final_image, raw_sam_mask, mask_preview, controlnet_map1,
        controlnet_map2)``. Five ``None`` values when ``image`` is None. On any
        error the traceback is printed and ``(copy of the input image, None,
        None, None, None)`` is returned.
    """
    try:
        if image is None:
            return None, None, None, None, None

        start_time = time.time()

        print("\n" + "="*80)
        print(f"🚀 Img2Img Start → Modus: {mode}")
        print("\n" + "="*80)
        print(f"📊 Einstellungen: Strength: {strength}, Steps: {steps}, Guidance: {guidance_scale}")
        print(f"📝 Prompt: {prompt}")
        print(f"🚫 Negativ-Prompt: {neg_prompt}")

        # ----- negative prompt: user terms first, automatic terms appended -----
        auto_negatives = auto_negative_prompt(prompt)
        print(f"🤖 Automatisch generierter Negativ-Prompt: {auto_negatives}")

        if neg_prompt and neg_prompt.strip():
            user_neg = neg_prompt.strip()
            print(f"👤 Benutzer Negativ-Prompt: {user_neg}")
            combined_negative_prompt = _i2i_merge_negative_prompts(user_neg, auto_negatives)
        else:
            combined_negative_prompt = auto_negatives
            print(f"ℹ️ Kein manueller Negativ-Prompt, verwende nur automatischen: {combined_negative_prompt}")

        print(f"✅ Finaler kombinierter Negativ-Prompt: {combined_negative_prompt}")

        # ----- face orientation -----
        # Computed for EVERY mode: the SAM mask call below always receives these
        # flags, and assigning them only in the face-only branch raised
        # UnboundLocalError for the other modes.
        # NOTE(review): confirm how create_sam_mask uses the flags outside face_only_change.
        prompt_lower = prompt.lower()
        is_front_face, is_back_head, defaulted = _i2i_detect_face_orientation(prompt_lower)

        if mode == "face_only_change":
            if defaulted:
                print(" ℹ️ Keine Gesicht/Hinterkopf-Keywords → Standard: Gesicht vorne")
            print(f" 🎯 Gesichtserkenner für Boosters: Vorne={is_front_face}, Hinten={is_back_head}")

        # ----- prompt boosters -----
        enhanced_prompt = _i2i_enhance_prompt(mode, prompt, is_front_face, is_back_head)
        print(f"🎯 Finaler Prompt für {mode}: {enhanced_prompt}")

        progress(0, desc="Starte Generierung...")

        # ----- mode-specific strength / ControlNet weighting -----
        if mode == "focus_change":
            keep_environment = False
            adj_strength, conditioning_scale = _i2i_focus_params(prompt_lower, strength)
        elif mode == "environment_change":
            keep_environment = True
            adj_strength, conditioning_scale = _i2i_environment_params(prompt_lower, strength)
        else:
            keep_environment = True
            adj_strength, conditioning_scale = _i2i_face_params(
                prompt_lower, strength, is_front_face, is_back_head
            )

        # ----- mask + scaling -----
        original_mask = None
        raw_mask = None
        padding_info = None

        if bbox_x1 is not None and bbox_y1 is not None and bbox_x2 is not None and bbox_y2 is not None:
            print(f"🎯 BBox Koordinaten erhalten: [{bbox_x1}, {bbox_y1}, {bbox_x2}, {bbox_y2}]")

            processed_mask, raw_mask, binary_mask = controlnet_processor.create_sam_mask(
                image=image,
                bbox_coords=(bbox_x1, bbox_y1, bbox_x2, bbox_y2),
                mode=mode,
                is_front_face=is_front_face,
                is_back_head=is_back_head
            )

            original_mask = processed_mask

            scaled_image, scaled_mask_inpaint, scaled_mask_composite, padding_info = scale_image_and_mask_together(
                image.convert("RGB"),
                binary_mask,
                original_mask,
                target_size=IMG_SIZE,
                bbox_coords=(bbox_x1, bbox_y1, bbox_x2, bbox_y2),
                mode=mode
            )

            print(f"✅ Gemeinsame Skalierung abgeschlossen")
            print(f" Original: {image.size} → Skaliert: {scaled_image.size}")
        else:
            print(f"ℹ️ Keine BBox angegeben → normales Img2Img (ohne Maske)")
            scaled_image = image.convert("RGB").resize((IMG_SIZE, IMG_SIZE), Image.Resampling.LANCZOS)
            # All-white mask = repaint everything. It must be bound to the name the
            # pipeline call reads; the former ``scaled_mask`` name was never used.
            scaled_mask_inpaint = Image.new("L", (IMG_SIZE, IMG_SIZE), 255)

        progress(0.1, desc="ControlNet läuft...")

        # ----- ControlNet maps -----
        print(f"📊 ControlNet Input Größe: {scaled_image.size}")

        controlnet_maps, debug_maps = controlnet_processor.prepare_controlnet_maps(
            image=scaled_image,
            keep_environment=keep_environment
        )

        print(f"✅ ControlNet Maps erstellt: {len(controlnet_maps)} Maps")

        progress(0.3, desc="ControlNet abgeschlossen – starte Inpaint...")

        # ----- inpainting -----
        pipe = load_img2img(keep_environment=keep_environment)

        adj_guidance = min(guidance_scale, 12.0)
        seed = random.randint(0, 2**32 - 1)
        generator = torch.Generator(device=device).manual_seed(seed)
        print(f"🌱 Inpaint Seed: {seed}")

        callback = ImageToImageProgressCallback(progress, int(steps), adj_strength)

        print(f"🔄 Führe ControlNet-gesteuertes Inpainting durch...")

        result = pipe(
            prompt=enhanced_prompt,
            negative_prompt=combined_negative_prompt,
            image=scaled_image,
            mask_image=scaled_mask_inpaint,
            control_image=controlnet_maps,
            controlnet_conditioning_scale=conditioning_scale,
            strength=adj_strength,
            num_inference_steps=int(steps),
            guidance_scale=adj_guidance,
            generator=generator,
            callback_on_step_end=callback,
            callback_on_step_end_tensor_inputs=[],
        )

        print("✅ ControlNet-Inpainting abgeschlossen")

        generated_image = result.images[0]

        # ----- compositing back onto the original -----
        if original_mask is not None and padding_info is not None:
            final_image = enhanced_composite_with_sam(
                original_image=image.convert("RGB"),
                inpaint_result=generated_image,
                original_mask=original_mask,
                padding_info=padding_info,
                bbox_coords=(bbox_x1, bbox_y1, bbox_x2, bbox_y2),
                mode=mode
            )
            print(f"✅ Korrektes Compositing durchgeführt")
        else:
            final_image = generated_image
            print(f"ℹ️ Keine Maske → Direkte Rückgabe des Bildes")

        duration = time.time() - start_time

        print(f"✅ Transformation abgeschlossen in {duration:.2f} Sekunden")
        print(f"🎯 Verwendeter Modus: {mode}")
        print(f"⚙️ ControlNet: {'Depth+Canny' if keep_environment else 'OpenPose+Canny'}")
        print(f"📊 Finale Bildgröße: {final_image.size}")

        # ----- preview outputs -----
        def as_preview(mask):
            # Without a mask (no bbox) show a neutral gray tile instead of failing on None.
            if mask is None:
                return Image.new("RGB", (IMG_SIZE, IMG_SIZE), color="gray")
            return mask.convert("RGB")

        mask_preview = as_preview(original_mask)
        raw_sam_mask_display = as_preview(raw_mask)

        # First map is the pose map when present, otherwise the depth map.
        controlnet_map1 = debug_maps["pose"] if "pose" in debug_maps else debug_maps["depth"]
        controlnet_map2 = debug_maps["canny"]

        return final_image, raw_sam_mask_display, mask_preview, controlnet_map1, controlnet_map2

    except Exception as e:
        print(f"❌ Fehler in img_to_image: {e}")
        import traceback
        traceback.print_exc()

        if image is not None:
            fallback_image = image.copy()
        else:
            fallback_image = Image.new("RGB", (512, 512), color="gray")

        # Return the fallback that was just built (previously a None result was returned).
        return fallback_image, None, None, None, None
| |
|
| |
|
def update_bbox_from_image(image):
    """Return the four bounding-box values for a newly uploaded image.

    Yields ``(None, None, None, None)`` when no image is given; otherwise the
    first four entries of ``auto_detect_face_area(image)``
    (presumably x1, y1, x2, y2 - verify against that helper).
    """
    if image is not None:
        detected = auto_detect_face_area(image)
        return tuple(detected[idx] for idx in range(4))

    return (None,) * 4
| |
|
def update_model_settings(model_id):
    """Look up the recommended settings for ``model_id``.

    Unknown ids fall back to the ``runwayml/stable-diffusion-v1-5`` entry of
    ``MODEL_CONFIGS``. Returns ``(steps, cfg, hint_text)``.
    """
    fallback = MODEL_CONFIGS["runwayml/stable-diffusion-v1-5"]
    settings = MODEL_CONFIGS.get(model_id, fallback)

    steps = settings["recommended_steps"]
    cfg = settings["recommended_cfg"]
    hint = f"📊 Empfohlene Einstellungen: {steps} Steps, CFG {cfg}"

    return steps, cfg, hint
| |
|
| | def main_ui(): |
| | """ |
| | HAUPT-UI (ANGEPASST FÜR 3 MODI) |
| | """ |
| | with gr.Blocks( |
| | title="AI Image Generator", |
| | theme=gr.themes.Base(), |
| | css=""" |
| | /* ===== INFO-BOXEN über Textboxen ===== */ |
| | .info-box { |
| | background: #f8fafc; |
| | padding: 8px 12px; |
| | border-radius: 6px; |
| | border: 2px solid #e2e8f0; |
| | margin-bottom: 6px; |
| | font-size: 12px; |
| | line-height: 1.3; |
| | min-height: 50px !important; |
| | height: 50px !important; |
| | display: flex !important; |
| | align-items: center; |
| | justify-content: flex-start !important; |
| | text-align: left; |
| | padding-left: 15px; |
| | overflow: hidden !important; /* KEIN Scroll */ |
| | border: none !important; |
| | } |
| | |
| | /* Linke Box (Prompt) - Blau */ |
| | .gr-column:first-child .info-box { |
| | border-left: 4px solid #3b82f6; |
| | background: #eff6ff; |
| | } |
| | |
| | /* Rechte Box (Negativ) - Rot */ |
| | .gr-column:last-child .info-box { |
| | border-left: 4px solid #ef4444; |
| | background: #fef2f2; |
| | } |
| | |
| | /* Code in Info-Boxen */ |
| | .info-box code { |
| | background: white; |
| | padding: 3px 3px; |
| | border-radius: 4px; |
| | font-family: monospace; |
| | font-size: 12px; |
| | border: 1px solid #e2e8f0; |
| | display: inline-block; |
| | margin: 3px 0; |
| | } |
| | |
| | |
| | /* ===== TEXTBOXEN ===== */ |
| | .prompt-box textarea { |
| | min-height: 90px !important; |
| | border-radius: 6px !important; |
| | border: 2px solid #e2e8f0 !important; |
| | padding: 10px !important; |
| | font-size: 14px !important; |
| | } |
| | |
| | /* Focus-State */ |
| | .prompt-box textarea:focus { |
| | border-color: #3b82f6 !important; |
| | outline: none !important; |
| | box-shadow: 0 0 0 2px rgba(59, 130, 246, 0.1) !important; |
| | } |
| | |
| | /* Platzhalter */ |
| | .prompt-box textarea::placeholder { |
| | color: #94a3b8 !important; |
| | } |
| | |
| | |
| | .clickable-file { |
| | color: #1976d2; |
| | cursor: pointer; |
| | text-decoration: none; |
| | font-family: 'Monaco', 'Consolas', monospace; |
| | background: #e3f2fd; |
| | padding: 2px 6px; |
| | border-radius: 4px; |
| | border: 1px solid #bbdefb; |
| | } |
| | .clickable-file:hover { |
| | background: #bbdefb; |
| | text-decoration: underline; |
| | } |
| | .model-info-box { |
| | background: #e8f4fd; |
| | padding: 12px; |
| | border-radius: 6px; |
| | margin: 10px 0; |
| | border-left: 4px solid #2196f3; |
| | font-size: 14px; |
| | } |
| | #generate-button { |
| | background-color: #0080FF !important; |
| | border: none !important; |
| | margin: 20px auto !important; |
| | display: block !important; |
| | font-weight: 600; |
| | width: 280px; |
| | } |
| | #generate-button:hover { |
| | background-color: #0066CC !important; |
| | } |
| | .hint-box { |
| | margin-top: 20px; |
| | } |
| | .custom-text { |
| | font-size: 25px !important; |
| | } |
| | .image-upload .svelte-1p4f8co { |
| | display: block !important; |
| | } |
| | .preview-box { |
| | border: 2px dashed #ccc; |
| | padding: 10px; |
| | border-radius: 8px; |
| | margin: 10px 0; |
| | } |
| | .mode-red { |
| | border: 3px solid #ff4444 !important; |
| | } |
| | .mode-green { |
| | border: 3px solid #44ff44 !important; |
| | } |
| | .coordinate-sliders { |
| | background: #f8f9fa; |
| | padding: 15px; |
| | border-radius: 8px; |
| | margin: 10px 0; |
| | } |
| | .gr-checkbox .wrap .text-gray { |
| | font-size: 14px !important; |
| | font-weight: 600 !important; |
| | line-height: 1.4 !important; |
| | } |
| | .status-message { |
| | padding: 10px; |
| | border-radius: 5px; |
| | margin: 10px 0; |
| | text-align: center; |
| | font-weight: 500; |
| | } |
| | .status-success { |
| | background-color: #d4edda; |
| | color: #155724; |
| | border: 1px solid #c3e6cb; |
| | } |
| | .status-error { |
| | background-color: #f8d7da; |
| | color: #721c24; |
| | border: 1px solid #f5c6cb; |
| | } |
| | .radio-group { |
| | background: #f8f9fa; |
| | padding: 15px; |
| | border-radius: 8px; |
| | margin: 10px 0; |
| | border: 2px solid #e9ecef; |
| | } |
| | .radio-item { |
| | padding: 8px 12px; |
| | margin: 5px 0; |
| | border-radius: 4px; |
| | transition: background 0.3s; |
| | } |
| | .radio-item:hover { |
| | background: #e9ecef; |
| | } |
| | .radio-label { |
| | font-weight: 600; |
| | font-size: 14px; |
| | } |
| | .radio-description { |
| | font-size: 12px; |
| | color: #6c757d; |
| | margin-left: 24px; |
| | } |
| | """ |
| | ) as demo: |
| |
|
| | with gr.Column(visible=True) as content_area: |
| | with gr.Tab("Text zu Bild"): |
| | gr.Markdown("## 🎨 Text zu Bild Generator") |
| | |
| | with gr.Row(): |
| | with gr.Column(scale=2): |
| | |
| | model_dropdown = gr.Dropdown( |
| | choices=[ |
| | (config["name"], model_id) |
| | for model_id, config in MODEL_CONFIGS.items() |
| | ], |
| | value="runwayml/stable-diffusion-v1-5", |
| | label="📁 Modellauswahl", |
| | info="🏠 Universal vs 👤 Portraits" |
| | ) |
| | |
| | |
| | model_info_box = gr.Markdown( |
| | value="<div class='model-info-box'>" |
| | "**🏠 Stable Diffusion 1.5 (Universal)**<br>" |
| | "Universal model, good all-rounder, reliable results<br>" |
| | "Empfohlene Einstellungen: 35 Steps, CFG 7.5" |
| | "</div>", |
| | label="Modellinformationen" |
| | ) |
| | |
| | with gr.Column(scale=3): |
| | txt_input = gr.Textbox( |
| | placeholder="z.B. ultra realistic mountain landscape at sunrise, soft mist over the valley, detailed foliage, crisp textures, depth of field, sunlight rays through clouds, shot on medium format camera, 8k, HDR, hyper-detailed, natural lighting, masterpiece", |
| | lines=3, |
| | label="🎯 Prompt (Englisch)", |
| | info="Beschreibe detailliert, was du sehen möchtest. Negative Prompts werden automatisch generiert." |
| | ) |
| | |
| | with gr.Row(): |
| | with gr.Column(): |
| | txt_steps = gr.Slider( |
| | minimum=10, maximum=100, value=35, step=1, |
| | label="⚙️ Inferenz-Schritte", |
| | info="Mehr Schritte = bessere Qualität, aber langsamer (20-50 empfohlen)" |
| | ) |
| | with gr.Column(): |
| | txt_guidance = gr.Slider( |
| | minimum=1.0, maximum=20.0, value=7.5, step=0.5, |
| | label="🎛️ Prompt-Stärke (CFG Scale)", |
| | info="Wie stark der Prompt befolgt wird (7-12 für gute Balance)" |
| | ) |
| | |
| | |
| | status_output = gr.Markdown( |
| | value="", |
| | elem_classes="status-message" |
| | ) |
| | |
| | generate_btn = gr.Button("🚀 Bild generieren", variant="primary", elem_id="generate-button") |
| | |
| | with gr.Row(): |
| | txt_output = gr.Image( |
| | label="🖼️ Generiertes Bild", |
| | show_download_button=True, |
| | type="pil", |
| | height=400 |
| | ) |
| | |
| | |
| | def update_model_info(model_id): |
| | config = MODEL_CONFIGS.get(model_id, MODEL_CONFIGS["runwayml/stable-diffusion-v1-5"]) |
| | info_html = f""" |
| | <div class='model-info-box'> |
| | <strong>{config['name']}</strong><br> |
| | {config['description']}<br> |
| | <em>Empfohlene Einstellungen: {config['recommended_steps']} Steps, CFG {config['recommended_cfg']}</em> |
| | </div> |
| | """ |
| | return info_html, config["recommended_steps"], config["recommended_cfg"] |
| | |
| | model_dropdown.change( |
| | fn=update_model_info, |
| | inputs=[model_dropdown], |
| | outputs=[model_info_box, txt_steps, txt_guidance] |
| | ) |
| | |
| | generate_btn.click( |
| | fn=text_to_image, |
| | inputs=[txt_input, model_dropdown, txt_steps, txt_guidance], |
| | outputs=[txt_output, status_output], |
| | concurrency_limit=1 |
| | ) |
| |
|
| | with gr.Tab("Bild zu Bild"): |
| | gr.Markdown("## 🖼️ Bild zu Bild Transformation (3 MODI)") |
| | |
| | with gr.Row(): |
| | with gr.Column(): |
| | img_input = gr.Image( |
| | type="pil", |
| | label="📤 Eingabebild", |
| | height=300, |
| | sources=["upload"], |
| | elem_id="image-upload" |
| | ) |
| | with gr.Column(): |
| | preview_output = gr.Image( |
| | label="🎯 Live-Vorschau mit Maske", |
| | height=300, |
| | interactive=False, |
| | show_download_button=False |
| | ) |
| | |
| | |
| | with gr.Row(): |
| | with gr.Column(): |
| | gr.Markdown("### 🎛️ Transformations-Modus") |
| | |
| | |
| | mode_radio = gr.Radio( |
| | choices=[ |
| | ("🌳 Umgebung ändern", "environment_change"), |
| | ("🎯 Focus verändern", "focus_change"), |
| | ("👤 Ausschließlich Gesicht", "face_only_change") |
| | ], |
| | value="environment_change", |
| | label="Wähle den Transformationsmodus:", |
| | info="Steuert, welcher Teil des Bildes verändert wird", |
| | elem_classes="radio-group" |
| | ) |
| | |
| | |
| | gr.Markdown(""" |
| | <div style="font-size: 12px; color: #666; margin-top: 10px;"> |
| | <strong>Modus-Erklärungen:</strong><br> |
| | • <strong>🌳 Umgebung ändern:</strong> Ändert alles AUSSER dem markierten Bereich (Depth+Canny)<br> |
| | • <strong>🎯 Focus verändern:</strong> Ändert markierten Bereich+Körper (OpenPose+Canny)<br> |
| | • <strong>👤 Ausschließlich Gesicht:</strong> Ändert NUR den markierten Bereich (Depth+Canny) |
| | </div> |
| | """) |
| | |
| | with gr.Row(): |
| | gr.Markdown("### 📐 Bildelementbereich anpassen") |
| | |
| | |
| | with gr.Row(): |
| | with gr.Column(): |
| | bbox_x1 = gr.Slider( |
| | label="← Links (x1)", |
| | minimum=0, maximum=MAX_IMAGE_SIZE, value=100, step=1, |
| | info="Linke Kante des Bildelementbereichs" |
| | ) |
| | with gr.Column(): |
| | bbox_y1 = gr.Slider( |
| | label="↑ Oben (y1)", |
| | minimum=0, maximum=MAX_IMAGE_SIZE, value=100, step=1, |
| | info="Obere Kante des Bildelementbereichs" |
| | ) |
| | with gr.Row(): |
| | with gr.Column(): |
| | bbox_x2 = gr.Slider( |
| | label="→ Rechts (x2)", |
| | minimum=0, maximum=MAX_IMAGE_SIZE, value=300, step=1, |
| | info="Rechte Kante des Bildelementbereichs" |
| | ) |
| | with gr.Column(): |
| | bbox_y2 = gr.Slider( |
| | label="↓ Unten (y2)", |
| | minimum=0, maximum=MAX_IMAGE_SIZE, value=300, step=1, |
| | info="Untere Kante des Bildelementbereichs" |
| | ) |
| |
|
| | |
| | with gr.Row(): |
| | with gr.Column(): |
| | |
| | pos_info = gr.Markdown( |
| | value="`[STIL-MOTIV], [UMGEBUNG], [VOR/HINTERGRUND], [DETAILS], [QUALITÄT], [BELEUCHTUNG]`", |
| | elem_classes=["info-box"] |
| | ) |
| | img_prompt = gr.Textbox( |
| | placeholder="photorealistic coastal beach, keep person unchanged, high detailed, diffused light", |
| | lines=2, |
| | label="🎯 Transformations-Prompt (Englisch)", |
| | |
| | elem_classes=["prompt-box"] |
| | ) |
| | with gr.Column(): |
| | |
| | neg_info = gr.Markdown( |
| | value="`[GESICHTER/ANATOMIE], [FEHLER], [QUALITÄT], [UNERWÜNSCHTES]`", |
| | elem_classes=["info-box"] |
| | ) |
| | img_neg_prompt = gr.Textbox( |
| | placeholder="blurry face, deformed anatomy, ugly, extra limbs, poorly drawn hands", |
| | lines=2, |
| | label="🚫 Negativ-Prompt (Englisch)", |
| | |
| | elem_classes=["prompt-box"] |
| | ) |
| | |
| | |
| | with gr.Row(): |
| | with gr.Column(): |
| | strength_slider = gr.Slider( |
| | minimum=0.1, maximum=0.9, value=0.4, step=0.05, |
| | label="💪 Veränderungs-Stärke (strength)", |
| | info="0.1-0.3: Leichte Anpassungen, 0.4-0.6: Mittlere Veränderungen, 0.7-0.9: Starke Umgestaltung" |
| | ) |
| | with gr.Column(): |
| | img_steps = gr.Slider( |
| | minimum=10, maximum=45, value=35, step=1, |
| | label="⚙️ Inferenz-Schritte", |
| | info="Anzahl der Verarbeitungsschritte (25-45 für gute Ergebnisse)" |
| | ) |
| | with gr.Column(): |
| | img_guidance = gr.Slider( |
| | minimum=1.0, maximum=15.0, value=7.5, step=0.5, |
| | label="🎛️ Prompt-Stärke (guidance)", |
| | info="Einfluss des Prompts auf das Ergebnis (6-10 für natürliche Ergebnisse)" |
| | ) |
| |
|
| | with gr.Row(): |
| | gr.Markdown( |
| | "### 📋 Hinweise:\n" |
| | "• **🆕 3 Transformations-Modi** für präzise Kontrolle\n" |
| | "• **🆕 Unterstützt Bilder bis 4096×4096 Pixel**\n" |
| | "• **🆕 Automatische Bildelementerkennung** setzt Koordinaten beim Upload\n" |
| | "• **🆕 Live-Vorschau** zeigt farbige Rahmen je nach Modus\n" |
| | "• **🆕 Dynamische Koordinaten-Schieberegler** passen sich an Bildgröße an\n" |
| | "• **ControlNet-Technologie** für konsistente Ergebnisse\n" |
| | "• **Automatische Negative Prompts** für bessere Qualität\n" |
| | "• **KORREKTER COMPOSITING-WORKFLOW** – nur bearbeiteter Bereich wird eingefügt\n" |
| | "• **Ausgabe in Eingabebildgröße" |
| | ) |
| | |
| | transform_btn = gr.Button("🔄 Bild transformieren", variant="primary") |
| |
|
| |
|
| | with gr.Row(): |
| | img_output = gr.Image( |
| | label="✨ Transformiertes Bild", |
| | show_download_button=True, |
| | type="pil", |
| | height=400 |
| | ) |
| | |
| | with gr.Row(): |
| | sam_raw_mask_output = gr.Image( |
| | label="🔍 SAM-Rohmaske (Vor Nachbearbeitung)", |
| | type="pil", |
| | height=300, |
| | show_download_button=False |
| | ) |
| | processed_mask_output = gr.Image( |
| | label="🛠️ Nachbearbeitete Maske (Für Inpainting)", |
| | type="pil", |
| | height=300, |
| | show_download_button=False |
| | ) |
| |
|
| | with gr.Row(): |
| | pose_map_output = gr.Image( |
| | label="🎭 Pose/Depth Map", |
| | type="pil", |
| | height=300, |
| | show_download_button=False |
| | ) |
| | canny_map_output = gr.Image( |
| | label="📐 Canny Edge Map", |
| | type="pil", |
| | height=300, |
| | show_download_button=False |
| | ) |
| | |
| |
|
| | |
| | |
| | img_input.upload( |
| | fn=process_image_upload, |
| | inputs=[img_input], |
| | outputs=[preview_output, bbox_x1, bbox_y1, bbox_x2, bbox_y2] |
| | ).then( |
| | fn=update_slider_for_image, |
| | inputs=[img_input], |
| | outputs=[bbox_x1, bbox_y1, bbox_x2, bbox_y2] |
| | ) |
| | |
| | |
| | coordinate_inputs = [img_input, bbox_x1, bbox_y1, bbox_x2, bbox_y2, mode_radio] |
| | |
| | |
| | for slider in [bbox_x1, bbox_y1, bbox_x2, bbox_y2]: |
| | slider.release( |
| | fn=update_live_preview, |
| | inputs=coordinate_inputs, |
| | outputs=preview_output |
| | ) |
| |
|
| |
|
| | |
def update_info(mode):
    """Return the prompt-structure hint texts for a transformation mode.

    Args:
        mode: Mode identifier, compared by equality against
            "environment_change" and "focus_change".

    Returns:
        A 2-tuple ``(positive_hint, negative_hint)`` of Markdown strings.
        Any mode that matches neither known identifier yields the
        face-only hints.
    """
    # Known modes paired with their (positive, negative) hint templates.
    hint_table = (
        (
            "environment_change",
            (
                "`[STIL-MOTIV], [UMGEBUNG], [VOR/HINTERGRUND], [DETAILS], [QUALITÄT], [BELEUCHTUNG]`",
                "`[GESICHTER/ANATOMIE], [FEHLER], [QUALITÄT], [UNERWÜNSCHTES]`",
            ),
        ),
        (
            "focus_change",
            (
                "`[GESICHTSBESCHREIBUNG], [KLEIDUNG], [POSITION], [DETAILS], [STIL]`",
                "`[DEFORMIERT], [UNSCHÄRFE], [ANATOMIEFEHLER], [UNERWÜNSCHTES]`",
            ),
        ),
    )

    # Fallback used for every other mode value (face-only hints).
    face_only_hints = (
        "`[HAARFARBE], [AUGEN], [GESICHTSAUSDRUCK], [DETAILS], [BELEUCHTUNG]`",
        "`[UNREALISTISCH], [ASYMETRISCH], [FEHLER], [UNERWÜNSCHTES]`",
    )

    for mode_key, hint_pair in hint_table:
        if mode == mode_key:
            return hint_pair
    return face_only_hints
| | |
| |
|
| |
|
| | |
| | mode_radio.change( |
| | fn=update_info, |
| | inputs=[mode_radio], |
| | outputs=[pos_info, neg_info] |
| | ) |
| |
|
| |
|
| | |
| | mode_radio.change( |
| | fn=update_live_preview, |
| | inputs=coordinate_inputs, |
| | outputs=preview_output |
| | ) |
| | |
| | |
| | transform_btn.click( |
| | fn=img_to_image, |
| | inputs=[ |
| | img_input, img_prompt, img_neg_prompt, |
| | strength_slider, img_steps, img_guidance, |
| | mode_radio, bbox_x1, bbox_y1, bbox_x2, bbox_y2 |
| | ], |
| | outputs=[img_output, sam_raw_mask_output, processed_mask_output, pose_map_output, canny_map_output], |
| | concurrency_limit=1 |
| | ) |
| |
|
| | return demo |