Update app.py
Browse files
app.py
CHANGED
|
@@ -25,6 +25,7 @@ MODEL_CONFIGS = {
|
|
| 25 |
"name": "🏠 Stable Diffusion 1.5 (Universal)",
|
| 26 |
"description": "Universal model, good all-rounder, reliable results",
|
| 27 |
"requires_vae": False,
|
|
|
|
| 28 |
"recommended_steps": 35,
|
| 29 |
"recommended_cfg": 7.5,
|
| 30 |
"supports_fp16": True
|
|
@@ -163,6 +164,136 @@ def create_face_mask(image, bbox_coords, mode):
|
|
| 163 |
|
| 164 |
return mask
|
| 165 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 166 |
def auto_detect_face_area(image):
|
| 167 |
"""Optimierten Vorschlag für Gesichtsbereich ohne externe Bibliotheken"""
|
| 168 |
width, height = image.size
|
|
@@ -190,7 +321,6 @@ def load_txt2img(model_id):
|
|
| 190 |
"""Lädt das Text-to-Image Modell basierend auf der Auswahl"""
|
| 191 |
global pipe_txt2img, current_pipe_model_id
|
| 192 |
|
| 193 |
-
# Wenn bereits das richtige Modell geladen ist, nichts tun
|
| 194 |
if pipe_txt2img is not None and current_pipe_model_id == model_id:
|
| 195 |
print(f"✅ Modell {model_id} bereits geladen")
|
| 196 |
return pipe_txt2img
|
|
@@ -217,16 +347,14 @@ def load_txt2img(model_id):
|
|
| 217 |
print("ℹ️ Versuche ohne VAE weiter...")
|
| 218 |
vae = None
|
| 219 |
|
| 220 |
-
# Modellparameter basierend auf Modelltyp
|
| 221 |
model_params = {
|
| 222 |
"torch_dtype": torch_dtype,
|
| 223 |
"safety_checker": None,
|
| 224 |
"requires_safety_checker": False,
|
| 225 |
"add_watermarker": False,
|
| 226 |
-
"allow_pickle": True,
|
| 227 |
}
|
| 228 |
|
| 229 |
-
# SAFETENSORS LOGIK
|
| 230 |
if model_id in SAFETENSORS_MODELS:
|
| 231 |
model_params["use_safetensors"] = True
|
| 232 |
print(f"ℹ️ Verwende safetensors für {model_id}")
|
|
@@ -234,14 +362,12 @@ def load_txt2img(model_id):
|
|
| 234 |
model_params["use_safetensors"] = False
|
| 235 |
print(f"ℹ️ Verwende .bin weights für {model_id}")
|
| 236 |
|
| 237 |
-
# FP16 Variante nur wenn Modell sie unterstützt UND wir auf GPU sind
|
| 238 |
if config.get("supports_fp16", False) and torch_dtype == torch.float16:
|
| 239 |
model_params["variant"] = "fp16"
|
| 240 |
print("ℹ️ Verwende FP16 Variante")
|
| 241 |
else:
|
| 242 |
print("ℹ️ Verwende Standard Variante (kein FP16)")
|
| 243 |
|
| 244 |
-
# VAE nur wenn nicht None
|
| 245 |
if vae is not None:
|
| 246 |
model_params["vae"] = vae
|
| 247 |
|
|
@@ -251,10 +377,8 @@ def load_txt2img(model_id):
|
|
| 251 |
**model_params
|
| 252 |
).to(device)
|
| 253 |
|
| 254 |
-
# SICHERER SCHEDULER-HANDLING
|
| 255 |
print("⚙️ Konfiguriere Scheduler...")
|
| 256 |
|
| 257 |
-
# Prüfe ob Scheduler existiert
|
| 258 |
if pipe_txt2img.scheduler is None:
|
| 259 |
print("⚠️ Scheduler ist None, setze Standard-Scheduler")
|
| 260 |
pipe_txt2img.scheduler = PNDMScheduler.from_pretrained(
|
|
@@ -262,13 +386,10 @@ def load_txt2img(model_id):
|
|
| 262 |
subfolder="scheduler"
|
| 263 |
)
|
| 264 |
|
| 265 |
-
# Versuche DPM-Solver zu verwenden (bessere Ergebnisse)
|
| 266 |
try:
|
| 267 |
-
# Hole die Scheduler-Konfiguration
|
| 268 |
if hasattr(pipe_txt2img.scheduler, 'config'):
|
| 269 |
scheduler_config = pipe_txt2img.scheduler.config
|
| 270 |
else:
|
| 271 |
-
# Fallback-Konfiguration für Scheduler
|
| 272 |
scheduler_config = {
|
| 273 |
"beta_start": 0.00085,
|
| 274 |
"beta_end": 0.012,
|
|
@@ -279,7 +400,6 @@ def load_txt2img(model_id):
|
|
| 279 |
}
|
| 280 |
print("⚠️ Keine Scheduler-Konfig gefunden, verwende Standard")
|
| 281 |
|
| 282 |
-
# Setze DPM-Solver Scheduler
|
| 283 |
pipe_txt2img.scheduler = DPMSolverMultistepScheduler.from_config(
|
| 284 |
scheduler_config,
|
| 285 |
use_karras_sigmas=True,
|
|
@@ -291,11 +411,9 @@ def load_txt2img(model_id):
|
|
| 291 |
print(f"⚠️ Konnte DPM-Scheduler nicht setzen: {scheduler_error}")
|
| 292 |
print("ℹ️ Verwende Standard-Scheduler weiter")
|
| 293 |
|
| 294 |
-
# Optimierungen
|
| 295 |
pipe_txt2img.enable_attention_slicing()
|
| 296 |
print("✅ Attention Slicing aktiviert")
|
| 297 |
|
| 298 |
-
# VAE Slicing nur wenn VAE existiert
|
| 299 |
if hasattr(pipe_txt2img, 'vae') and pipe_txt2img.vae is not None:
|
| 300 |
try:
|
| 301 |
pipe_txt2img.enable_vae_slicing()
|
|
@@ -319,7 +437,6 @@ def load_txt2img(model_id):
|
|
| 319 |
traceback.print_exc()
|
| 320 |
print("🔄 Fallback auf SD 1.5...")
|
| 321 |
|
| 322 |
-
# Fallback auf Standard SD 1.5
|
| 323 |
try:
|
| 324 |
pipe_txt2img = StableDiffusionPipeline.from_pretrained(
|
| 325 |
"runwayml/stable-diffusion-v1-5",
|
|
@@ -348,7 +465,6 @@ def load_img2img():
|
|
| 348 |
safety_checker=None,
|
| 349 |
).to(device)
|
| 350 |
|
| 351 |
-
# WICHTIG: Behalte DPMSolver++ Scheduler bei (beste Qualität für Inpainting)
|
| 352 |
pipe_img2img.scheduler = DPMSolverMultistepScheduler.from_config(
|
| 353 |
pipe_img2img.scheduler.config,
|
| 354 |
algorithm_type="sde-dpmsolver++",
|
|
@@ -543,114 +659,17 @@ def update_slider_for_image(image):
|
|
| 543 |
gr.update(maximum=max_height)
|
| 544 |
)
|
| 545 |
|
| 546 |
-
# ===
|
| 547 |
-
def text_to_image(prompt, model_id, steps, guidance_scale, progress=gr.Progress()):
    """Generate a single 512x512 image from a text prompt.

    The prompt is prefixed with generic quality tags unless it already
    contains one of the known quality words or explicit weighting syntax.
    An automatically derived negative prompt is always passed along.

    Args:
        prompt: User prompt; blank or whitespace-only input is rejected.
        model_id: Identifier handed to ``load_txt2img`` and used to look up
            the display name in ``MODEL_CONFIGS``.
        steps: Number of inference steps (cast to ``int`` for the pipeline).
        guidance_scale: CFG value forwarded unchanged to the pipeline.
        progress: Gradio progress reporter.

    Returns:
        ``(image, status_message)`` on success, ``(None, message)`` when the
        prompt is empty or any step raises.
    """
    try:
        if not (prompt and prompt.strip()):
            return None, "Bitte einen Prompt eingeben"

        print(f"🚀 Starte Generierung mit Modell: {model_id}")
        print(f"📝 Prompt: {prompt}")

        # Negative prompt derived from the user prompt.
        negative_prompt = auto_negative_prompt(prompt)
        print(f"🤖 Automatisch generierte Negative Prompts: {negative_prompt}")

        started_at = time.time()

        # Quality words that, when already present, suppress the automatic prefix.
        quality_terms = (
            'masterpiece', 'best quality', 'high quality', 'highly detailed',
            'exquisite', 'ultra detailed', 'professional',
            'perfect', 'excellent', 'amazing', 'stunning', 'beautiful',
        )
        lowered = prompt.lower()
        matched_term = next((term for term in quality_terms if term in lowered), None)
        if matched_term is not None:
            print(f"✓ Benutzer verwendet bereits Qualitätswort: {matched_term}")

        # Weighting syntax such as (word:1.5) or [word]; checked against the
        # prompt as typed, independently of the keyword check above.
        weighting_syntax = (r'\([^)]+:\d+(\.\d+)?\)', r'\[[^\]]+\]')
        uses_weights = any(re.search(rx, prompt) for rx in weighting_syntax)
        if uses_weights:
            print("✓ Benutzer verwendet bereits Gewichte im Prompt")

        if matched_term is None and not uses_weights:
            final_prompt = f"masterpiece, best quality, {prompt}"
            print(f"🔄 Verbesserter Prompt: {final_prompt}")
        else:
            final_prompt = prompt
            print("✓ Benutzerprompt wird unverändert verwendet")

        print(f"Finaler Prompt für Generation: {final_prompt}")

        progress(0, desc="Lade Modell...")
        pipeline = load_txt2img(model_id)

        # Fresh random seed per call; printed so a result can be traced.
        seed = random.randint(0, 2**32 - 1)
        rng = torch.Generator(device=device).manual_seed(seed)
        print(f"🌱 Seed: {seed}")

        step_callback = TextToImageProgressCallback(progress, steps)

        print(f"⚙️ Einstellungen: Steps={steps}, CFG={guidance_scale}")

        outputs = pipeline(
            prompt=final_prompt,
            negative_prompt=negative_prompt,
            height=512,
            width=512,
            num_inference_steps=int(steps),
            guidance_scale=guidance_scale,
            generator=rng,
            callback_on_step_end=step_callback,
            callback_on_step_end_tensor_inputs=[],
        )
        image = outputs.images[0]

        duration = time.time() - started_at
        print(f"✅ Bild generiert in {duration:.2f} Sekunden")

        # Unknown model ids fall back to the SD 1.5 entry for the display name.
        fallback_config = MODEL_CONFIGS["runwayml/stable-diffusion-v1-5"]
        config = MODEL_CONFIGS.get(model_id, fallback_config)
        return image, f"✅ Generiert mit {config['name']} in {duration:.1f}s"

    except Exception as e:
        # UI boundary: report the failure to the caller instead of raising.
        failure_message = f"❌ Fehler: {str(e)}"
        print(f"❌ Fehler in text_to_image: {e}")
        import traceback
        traceback.print_exc()
        return None, failure_message
|
| 638 |
-
|
| 639 |
-
|
| 640 |
def img_to_image(image, prompt, neg_prompt, strength, steps, guidance_scale,
|
| 641 |
mode, bbox_x1, bbox_y1, bbox_x2, bbox_y2,
|
| 642 |
progress=gr.Progress()):
|
| 643 |
"""
|
| 644 |
-
HAUPTFUNKTION FÜR BILD-zu-BILD
|
| 645 |
|
| 646 |
-
WICHTIG:
|
| 647 |
-
|
| 648 |
-
|
| 649 |
-
|
| 650 |
-
|
| 651 |
-
keep_environment Parameter für ControlNet:
|
| 652 |
-
- True für "environment_change" und "face_only_change" (Depth+Canny)
|
| 653 |
-
- False für "focus_change" (OpenPose+Canny)
|
| 654 |
"""
|
| 655 |
try:
|
| 656 |
if image is None:
|
|
@@ -664,8 +683,7 @@ def img_to_image(image, prompt, neg_prompt, strength, steps, guidance_scale,
|
|
| 664 |
print(f"📝 Prompt: {prompt}")
|
| 665 |
print(f"🚫 Negativ-Prompt: {neg_prompt}")
|
| 666 |
|
| 667 |
-
|
| 668 |
-
# ===== AUTOMATISCHEN NEGATIV-PROMPT GENERIEREN =====
|
| 669 |
auto_negatives = auto_negative_prompt(prompt)
|
| 670 |
print(f"🤖 Automatisch generierter Negativ-Prompt: {auto_negatives}")
|
| 671 |
|
|
@@ -673,23 +691,18 @@ def img_to_image(image, prompt, neg_prompt, strength, steps, guidance_scale,
|
|
| 673 |
combined_negative_prompt = ""
|
| 674 |
|
| 675 |
if neg_prompt and neg_prompt.strip():
|
| 676 |
-
# Benutzer hat einen Negativ-Prompt eingegeben
|
| 677 |
user_neg = neg_prompt.strip()
|
| 678 |
print(f"👤 Benutzer Negativ-Prompt: {user_neg}")
|
| 679 |
|
| 680 |
-
# Entferne Duplikate zwischen automatischen und manuellen Prompts
|
| 681 |
user_words = [word.strip().lower() for word in user_neg.split(",")]
|
| 682 |
auto_words = [word.strip().lower() for word in auto_negatives.split(",")]
|
| 683 |
|
| 684 |
-
# Starte mit dem Benutzer-Prompt
|
| 685 |
combined_words = user_words.copy()
|
| 686 |
|
| 687 |
-
# Füge automatische Wörter hinzu, die nicht bereits vorhanden sind
|
| 688 |
for auto_word in auto_words:
|
| 689 |
if auto_word and auto_word not in user_words:
|
| 690 |
combined_words.append(auto_word)
|
| 691 |
|
| 692 |
-
# Zusammenfügen und Duplikate entfernen
|
| 693 |
unique_words = []
|
| 694 |
seen_words = set()
|
| 695 |
for word in combined_words:
|
|
@@ -699,19 +712,15 @@ def img_to_image(image, prompt, neg_prompt, strength, steps, guidance_scale,
|
|
| 699 |
|
| 700 |
combined_negative_prompt = ", ".join(unique_words)
|
| 701 |
else:
|
| 702 |
-
# Kein Benutzer-Prompt, verwende nur den automatischen
|
| 703 |
combined_negative_prompt = auto_negatives
|
| 704 |
print(f"ℹ️ Kein manueller Negativ-Prompt, verwende nur automatischen: {combined_negative_prompt}")
|
| 705 |
|
| 706 |
print(f"✅ Finaler kombinierter Negativ-Prompt: {combined_negative_prompt}")
|
| 707 |
|
| 708 |
-
|
| 709 |
# ===== GESICHTS-SPEZIFISCHE BOOSTER FÜR NUR-GESICHT MODUS =====
|
| 710 |
if mode == "face_only_change":
|
| 711 |
-
# Gesichtsspezifische Booster hinzufügen
|
| 712 |
face_boosters = "(perfect face:1.2), (symmetrical face:1.1), realistic shaded perfect face, "
|
| 713 |
|
| 714 |
-
# Prüfen ob Prompts schon gewichtete Face-Booster haben
|
| 715 |
if not any(keyword in prompt.lower() for keyword in
|
| 716 |
["perfect face", "symmetrical", "realistic face", "shaded face"]):
|
| 717 |
enhanced_prompt = face_boosters + prompt
|
|
@@ -720,15 +729,12 @@ def img_to_image(image, prompt, neg_prompt, strength, steps, guidance_scale,
|
|
| 720 |
enhanced_prompt = prompt
|
| 721 |
print(f"👤 Benutzer hat bereits Gesichts-Booster im Prompt")
|
| 722 |
else:
|
| 723 |
-
# Für andere Modi: Normaler Prompt
|
| 724 |
enhanced_prompt = prompt
|
| 725 |
|
| 726 |
# ===== HINTERGRUND-BOOSTER FÜR UMGEBUNGS-ÄNDERUNG =====
|
| 727 |
if mode == "environment_change":
|
| 728 |
-
# Booster für komplett neue Umgebung
|
| 729 |
background_boosters = "complete scene, full background, entire environment, "
|
| 730 |
|
| 731 |
-
# Prüfe ob schon Booster enthalten
|
| 732 |
if not any(keyword in enhanced_prompt.lower() for keyword in
|
| 733 |
["complete scene", "full background", "entire environment", "whole setting"]):
|
| 734 |
enhanced_prompt = background_boosters + enhanced_prompt
|
|
@@ -736,34 +742,24 @@ def img_to_image(image, prompt, neg_prompt, strength, steps, guidance_scale,
|
|
| 736 |
|
| 737 |
print(f"🎯 Finaler Prompt für {mode}: {enhanced_prompt}")
|
| 738 |
|
| 739 |
-
|
| 740 |
progress(0, desc="Starte Generierung mit ControlNet...")
|
| 741 |
|
| 742 |
-
|
| 743 |
# ===== MODUS-SPEZIFISCHE EINSTELLUNGEN =====
|
| 744 |
adj_strength = min(0.85, strength * 1.25)
|
| 745 |
|
| 746 |
-
#controlnet_strength zwischen 0.15-0.255 ->prompt dominiert
|
| 747 |
-
#controlnet_strength zwischen 0.35-0.595 → ControlNet dominiert (Prompt begrenzt)
|
| 748 |
-
#controlnet_strength zwischen 0.25-0.425 → Balance (50/50)
|
| 749 |
-
|
| 750 |
-
# Bestimme keep_environment basierend auf Modus
|
| 751 |
if mode == "focus_change":
|
| 752 |
-
# MODUS 2: Focus verändern → OpenPose + Canny
|
| 753 |
keep_environment = False
|
| 754 |
-
controlnet_strength = adj_strength * 0.7
|
| 755 |
print(f"🎯 MODUS: Focus verändern → OpenPose+Canny (keep_environment=False)")
|
| 756 |
|
| 757 |
elif mode == "environment_change":
|
| 758 |
-
# MODUS 1: Umgebung ändern → Depth + Canny
|
| 759 |
keep_environment = True
|
| 760 |
-
controlnet_strength = adj_strength * 0.3
|
| 761 |
print(f"🎯 MODUS: Umgebung ändern → Depth+Canny (keep_environment=True)")
|
| 762 |
|
| 763 |
else: # face_only_change
|
| 764 |
-
# MODUS 3: Ausschließlich Gesicht → Depth + Canny
|
| 765 |
keep_environment = True
|
| 766 |
-
controlnet_strength = adj_strength * 0.5
|
| 767 |
print(f"🎯 MODUS: Ausschließlich Gesicht → Depth+Canny (keep_environment=True)")
|
| 768 |
|
| 769 |
controlnet_steps = min(25, int(steps * 0.8))
|
|
@@ -771,64 +767,89 @@ def img_to_image(image, prompt, neg_prompt, strength, steps, guidance_scale,
|
|
| 771 |
|
| 772 |
progress(0.03, desc="ControlNet läuft...")
|
| 773 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 774 |
# ===== CONTROLNET AUFRUF =====
|
|
|
|
|
|
|
| 775 |
controlnet_output, inpaint_input = controlnet_processor.generate_with_controlnet(
|
| 776 |
-
image=
|
| 777 |
-
prompt=enhanced_prompt,
|
| 778 |
negative_prompt=combined_negative_prompt,
|
| 779 |
steps=controlnet_steps,
|
| 780 |
guidance_scale=guidance_scale,
|
| 781 |
controlnet_strength=controlnet_strength,
|
| 782 |
-
progress=None,
|
| 783 |
keep_environment=keep_environment
|
| 784 |
)
|
| 785 |
|
| 786 |
-
print(f"✅ ControlNet Output erhalten
|
| 787 |
-
print(f"✅ Inpaint Input
|
| 788 |
|
| 789 |
progress(0.3, desc="ControlNet abgeschlossen – starte Inpaint...")
|
| 790 |
|
|
|
|
| 791 |
pipe = load_img2img()
|
| 792 |
|
| 793 |
-
#
|
| 794 |
-
|
| 795 |
-
|
| 796 |
-
img_resized = inpaint_input.convert("RGB").resize((
|
| 797 |
-
print("🔄 ControlNet Output von {} auf 512×512 skaliert".format(inpaint_input.size))
|
| 798 |
else:
|
| 799 |
img_resized = inpaint_input.convert("RGB")
|
| 800 |
-
print("✅
|
| 801 |
|
|
|
|
| 802 |
adj_guidance = min(guidance_scale, 12.0)
|
| 803 |
seed = random.randint(0, 2**32 - 1)
|
| 804 |
generator = torch.Generator(device=device).manual_seed(seed)
|
| 805 |
print(f"🌱 Inpaint Seed: {seed}")
|
| 806 |
|
| 807 |
-
# =====
|
| 808 |
-
|
| 809 |
-
if
|
| 810 |
-
#
|
| 811 |
-
|
| 812 |
-
|
| 813 |
-
|
| 814 |
-
|
| 815 |
-
|
| 816 |
-
|
| 817 |
-
print(f"✅ Maske
|
| 818 |
-
print(f" Original BBox: [{bbox_x1}, {bbox_y1}, {bbox_x2}, {bbox_y2}]")
|
| 819 |
-
print(f" Maske skaliert von {original_mask.size} auf {mask.size}")
|
| 820 |
-
|
| 821 |
-
# WICHTIG: KEINE SCHEDULER-ÄNDERUNG MEHR - DPMSolver++ bleibt aktiv
|
| 822 |
-
print(f"✅ Verwende DPMSolver++ Scheduler: {type(pipe.scheduler).__name__}")
|
| 823 |
|
|
|
|
| 824 |
callback = ImageToImageProgressCallback(progress, int(steps), adj_strength)
|
| 825 |
|
| 826 |
# ===== INPAINT DURCHFÜHREN =====
|
| 827 |
result = pipe(
|
| 828 |
-
prompt=enhanced_prompt,
|
| 829 |
negative_prompt=combined_negative_prompt,
|
| 830 |
image=img_resized,
|
| 831 |
-
mask_image=
|
| 832 |
strength=adj_strength,
|
| 833 |
num_inference_steps=int(steps),
|
| 834 |
guidance_scale=adj_guidance,
|
|
@@ -837,21 +858,38 @@ def img_to_image(image, prompt, neg_prompt, strength, steps, guidance_scale,
|
|
| 837 |
callback_on_step_end_tensor_inputs=[],
|
| 838 |
)
|
| 839 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 840 |
end_time = time.time()
|
| 841 |
duration = end_time - start_time
|
|
|
|
| 842 |
print(f"✅ Transformation abgeschlossen in {duration:.2f} Sekunden")
|
| 843 |
print(f"🎯 Verwendeter Modus: {mode}")
|
| 844 |
print(f"⚙️ ControlNet: {'Depth+Canny' if keep_environment else 'OpenPose+Canny'}")
|
| 845 |
-
print(f"
|
| 846 |
|
| 847 |
-
|
| 848 |
-
return generated_image
|
| 849 |
|
| 850 |
except Exception as e:
|
| 851 |
print(f"❌ Fehler in img_to_image: {e}")
|
| 852 |
import traceback
|
| 853 |
traceback.print_exc()
|
| 854 |
-
return None
|
| 855 |
|
| 856 |
def update_bbox_from_image(image):
|
| 857 |
"""Aktualisiert die Bounding-Box-Koordinaten wenn ein Bild hochgeladen wird"""
|
|
@@ -874,7 +912,6 @@ def update_model_settings(model_id):
|
|
| 874 |
def main_ui():
|
| 875 |
"""
|
| 876 |
HAUPT-UI (ANGEPASST FÜR 3 MODI)
|
| 877 |
-
Wichtigste Änderung: Ersetzung der Checkbox durch Radio-Buttons
|
| 878 |
"""
|
| 879 |
with gr.Blocks(
|
| 880 |
title="AI Image Generator",
|
|
@@ -1212,7 +1249,8 @@ def main_ui():
|
|
| 1212 |
"• **🆕 Dynamische Koordinaten-Schieberegler** passen sich an Bildgröße an\n"
|
| 1213 |
"• **ControlNet-Technologie** für konsistente Ergebnisse\n"
|
| 1214 |
"• **Automatische Negative Prompts** für bessere Qualität\n"
|
| 1215 |
-
"• **
|
|
|
|
| 1216 |
)
|
| 1217 |
|
| 1218 |
transform_btn = gr.Button("🔄 Bild transformieren", variant="primary")
|
|
@@ -1277,5 +1315,5 @@ if __name__ == "__main__":
|
|
| 1277 |
max_file_size="15MB",
|
| 1278 |
show_error=True,
|
| 1279 |
share=False,
|
| 1280 |
-
ssr_mode=False
|
| 1281 |
)
|
|
|
|
| 25 |
"name": "🏠 Stable Diffusion 1.5 (Universal)",
|
| 26 |
"description": "Universal model, good all-rounder, reliable results",
|
| 27 |
"requires_vae": False,
|
| 28 |
+
"vae_model": "stabilityai/sd-vae-ft-mse",
|
| 29 |
"recommended_steps": 35,
|
| 30 |
"recommended_cfg": 7.5,
|
| 31 |
"supports_fp16": True
|
|
|
|
| 164 |
|
| 165 |
return mask
|
| 166 |
|
| 167 |
+
# === KORREKTE GEMEINSAME PROPORTIONALE SKALIERUNG MIT PADDING ===
|
| 168 |
+
def scale_image_and_mask_together(image, mask, target_size=512):
    """Scale an image and its mask by one common factor and pad both to a square.

    The longer side is scaled to ``target_size``; the aspect ratio is kept and
    the remaining area is filled with black (image) / 0 (mask). The scaled
    content is centred on the canvas.

    Args:
        image: PIL image; it is pasted onto an RGB canvas.
        mask: PIL image with exactly the same size as ``image``; it is pasted
            onto an "L" canvas.
        target_size: Edge length of the square output in pixels (default 512).

    Returns:
        A tuple ``(padded_image, padded_mask, padding_info)`` where
        ``padding_info`` is a dict with the keys ``x_offset``, ``y_offset``,
        ``scaled_width``, ``scaled_height``, ``original_width``,
        ``original_height``, ``scale_factor`` and ``target_size``. It carries
        everything needed to map the padded result back onto the original.

    Raises:
        ValueError: If ``image`` or ``mask`` is ``None``, their sizes differ,
            the image has a zero-length side, or ``target_size`` is not
            positive.
    """
    if image is None or mask is None:
        raise ValueError("Bild oder Maske ist None")

    if image.size != mask.size:
        raise ValueError(f"Bild und Maske haben unterschiedliche Größen: {image.size} vs {mask.size}")

    original_width, original_height = image.size

    # Reject degenerate input up front instead of failing later with a
    # ZeroDivisionError or an opaque Pillow error.
    if original_width <= 0 or original_height <= 0 or target_size <= 0:
        raise ValueError(f"Ungültige Größe: Bild {image.size}, Zielgröße {target_size}")

    longest_side = max(original_width, original_height)

    # Float factor is kept for reporting and for padding_info consumers.
    scale = target_size / longest_side

    # Compute the pixel sizes with integer arithmetic: truncating the float
    # product can land one pixel short (e.g. a longest side of 784 yields 511
    # for target_size=512). Clamp to 1 so very elongated images never get a
    # zero-length side, which resize() rejects.
    new_width = max(1, original_width * target_size // longest_side)
    new_height = max(1, original_height * target_size // longest_side)

    print(f"📐 Gemeinsame Skalierung: {original_width}x{original_height} → {new_width}x{new_height} (Skalierung: {scale:.4f})")

    # LANCZOS for image quality; NEAREST keeps the mask free of interpolated
    # in-between values.
    scaled_image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)
    scaled_mask = mask.resize((new_width, new_height), Image.Resampling.NEAREST)

    # Black square canvases in the model's processing size.
    padded_image = Image.new("RGB", (target_size, target_size), (0, 0, 0))
    padded_mask = Image.new("L", (target_size, target_size), 0)

    # Centre the scaled content on the canvas.
    x_offset = (target_size - new_width) // 2
    y_offset = (target_size - new_height) // 2

    padded_image.paste(scaled_image, (x_offset, y_offset))
    padded_mask.paste(scaled_mask, (x_offset, y_offset))

    # Everything required to undo scaling and padding during compositing.
    padding_info = {
        'x_offset': x_offset,
        'y_offset': y_offset,
        'scaled_width': new_width,
        'scaled_height': new_height,
        'original_width': original_width,
        'original_height': original_height,
        'scale_factor': scale,
        'target_size': target_size
    }

    print(f"📦 Padding hinzugefügt: Offsets ({x_offset}, {y_offset})")

    return padded_image, padded_mask, padding_info
|
| 231 |
+
|
| 232 |
+
# === KORREKTE COMPOSITING-FUNKTION ===
|
| 233 |
+
def composite_edited_region(original_image, inpaint_result, original_mask, padding_info):
    """Blend the inpainted region back into the original-resolution image.

    Steps: crop the non-padded content area out of the square inpaint result,
    scale it back to the original size, feather the original mask, and blend
    the edited pixels over the original using that mask.

    Args:
        original_image: Source image at its original size.
        inpaint_result: Square inpainting output that still contains the
            padding added by ``scale_image_and_mask_together``.
        original_mask: Mask at original size (before scaling); bright areas
            take the edited pixels.
        padding_info: Dict returned by ``scale_image_and_mask_together``; the
            keys ``x_offset``, ``y_offset``, ``scaled_width``,
            ``scaled_height``, ``original_width`` and ``original_height``
            are read.

    Returns:
        A new RGB image of the original size with the edited region blended in.

    Raises:
        ValueError: If ``original_image`` or ``original_mask`` does not have
            the size recorded in ``padding_info``.
    """
    # Function-scope import: ImageFilter is only needed here.
    from PIL import ImageFilter

    print(f"🎨 Starte korrektes Compositing...")

    original_size = (padding_info['original_width'], padding_info['original_height'])

    # The blend below requires equally sized operands; fail with a clear
    # message instead of Pillow's generic "images do not match".
    if original_image.size != original_size or original_mask.size != original_size:
        raise ValueError(
            f"Originalbild {original_image.size} und Maske {original_mask.size} "
            f"müssen der Originalgröße {original_size} entsprechen"
        )

    # 1. Cut the content area (without padding) out of the inpaint result.
    x_offset = padding_info['x_offset']
    y_offset = padding_info['y_offset']
    scaled_width = padding_info['scaled_width']
    scaled_height = padding_info['scaled_height']

    edited_region = inpaint_result.crop(
        (x_offset, y_offset, x_offset + scaled_width, y_offset + scaled_height)
    )

    print(f" Bearbeiteter Bereich extrahiert: {edited_region.size}")

    # 2. Scale the edited content back to the original resolution.
    edited_region_fullsize = edited_region.resize(original_size, Image.Resampling.LANCZOS)

    print(f" Auf Originalgröße skaliert: {edited_region_fullsize.size}")

    # 3. Feather the mask: MaxFilter(5) is a 5x5 window, i.e. it grows the
    #    masked area by 2 px per side; the blur then softens the edge.
    soft_mask = original_mask.filter(ImageFilter.MaxFilter(5))
    soft_mask = soft_mask.filter(ImageFilter.GaussianBlur(3))
    soft_mask = soft_mask.convert("L")

    # 4. Single-pass blend: edited * m + original * (1 - m).
    #    Pasting with the mask onto a transparent black RGBA canvas and then
    #    alpha-compositing applies the mask twice (edited * m^2), which
    #    darkens the feathered edge into a visible halo.
    final_image = Image.composite(
        edited_region_fullsize.convert("RGB"),
        original_image.convert("RGB"),
        soft_mask,
    )

    print(f"✅ Compositing abgeschlossen. Finale Größe: {final_image.size}")

    return final_image
|
| 296 |
+
|
| 297 |
def auto_detect_face_area(image):
|
| 298 |
"""Optimierten Vorschlag für Gesichtsbereich ohne externe Bibliotheken"""
|
| 299 |
width, height = image.size
|
|
|
|
| 321 |
"""Lädt das Text-to-Image Modell basierend auf der Auswahl"""
|
| 322 |
global pipe_txt2img, current_pipe_model_id
|
| 323 |
|
|
|
|
| 324 |
if pipe_txt2img is not None and current_pipe_model_id == model_id:
|
| 325 |
print(f"✅ Modell {model_id} bereits geladen")
|
| 326 |
return pipe_txt2img
|
|
|
|
| 347 |
print("ℹ️ Versuche ohne VAE weiter...")
|
| 348 |
vae = None
|
| 349 |
|
|
|
|
| 350 |
model_params = {
|
| 351 |
"torch_dtype": torch_dtype,
|
| 352 |
"safety_checker": None,
|
| 353 |
"requires_safety_checker": False,
|
| 354 |
"add_watermarker": False,
|
| 355 |
+
"allow_pickle": True,
|
| 356 |
}
|
| 357 |
|
|
|
|
| 358 |
if model_id in SAFETENSORS_MODELS:
|
| 359 |
model_params["use_safetensors"] = True
|
| 360 |
print(f"ℹ️ Verwende safetensors für {model_id}")
|
|
|
|
| 362 |
model_params["use_safetensors"] = False
|
| 363 |
print(f"ℹ️ Verwende .bin weights für {model_id}")
|
| 364 |
|
|
|
|
| 365 |
if config.get("supports_fp16", False) and torch_dtype == torch.float16:
|
| 366 |
model_params["variant"] = "fp16"
|
| 367 |
print("ℹ️ Verwende FP16 Variante")
|
| 368 |
else:
|
| 369 |
print("ℹ️ Verwende Standard Variante (kein FP16)")
|
| 370 |
|
|
|
|
| 371 |
if vae is not None:
|
| 372 |
model_params["vae"] = vae
|
| 373 |
|
|
|
|
| 377 |
**model_params
|
| 378 |
).to(device)
|
| 379 |
|
|
|
|
| 380 |
print("⚙️ Konfiguriere Scheduler...")
|
| 381 |
|
|
|
|
| 382 |
if pipe_txt2img.scheduler is None:
|
| 383 |
print("⚠️ Scheduler ist None, setze Standard-Scheduler")
|
| 384 |
pipe_txt2img.scheduler = PNDMScheduler.from_pretrained(
|
|
|
|
| 386 |
subfolder="scheduler"
|
| 387 |
)
|
| 388 |
|
|
|
|
| 389 |
try:
|
|
|
|
| 390 |
if hasattr(pipe_txt2img.scheduler, 'config'):
|
| 391 |
scheduler_config = pipe_txt2img.scheduler.config
|
| 392 |
else:
|
|
|
|
| 393 |
scheduler_config = {
|
| 394 |
"beta_start": 0.00085,
|
| 395 |
"beta_end": 0.012,
|
|
|
|
| 400 |
}
|
| 401 |
print("⚠️ Keine Scheduler-Konfig gefunden, verwende Standard")
|
| 402 |
|
|
|
|
| 403 |
pipe_txt2img.scheduler = DPMSolverMultistepScheduler.from_config(
|
| 404 |
scheduler_config,
|
| 405 |
use_karras_sigmas=True,
|
|
|
|
| 411 |
print(f"⚠️ Konnte DPM-Scheduler nicht setzen: {scheduler_error}")
|
| 412 |
print("ℹ️ Verwende Standard-Scheduler weiter")
|
| 413 |
|
|
|
|
| 414 |
pipe_txt2img.enable_attention_slicing()
|
| 415 |
print("✅ Attention Slicing aktiviert")
|
| 416 |
|
|
|
|
| 417 |
if hasattr(pipe_txt2img, 'vae') and pipe_txt2img.vae is not None:
|
| 418 |
try:
|
| 419 |
pipe_txt2img.enable_vae_slicing()
|
|
|
|
| 437 |
traceback.print_exc()
|
| 438 |
print("🔄 Fallback auf SD 1.5...")
|
| 439 |
|
|
|
|
| 440 |
try:
|
| 441 |
pipe_txt2img = StableDiffusionPipeline.from_pretrained(
|
| 442 |
"runwayml/stable-diffusion-v1-5",
|
|
|
|
| 465 |
safety_checker=None,
|
| 466 |
).to(device)
|
| 467 |
|
|
|
|
| 468 |
pipe_img2img.scheduler = DPMSolverMultistepScheduler.from_config(
|
| 469 |
pipe_img2img.scheduler.config,
|
| 470 |
algorithm_type="sde-dpmsolver++",
|
|
|
|
| 659 |
gr.update(maximum=max_height)
|
| 660 |
)
|
| 661 |
|
| 662 |
+
# === KORRIGIERTE HAUPTFUNKTION MIT RICHTIGEM COMPOSITING ===
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 663 |
def img_to_image(image, prompt, neg_prompt, strength, steps, guidance_scale,
|
| 664 |
mode, bbox_x1, bbox_y1, bbox_x2, bbox_y2,
|
| 665 |
progress=gr.Progress()):
|
| 666 |
"""
|
| 667 |
+
KORRIGIERTE HAUPTFUNKTION FÜR BILD-zu-BILD MIT RICHTIGEM COMPOSITING
|
| 668 |
|
| 669 |
+
WICHTIG: Verwendet den korrekten Compositing-Workflow:
|
| 670 |
+
1. Skaliert Bild und Maske gemeinsam
|
| 671 |
+
2. Führt Inpainting auf 512×512 durch
|
| 672 |
+
3. Kompositiert nur den bearbeiteten Bereich zurück ins Originalbild
|
|
|
|
|
|
|
|
|
|
|
|
|
| 673 |
"""
|
| 674 |
try:
|
| 675 |
if image is None:
|
|
|
|
| 683 |
print(f"📝 Prompt: {prompt}")
|
| 684 |
print(f"🚫 Negativ-Prompt: {neg_prompt}")
|
| 685 |
|
| 686 |
+
# ===== AUTOMATISCHEN NEGATIV-PROMPT GENERIEREN =====
|
|
|
|
| 687 |
auto_negatives = auto_negative_prompt(prompt)
|
| 688 |
print(f"🤖 Automatisch generierter Negativ-Prompt: {auto_negatives}")
|
| 689 |
|
|
|
|
| 691 |
combined_negative_prompt = ""
|
| 692 |
|
| 693 |
if neg_prompt and neg_prompt.strip():
|
|
|
|
| 694 |
user_neg = neg_prompt.strip()
|
| 695 |
print(f"👤 Benutzer Negativ-Prompt: {user_neg}")
|
| 696 |
|
|
|
|
| 697 |
user_words = [word.strip().lower() for word in user_neg.split(",")]
|
| 698 |
auto_words = [word.strip().lower() for word in auto_negatives.split(",")]
|
| 699 |
|
|
|
|
| 700 |
combined_words = user_words.copy()
|
| 701 |
|
|
|
|
| 702 |
for auto_word in auto_words:
|
| 703 |
if auto_word and auto_word not in user_words:
|
| 704 |
combined_words.append(auto_word)
|
| 705 |
|
|
|
|
| 706 |
unique_words = []
|
| 707 |
seen_words = set()
|
| 708 |
for word in combined_words:
|
|
|
|
| 712 |
|
| 713 |
combined_negative_prompt = ", ".join(unique_words)
|
| 714 |
else:
|
|
|
|
| 715 |
combined_negative_prompt = auto_negatives
|
| 716 |
print(f"ℹ️ Kein manueller Negativ-Prompt, verwende nur automatischen: {combined_negative_prompt}")
|
| 717 |
|
| 718 |
print(f"✅ Finaler kombinierter Negativ-Prompt: {combined_negative_prompt}")
|
| 719 |
|
|
|
|
| 720 |
# ===== GESICHTS-SPEZIFISCHE BOOSTER FÜR NUR-GESICHT MODUS =====
|
| 721 |
if mode == "face_only_change":
|
|
|
|
| 722 |
face_boosters = "(perfect face:1.2), (symmetrical face:1.1), realistic shaded perfect face, "
|
| 723 |
|
|
|
|
| 724 |
if not any(keyword in prompt.lower() for keyword in
|
| 725 |
["perfect face", "symmetrical", "realistic face", "shaded face"]):
|
| 726 |
enhanced_prompt = face_boosters + prompt
|
|
|
|
| 729 |
enhanced_prompt = prompt
|
| 730 |
print(f"👤 Benutzer hat bereits Gesichts-Booster im Prompt")
|
| 731 |
else:
|
|
|
|
| 732 |
enhanced_prompt = prompt
|
| 733 |
|
| 734 |
# ===== HINTERGRUND-BOOSTER FÜR UMGEBUNGS-ÄNDERUNG =====
|
| 735 |
if mode == "environment_change":
|
|
|
|
| 736 |
background_boosters = "complete scene, full background, entire environment, "
|
| 737 |
|
|
|
|
| 738 |
if not any(keyword in enhanced_prompt.lower() for keyword in
|
| 739 |
["complete scene", "full background", "entire environment", "whole setting"]):
|
| 740 |
enhanced_prompt = background_boosters + enhanced_prompt
|
|
|
|
| 742 |
|
| 743 |
print(f"🎯 Finaler Prompt für {mode}: {enhanced_prompt}")
|
| 744 |
|
|
|
|
| 745 |
progress(0, desc="Starte Generierung mit ControlNet...")
|
| 746 |
|
|
|
|
| 747 |
# ===== MODUS-SPEZIFISCHE EINSTELLUNGEN =====
|
| 748 |
adj_strength = min(0.85, strength * 1.25)
|
| 749 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 750 |
if mode == "focus_change":
|
|
|
|
| 751 |
keep_environment = False
|
| 752 |
+
controlnet_strength = adj_strength * 0.7
|
| 753 |
print(f"🎯 MODUS: Focus verändern → OpenPose+Canny (keep_environment=False)")
|
| 754 |
|
| 755 |
elif mode == "environment_change":
|
|
|
|
| 756 |
keep_environment = True
|
| 757 |
+
controlnet_strength = adj_strength * 0.3
|
| 758 |
print(f"🎯 MODUS: Umgebung ändern → Depth+Canny (keep_environment=True)")
|
| 759 |
|
| 760 |
else: # face_only_change
|
|
|
|
| 761 |
keep_environment = True
|
| 762 |
+
controlnet_strength = adj_strength * 0.5
|
| 763 |
print(f"🎯 MODUS: Ausschließlich Gesicht → Depth+Canny (keep_environment=True)")
|
| 764 |
|
| 765 |
controlnet_steps = min(25, int(steps * 0.8))
|
|
|
|
| 767 |
|
| 768 |
progress(0.03, desc="ControlNet läuft...")
|
| 769 |
|
| 770 |
+
# ===== WICHTIG: VARIABLEN FÜR KOMPLETTEN WORKFLOW =====
|
| 771 |
+
original_mask = None
|
| 772 |
+
padding_info = None
|
| 773 |
+
controlnet_input = image.convert("RGB") # Standard: Originalbild
|
| 774 |
+
|
| 775 |
+
if bbox_x1 is not None and bbox_y1 is not None and bbox_x2 is not None and bbox_y2 is not None:
|
| 776 |
+
print(f"🎯 BBox Koordinaten erhalten: [{bbox_x1}, {bbox_y1}, {bbox_x2}, {bbox_y2}]")
|
| 777 |
+
|
| 778 |
+
# 1. MASKE AUF ORIGINAL-BILD ERSTELLEN (speichern für späteres Compositing)
|
| 779 |
+
original_mask = create_face_mask(image, (bbox_x1, bbox_y1, bbox_x2, bbox_y2), mode)
|
| 780 |
+
|
| 781 |
+
# 2. BILD UND MASKE GEMEINSAM SKALIEREN (mit Padding)
|
| 782 |
+
scaled_image, scaled_mask, padding_info = scale_image_and_mask_together(
|
| 783 |
+
image.convert("RGB"), # Originalbild
|
| 784 |
+
original_mask, # Originalmaske
|
| 785 |
+
target_size=IMG_SIZE
|
| 786 |
+
)
|
| 787 |
+
|
| 788 |
+
controlnet_input = scaled_image # Verwende das skalierte Bild für ControlNet
|
| 789 |
+
print(f"✅ Gemeinsame Skalierung abgeschlossen")
|
| 790 |
+
print(f" Original: {image.size} → Skaliert: {scaled_image.size}")
|
| 791 |
+
else:
|
| 792 |
+
# Keine BBox: Normales Img2Img (ohne Maske)
|
| 793 |
+
print(f"ℹ️ Keine BBox angegeben → normales Img2Img (ohne Maske)")
|
| 794 |
+
controlnet_input = image.convert("RGB").resize((IMG_SIZE, IMG_SIZE), Image.Resampling.LANCZOS)
|
| 795 |
+
|
| 796 |
# ===== CONTROLNET AUFRUF =====
|
| 797 |
+
print(f"📊 ControlNet Input Größe: {controlnet_input.size}")
|
| 798 |
+
|
| 799 |
controlnet_output, inpaint_input = controlnet_processor.generate_with_controlnet(
|
| 800 |
+
image=controlnet_input,
|
| 801 |
+
prompt=enhanced_prompt,
|
| 802 |
negative_prompt=combined_negative_prompt,
|
| 803 |
steps=controlnet_steps,
|
| 804 |
guidance_scale=guidance_scale,
|
| 805 |
controlnet_strength=controlnet_strength,
|
| 806 |
+
progress=None,
|
| 807 |
keep_environment=keep_environment
|
| 808 |
)
|
| 809 |
|
| 810 |
+
print(f"✅ ControlNet Output erhalten")
|
| 811 |
+
print(f"✅ Inpaint Input Größe: {inpaint_input.size}")
|
| 812 |
|
| 813 |
progress(0.3, desc="ControlNet abgeschlossen – starte Inpaint...")
|
| 814 |
|
| 815 |
+
# ===== INPAINTING PIPELINE =====
|
| 816 |
pipe = load_img2img()
|
| 817 |
|
| 818 |
+
# Bild für Inpainting vorbereiten
|
| 819 |
+
if inpaint_input.size != (IMG_SIZE, IMG_SIZE):
|
| 820 |
+
print(f"⚠️ Inpaint Input hat unerwartete Größe {inpaint_input.size}, skaliere auf {IMG_SIZE}x{IMG_SIZE}")
|
| 821 |
+
img_resized = inpaint_input.convert("RGB").resize((IMG_SIZE, IMG_SIZE), Image.Resampling.LANCZOS)
|
|
|
|
| 822 |
else:
|
| 823 |
img_resized = inpaint_input.convert("RGB")
|
| 824 |
+
print(f"✅ Inpaint Input ist bereits {IMG_SIZE}x{IMG_SIZE}")
|
| 825 |
|
| 826 |
+
# ===== SEED UND GENERATOR =====
|
| 827 |
adj_guidance = min(guidance_scale, 12.0)
|
| 828 |
seed = random.randint(0, 2**32 - 1)
|
| 829 |
generator = torch.Generator(device=device).manual_seed(seed)
|
| 830 |
print(f"🌱 Inpaint Seed: {seed}")
|
| 831 |
|
| 832 |
+
# ===== MASKE FÜR INPAINTING VORBEREITEN =====
|
| 833 |
+
inpaint_mask = None
|
| 834 |
+
if original_mask is not None and padding_info is not None:
|
| 835 |
+
# Verwende die skalierte Maske für Inpainting
|
| 836 |
+
_, scaled_mask, _ = scale_image_and_mask_together(
|
| 837 |
+
image.convert("RGB"),
|
| 838 |
+
original_mask,
|
| 839 |
+
target_size=IMG_SIZE
|
| 840 |
+
)
|
| 841 |
+
inpaint_mask = scaled_mask
|
| 842 |
+
print(f"✅ Maske für Inpainting vorbereitet: {inpaint_mask.size}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 843 |
|
| 844 |
+
# ===== FORTSCHRITTS-CALLBACK =====
|
| 845 |
callback = ImageToImageProgressCallback(progress, int(steps), adj_strength)
|
| 846 |
|
| 847 |
# ===== INPAINT DURCHFÜHREN =====
|
| 848 |
result = pipe(
|
| 849 |
+
prompt=enhanced_prompt,
|
| 850 |
negative_prompt=combined_negative_prompt,
|
| 851 |
image=img_resized,
|
| 852 |
+
mask_image=inpaint_mask,
|
| 853 |
strength=adj_strength,
|
| 854 |
num_inference_steps=int(steps),
|
| 855 |
guidance_scale=adj_guidance,
|
|
|
|
| 858 |
callback_on_step_end_tensor_inputs=[],
|
| 859 |
)
|
| 860 |
|
| 861 |
+
# ===== KORREKTES COMPOSITING =====
|
| 862 |
+
generated_image = result.images[0]
|
| 863 |
+
|
| 864 |
+
if original_mask is not None and padding_info is not None:
|
| 865 |
+
# KORREKTER WORKFLOW: Nur bearbeiteten Bereich in Originalbild einfügen
|
| 866 |
+
final_image = composite_edited_region(
|
| 867 |
+
original_image=image.convert("RGB"),
|
| 868 |
+
inpaint_result=generated_image,
|
| 869 |
+
original_mask=original_mask,
|
| 870 |
+
padding_info=padding_info
|
| 871 |
+
)
|
| 872 |
+
print(f"✅ Korrektes Compositing durchgeführt")
|
| 873 |
+
else:
|
| 874 |
+
# Keine Maske: Einfach das generierte Bild zurückgeben (bereits 512×512)
|
| 875 |
+
final_image = generated_image
|
| 876 |
+
print(f"ℹ️ Keine Maske → Direkte Rückgabe des 512×512 Bildes")
|
| 877 |
+
|
| 878 |
end_time = time.time()
|
| 879 |
duration = end_time - start_time
|
| 880 |
+
|
| 881 |
print(f"✅ Transformation abgeschlossen in {duration:.2f} Sekunden")
|
| 882 |
print(f"🎯 Verwendeter Modus: {mode}")
|
| 883 |
print(f"⚙️ ControlNet: {'Depth+Canny' if keep_environment else 'OpenPose+Canny'}")
|
| 884 |
+
print(f"📊 Finale Bildgröße: {final_image.size}")
|
| 885 |
|
| 886 |
+
return final_image
|
|
|
|
| 887 |
|
| 888 |
except Exception as e:
|
| 889 |
print(f"❌ Fehler in img_to_image: {e}")
|
| 890 |
import traceback
|
| 891 |
traceback.print_exc()
|
| 892 |
+
return None
|
| 893 |
|
| 894 |
def update_bbox_from_image(image):
|
| 895 |
"""Aktualisiert die Bounding-Box-Koordinaten wenn ein Bild hochgeladen wird"""
|
|
|
|
| 912 |
def main_ui():
|
| 913 |
"""
|
| 914 |
HAUPT-UI (ANGEPASST FÜR 3 MODI)
|
|
|
|
| 915 |
"""
|
| 916 |
with gr.Blocks(
|
| 917 |
title="AI Image Generator",
|
|
|
|
| 1249 |
"• **🆕 Dynamische Koordinaten-Schieberegler** passen sich an Bildgröße an\n"
|
| 1250 |
"• **ControlNet-Technologie** für konsistente Ergebnisse\n"
|
| 1251 |
"• **Automatische Negative Prompts** für bessere Qualität\n"
|
| 1252 |
+
"• **KORREKTER COMPOSITING-WORKFLOW** – nur bearbeiteter Bereich wird eingefügt\n"
|
| 1253 |
+
"• **Ausgabe: 512×512 Pixel** für beste Qualität (kein Hochskalieren!)"
|
| 1254 |
)
|
| 1255 |
|
| 1256 |
transform_btn = gr.Button("🔄 Bild transformieren", variant="primary")
|
|
|
|
| 1315 |
max_file_size="15MB",
|
| 1316 |
show_error=True,
|
| 1317 |
share=False,
|
| 1318 |
+
ssr_mode=False
|
| 1319 |
)
|