Astridkraft committed on
Commit
acd60f4
·
verified ·
1 Parent(s): f0735ee

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +99 -341
app.py CHANGED
@@ -9,6 +9,16 @@ import time
9
  import os
10
  import tempfile
11
  import random
 
 
 
 
 
 
 
 
 
 
12
 
13
  # === OPTIMIERTE EINSTELLUNGEN ===
14
  device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -20,7 +30,7 @@ print(f"Running on: {device}")
20
  # === MODELLKONFIGURATION (NUR 2 MODELLE) ===
21
  MODEL_CONFIGS = {
22
  "runwayml/stable-diffusion-v1-5": {
23
- "name": "🏠 Stable Diffusion 1.5 (Universal)",
24
  "description": "Universal model, good all-rounder, reliable results",
25
  "requires_vae": False,
26
  "recommended_steps": 35,
@@ -28,7 +38,7 @@ MODEL_CONFIGS = {
28
  "supports_fp16": True
29
  },
30
  "SG161222/Realistic_Vision_V6.0_B1_noVAE": {
31
- "name": "👤 Realistic Vision V6.0 (Portraits)",
32
  "description": "Best for photorealistic faces, skin details, human portraits",
33
  "requires_vae": True,
34
  "vae_model": "stabilityai/sd-vae-ft-mse",
@@ -38,106 +48,79 @@ MODEL_CONFIGS = {
38
  }
39
  }
40
 
41
- # === SAFETENSORS KONFIGURATION ===
42
  SAFETENSORS_MODELS = ["runwayml/stable-diffusion-v1-5"]
43
-
44
- # Aktuell ausgewähltes Modell (wird vom User gesetzt)
45
  current_model_id = "runwayml/stable-diffusion-v1-5"
46
 
47
  # === AUTOMATISCHE NEGATIVE PROMPT GENERIERUNG ===
48
  def auto_negative_prompt(positive_prompt):
49
- """Generiert automatisch negative Prompts basierend auf dem positiven Prompt"""
50
  p = positive_prompt.lower()
51
  negatives = []
52
 
53
- # Personen / Portraits
54
  if any(w in p for w in [
55
- "person", "man", "woman", "face", "portrait", "team", "employee",
56
- "people", "crowd", "character", "figure", "human", "child", "baby",
57
- "girl", "boy", "lady", "gentleman", "fairy", "elf", "dwarf", "santa claus"
58
- "mermaid", "angel", "demon", "witch", "wizard", "creature", "being",
59
- "model", "actor", "actress", "celebrity", "avatar", "group"]):
 
60
  negatives.append(
61
  "blurry face, lowres face, deformed pupils, bad anatomy, malformed hands, extra fingers, uneven eyes, distorted face, "
62
  "unrealistic skin, mutated, ugly, disfigured, poorly drawn face, "
63
  "missing limbs, extra limbs, fused fingers, too many fingers, bad teeth, "
64
- "mutated hands, long neck, extra wings, multiple wings,grainy face, noisy face, "
65
  "compression artifacts, rendering artifacts, digital artifacts, overprocessed face, oversmoothed face "
66
  )
67
-
68
- # Business / Corporate
69
  if any(w in p for w in ["office", "business", "team", "meeting", "corporate", "company", "workplace"]):
70
- negatives.append(
71
- "overexposed, oversaturated, harsh lighting, watermark, text, logo, brand"
72
- )
73
 
74
- # Produkt / CGI
75
  if any(w in p for w in ["product", "packshot", "mockup", "render", "3d", "cgi", "packaging"]):
76
- negatives.append(
77
- "plastic texture, noisy, overly reflective surfaces, watermark, text, low poly"
78
- )
79
 
80
- # Landschaft / Umgebung
81
  if any(w in p for w in ["landscape", "nature", "mountain", "forest", "outdoor", "beach", "sky"]):
82
- negatives.append(
83
- "blurry, oversaturated, unnatural colors, distorted horizon, floating objects"
84
- )
85
 
86
- # Logos / Symbole
87
  if any(w in p for w in ["logo", "symbol", "icon", "typography", "badge", "emblem"]):
88
- negatives.append(
89
- "watermark, signature, username, text, writing, scribble, messy"
90
- )
91
 
92
- # Architektur / Gebäude
93
  if any(w in p for w in ["building", "architecture", "house", "interior", "room", "facade"]):
94
- negatives.append(
95
- "deformed, distorted perspective, floating objects, collapsing structure"
96
- )
97
 
98
- # Basis negative Prompts für alle Fälle
99
  base_negatives = "low quality, worst quality, blurry, jpeg artifacts, ugly, deformed"
100
 
101
- if negatives:
102
- return base_negatives + ", " + ", ".join(negatives)
103
- else:
104
- return base_negatives
 
 
 
 
 
 
105
 
106
  # === GESICHTSMASKEN-FUNKTIONEN ===
107
  def create_face_mask(image, bbox_coords, face_preserve):
108
- """Erzeugt eine Gesichtsmaske - WEIßE Bereiche werden VERÄNDERT, SCHWARZE BLEIBEN"""
109
- mask = Image.new("L", image.size, 0) # Start mit komplett schwarzer Maske (alles geschützt)
110
-
111
  if bbox_coords and all(coord is not None for coord in bbox_coords):
112
  x1, y1, x2, y2 = bbox_coords
113
  draw = ImageDraw.Draw(mask)
114
-
115
  if face_preserve:
116
- # GESICHTSERHALTUNG: Maske um das Gesicht herum zeichnen
117
- draw.rectangle([0, 0, image.size[0], image.size[1]], fill=255) # Alles weiß = verändern
118
- draw.rectangle([x1, y1, x2, y2], fill=0) # Gesicht schwarz = geschützt (rechteckig)
119
- print("Gesicht wird GESCHÜTZT - Umgebung wird verändert (rechteckige Maske)")
120
  else:
121
- # NUR GESICHT VERÄNDERN: Nur das Gesicht wird weiß (verändert)
122
- draw.rectangle([x1, y1, x2, y2], fill=255) # Gesicht weiß = verändern (rechteckig)
123
- print("Nur Gesicht wird verändert - Umgebung bleibt erhalten (rechteckige Maske)")
124
-
125
  return mask
126
 
127
  def auto_detect_face_area(image):
128
- """Optimierten Vorschlag für Gesichtsbereich ohne externe Bibliotheken"""
129
  width, height = image.size
130
- # Größere Bounding Box für bessere Abdeckung (50% statt 40%)
131
  face_size = min(width, height) * 0.4
132
- # Verschiebe y1 nach oben, um Stirn und Kinn besser abzudecken
133
  x1 = (width - face_size) / 2
134
- y1 = (height - face_size) / 4 # Höher positioniert (25% statt 33%)
135
  x2 = x1 + face_size
136
- y2 = y1 + face_size * 1.2 # Leicht länglicher für ovale Gesichter
137
- # Stelle sicher, dass Koordinaten innerhalb des Bildes liegen
138
  x1, y1 = max(0, int(x1)), max(0, int(y1))
139
  x2, y2 = min(width, int(x2)), min(height, int(y2))
140
- print(f"Geschätzte Gesichtskoordinaten: [{x1}, {y1}, {x2}, {y2}]")
141
  return [x1, y1, x2, y2]
142
 
143
  # === PIPELINES ===
@@ -146,375 +129,150 @@ current_pipe_model_id = None
146
  pipe_img2img = None
147
 
148
  def load_txt2img(model_id):
149
- """Lädt das Text-to-Image Modell basierend auf der Auswahl"""
150
  global pipe_txt2img, current_pipe_model_id
151
-
152
- # Wenn bereits das richtige Modell geladen ist, nichts tun
153
  if pipe_txt2img is not None and current_pipe_model_id == model_id:
154
- print(f"✅ Modell {model_id} bereits geladen")
155
  return pipe_txt2img
156
 
157
- print(f"🔄 Lade Modell: {model_id}")
158
-
159
  config = MODEL_CONFIGS.get(model_id, MODEL_CONFIGS["runwayml/stable-diffusion-v1-5"])
160
- print(f"📋 Modell-Konfiguration: {config['name']}")
161
- print(f"📝 Beschreibung: {config['description']}")
162
 
163
  try:
164
- # VAE-Handling basierend auf Modellkonfiguration
165
  vae = None
166
  if config.get("requires_vae", False):
167
- print(f"🔧 Lade externe VAE: {config['vae_model']}")
168
- try:
169
- vae = AutoencoderKL.from_pretrained(
170
- config["vae_model"],
171
- torch_dtype=torch_dtype
172
- ).to(device)
173
- print("✅ VAE erfolgreich geladen")
174
- except Exception as vae_error:
175
- print(f"⚠️ Fehler beim Laden der VAE: {vae_error}")
176
- print("ℹ️ Versuche ohne VAE weiter...")
177
- vae = None
178
 
179
- # Modellparameter basierend auf Modelltyp
180
  model_params = {
181
  "torch_dtype": torch_dtype,
182
  "safety_checker": None,
183
  "requires_safety_checker": False,
184
- "add_watermarker": False,
185
- "allow_pickle": True, # Für .bin Modelle wichtig
186
  }
187
 
188
- # SAFETENSORS LOGIK
189
  if model_id in SAFETENSORS_MODELS:
190
  model_params["use_safetensors"] = True
191
- print(f"ℹ️ Verwende safetensors für {model_id}")
192
- else:
193
- model_params["use_safetensors"] = False
194
- print(f"ℹ️ Verwende .bin weights für {model_id}")
195
 
196
- # FP16 Variante nur wenn Modell sie unterstützt UND wir auf GPU sind
197
  if config.get("supports_fp16", False) and torch_dtype == torch.float16:
198
  model_params["variant"] = "fp16"
199
- print("ℹ️ Verwende FP16 Variante")
200
- else:
201
- print("ℹ️ Verwende Standard Variante (kein FP16)")
202
 
203
- # VAE nur wenn nicht None
204
  if vae is not None:
205
  model_params["vae"] = vae
206
 
207
- print(f"📥 Lade Hauptmodell von Hugging Face...")
208
- pipe_txt2img = StableDiffusionPipeline.from_pretrained(
209
- model_id,
210
- **model_params
211
- ).to(device)
212
-
213
- # SICHERER SCHEDULER-HANDLING
214
- print("⚙️ Konfiguriere Scheduler...")
215
-
216
- # Prüfe ob Scheduler existiert
217
- if pipe_txt2img.scheduler is None:
218
- print("⚠️ Scheduler ist None, setze Standard-Scheduler")
219
- pipe_txt2img.scheduler = PNDMScheduler.from_pretrained(
220
- model_id,
221
- subfolder="scheduler"
222
- )
223
 
224
- # Versuche DPM-Solver zu verwenden (bessere Ergebnisse)
225
  try:
226
- # Hole die Scheduler-Konfiguration
227
- if hasattr(pipe_txt2img.scheduler, 'config'):
228
- scheduler_config = pipe_txt2img.scheduler.config
229
- else:
230
- # Fallback-Konfiguration für Scheduler
231
- scheduler_config = {
232
- "beta_start": 0.00085,
233
- "beta_end": 0.012,
234
- "beta_schedule": "scaled_linear",
235
- "num_train_timesteps": 1000,
236
- "prediction_type": "epsilon",
237
- "steps_offset": 1
238
- }
239
- print("⚠️ Keine Scheduler-Konfig gefunden, verwende Standard")
240
-
241
- # Setze DPM-Solver Scheduler
242
  pipe_txt2img.scheduler = DPMSolverMultistepScheduler.from_config(
243
- scheduler_config,
244
  use_karras_sigmas=True,
245
  algorithm_type="sde-dpmsolver++"
246
  )
247
- print("✅ DPM-Solver Multistep Scheduler konfiguriert")
248
-
249
- except Exception as scheduler_error:
250
- print(f"⚠️ Konnte DPM-Scheduler nicht setzen: {scheduler_error}")
251
- print("ℹ️ Verwende Standard-Scheduler weiter")
252
-
253
- # Optimierungen
254
- pipe_txt2img.enable_attention_slicing()
255
- print("✅ Attention Slicing aktiviert")
256
-
257
- # VAE Slicing nur wenn VAE existiert
258
- if hasattr(pipe_txt2img, 'vae') and pipe_txt2img.vae is not None:
259
- try:
260
- pipe_txt2img.enable_vae_slicing()
261
- if hasattr(pipe_txt2img.vae, 'enable_slicing'):
262
- pipe_txt2img.vae.enable_slicing()
263
- print("✅ VAE Slicing aktiviert")
264
- except Exception as vae_slice_error:
265
- print(f"⚠️ VAE Slicing nicht möglich: {vae_slice_error}")
266
 
267
  current_pipe_model_id = model_id
268
- print(f"✅ {config['name']} erfolgreich geladen")
269
- print(f"📊 Modell-Dtype: {pipe_txt2img.dtype}")
270
- print(f"📊 Scheduler: {type(pipe_txt2img.scheduler).__name__}")
271
- print(f"⚙️ Empfohlene Einstellungen: Steps={config['recommended_steps']}, CFG={config['recommended_cfg']}")
272
-
273
  return pipe_txt2img
274
 
275
  except Exception as e:
276
- print(f"Fehler beim Laden von {model_id}: {str(e)[:200]}...")
277
- import traceback
278
- traceback.print_exc()
279
- print("🔄 Fallback auf SD 1.5...")
280
-
281
- # Fallback auf Standard SD 1.5
282
- try:
283
- pipe_txt2img = StableDiffusionPipeline.from_pretrained(
284
- "runwayml/stable-diffusion-v1-5",
285
- torch_dtype=torch_dtype,
286
- use_safetensors=True,
287
- ).to(device)
288
- pipe_txt2img.enable_attention_slicing()
289
- current_pipe_model_id = "runwayml/stable-diffusion-v1-5"
290
- print("✅ Fallback auf SD 1.5 erfolgreich")
291
-
292
- return pipe_txt2img
293
- except Exception as fallback_error:
294
- print(f"❌ Auch Fallback fehlgeschlagen: {fallback_error}")
295
- raise
296
 
297
  def load_img2img():
298
  global pipe_img2img
299
  if pipe_img2img is None:
300
- print("Loading Inpainting model...")
301
- try:
302
- pipe_img2img = StableDiffusionInpaintPipeline.from_pretrained(
303
- "runwayml/stable-diffusion-inpainting",
304
- torch_dtype=torch_dtype,
305
- allow_pickle=False,
306
- safety_checker=None,
307
- ).to(device)
308
- except Exception as e:
309
- print(f"Fehler beim Laden des Inpainting-Modells: {e}")
310
- raise
311
-
312
- from diffusers import DPMSolverMultistepScheduler
313
- pipe_img2img.scheduler = DPMSolverMultistepScheduler.from_config(
314
- pipe_img2img.scheduler.config,
315
- algorithm_type="sde-dpmsolver++",
316
- use_karras_sigmas=True,
317
- timestep_spacing="trailing"
318
- )
319
-
320
  pipe_img2img.enable_attention_slicing()
321
  pipe_img2img.enable_vae_tiling()
322
- if hasattr(pipe_img2img, 'vae_slicing'):
323
- pipe_img2img.vae_slicing = True
324
-
325
  return pipe_img2img
326
 
327
- # === NEUE CALLBACK-FUNKTIONEN FÜR FORTSCHRITT ===
328
  class TextToImageProgressCallback:
329
  def __init__(self, progress, total_steps):
330
  self.progress = progress
331
  self.total_steps = total_steps
332
- self.current_step = 0
333
-
334
  def __call__(self, pipe, step, timestep, callback_kwargs):
335
- self.current_step = step + 1
336
- progress_percent = (step / self.total_steps) * 100
337
- self.progress(progress_percent / 100, desc="Generierung läuft...")
338
  return callback_kwargs
339
 
340
  class ImageToImageProgressCallback:
341
  def __init__(self, progress, total_steps, strength):
342
  self.progress = progress
343
  self.total_steps = total_steps
344
- self.current_step = 0
345
  self.strength = strength
346
- self.actual_total_steps = None
347
-
348
  def __call__(self, pipe, step, timestep, callback_kwargs):
349
- self.current_step = step + 1
350
-
351
- if self.actual_total_steps is None:
352
- if self.strength < 1.0:
353
- self.actual_total_steps = int(self.total_steps * self.strength)
354
- else:
355
- self.actual_total_steps = self.total_steps
356
-
357
- print(f"🎯 INTERNE STEP-AUSGABE: Strength {self.strength} → {self.actual_total_steps} tatsächliche Denoising-Schritte")
358
-
359
- progress_percent = (step / self.actual_total_steps) * 100
360
- self.progress(progress_percent / 100, desc="Generierung läuft...")
361
  return callback_kwargs
362
 
363
- # === NEUE FUNKTIONEN FÜR DIE FEATURES ===
364
- def create_preview_image(image, bbox_coords, face_preserve, mode_color):
365
- """Erstellt eine Vorschau mit farbigem Rahmen basierend auf dem Modus"""
366
- if image is None:
367
- return None
368
-
369
- preview = image.copy()
370
- draw = ImageDraw.Draw(preview)
371
-
372
- if mode_color == "red":
373
- border_color = (255, 0, 0, 180)
374
- mode_text = "NUR BILDELEMENT VERÄNDERN"
375
- else:
376
- border_color = (0, 255, 0, 180)
377
- mode_text = "BILDELEMENT BEIBEHALTEN"
378
-
379
- border_width = 8
380
- draw.rectangle([0, 0, preview.width-1, preview.height-1],
381
- outline=border_color, width=border_width)
382
-
383
- if bbox_coords and all(coord is not None for coord in bbox_coords):
384
- x1, y1, x2, y2 = bbox_coords
385
-
386
- box_color = (255, 255, 0, 200)
387
- draw.rectangle([x1, y1, x2, y2], outline=box_color, width=3)
388
-
389
- text_color = (255, 255, 255)
390
- bg_color = (0, 0, 0, 160)
391
-
392
- text_bbox = draw.textbbox((x1, y1 - 25), mode_text)
393
- draw.rectangle([text_bbox[0]-5, text_bbox[1]-2, text_bbox[2]+5, text_bbox[3]+2],
394
- fill=bg_color)
395
-
396
- draw.text((x1, y1 - 25), mode_text, fill=text_color)
397
-
398
- return preview
399
-
400
- def update_live_preview(image, bbox_x1, bbox_y1, bbox_x2, bbox_y2, face_preserve):
401
- """Aktualisiert die Live-Vorschau bei Koordinaten-Änderungen"""
402
- if image is None:
403
- return None
404
-
405
- bbox_coords = [bbox_x1, bbox_y1, bbox_x2, bbox_y2]
406
- mode_color = "green" if face_preserve else "red"
407
-
408
- return create_preview_image(image, bbox_coords, face_preserve, mode_color)
409
-
410
- def process_image_upload(image):
411
- """Verarbeitet Bild-Upload und gibt Bild + Koordinaten zurück"""
412
- if image is None:
413
- return None, None, None, None, None
414
-
415
- if image.size != (512, 512):
416
- image = image.resize((512, 512), Image.LANCZOS)
417
- print(f"Bild auf 512x512 skaliert")
418
-
419
- bbox = auto_detect_face_area(image)
420
- bbox_x1, bbox_y1, bbox_x2, bbox_y2 = bbox
421
-
422
- preview = create_preview_image(image, bbox, True, "green")
423
-
424
- return preview, bbox_x1, bbox_y1, bbox_x2, bbox_y2
425
-
426
- # === HAUPTFUNKTIONEN ===
427
  def text_to_image(prompt, model_id, steps, guidance_scale, progress=gr.Progress()):
428
  try:
429
  if not prompt or not prompt.strip():
430
  return None, "Bitte einen Prompt eingeben"
431
 
432
- print(f"🚀 Starte Generierung mit Modell: {model_id}")
433
- print(f"📝 Prompt: {prompt}")
434
-
435
- # Automatische negative Prompts generieren
436
  auto_negatives = auto_negative_prompt(prompt)
437
- print(f"🤖 Automatisch generierte Negative Prompts: {auto_negatives}")
438
-
439
  start_time = time.time()
440
-
441
 
442
- # Liste von Qualitätswörtern/Gewichten, die auf Benutzereingaben prüfen
443
- quality_keywords = ['masterpiece', 'best quality', 'high quality', 'highly detailed',
444
- 'exquisite', 'detailed', 'ultra detailed', 'professional',
445
- 'perfect', 'excellent', 'amazing', 'stunning', 'beautiful']
446
-
447
- # Prüfe, ob der Benutzer bereits Qualitätswörter/Gewichte verwendet hat
448
- user_has_quality_words = False
449
-
450
- # Konvertiere Prompt zu Kleinbuchstaben für die Prüfung
451
- prompt_lower = prompt.lower()
452
-
453
- # Prüfe auf einfache Qualitätswörter
454
- for keyword in quality_keywords:
455
- if keyword in prompt_lower:
456
- user_has_quality_words = True
457
- print(f"✓ Benutzer verwendet bereits Qualitätswort: {keyword}")
458
- break
459
-
460
- # Prüfe auf Gewichte (z.B. (word:1.5), [word], etc.)
461
- weight_patterns = [r'\([^)]+:\d+(\.\d+)?\)', r'\[[^\]]+\]']
462
- for pattern in weight_patterns:
463
- if re.search(pattern, prompt):
464
- user_has_quality_words = True
465
- print("✓ Benutzer verwendet bereits Gewichte im Prompt")
466
- break
467
-
468
- # Prompt basierend auf Prüfung anpassen
469
- if not user_has_quality_words:
470
- enhanced_prompt = f"masterpiece, raw, best quality, highly detailed, {prompt}"
471
- print(f"🔄 Verbesserter Prompt: {enhanced_prompt}")
472
- else:
473
- enhanced_prompt = prompt
474
- print("✓ Benutzerprompt wird unverändert verwendet")
475
-
476
- print(f"Finaler Prompt für Generation: {enhanced_prompt}")
477
 
478
-
479
-
480
  progress(0, desc="Lade Modell...")
481
  pipe = load_txt2img(model_id)
482
 
483
  seed = random.randint(0, 2**32 - 1)
484
  generator = torch.Generator(device=device).manual_seed(seed)
485
- print(f"🌱 Seed: {seed}")
486
-
487
- callback = TextToImageProgressCallback(progress, steps)
488
-
489
- print(f"⚙️ Einstellungen: Steps={steps}, CFG={guidance_scale}")
490
-
491
  image = pipe(
492
  prompt=enhanced_prompt,
493
  negative_prompt=auto_negatives,
494
- height=512,
495
- width=512,
496
  num_inference_steps=int(steps),
497
  guidance_scale=guidance_scale,
498
  generator=generator,
499
- callback_on_step_end=callback,
500
  callback_on_step_end_tensor_inputs=[],
501
  ).images[0]
502
-
503
- end_time = time.time()
504
- duration = end_time - start_time
505
- print(f" Bild generiert in {duration:.2f} Sekunden")
506
-
507
- config = MODEL_CONFIGS.get(model_id, MODEL_CONFIGS["runwayml/stable-diffusion-v1-5"])
508
- status_msg = f"✅ Generiert mit {config['name']} in {duration:.1f}s"
509
-
 
 
 
 
 
 
 
 
 
 
 
 
510
  return image, status_msg
511
 
512
  except Exception as e:
513
- error_msg = f" Fehler: {str(e)}"
514
- print(f"❌ Fehler in text_to_image: {e}")
515
  import traceback
516
  traceback.print_exc()
517
- return None, error_msg
 
518
 
519
  def img_to_image(image, prompt, neg_prompt, strength, steps, guidance_scale,
520
  face_preserve, bbox_x1, bbox_y1, bbox_x2, bbox_y2,
 
9
  import os
10
  import tempfile
11
  import random
12
+ import re
13
+
14
+ # === FACE-FIX IMPORT (automatisch nur bei Personen) ===
15
+ try:
16
+ from controlnet_facefix import apply_facefix
17
+ FACEFIX_AVAILABLE = True
18
+ print("Face-Fix (OpenPose_faceonly + Depth) erfolgreich geladen")
19
+ except Exception as e:
20
+ print(f"Face-Fix nicht verfügbar: {e}")
21
+ FACEFIX_AVAILABLE = False
22
 
23
  # === OPTIMIERTE EINSTELLUNGEN ===
24
  device = "cuda" if torch.cuda.is_available() else "cpu"
 
30
  # === MODELLKONFIGURATION (NUR 2 MODELLE) ===
31
  MODEL_CONFIGS = {
32
  "runwayml/stable-diffusion-v1-5": {
33
+ "name": "Stable Diffusion 1.5 (Universal)",
34
  "description": "Universal model, good all-rounder, reliable results",
35
  "requires_vae": False,
36
  "recommended_steps": 35,
 
38
  "supports_fp16": True
39
  },
40
  "SG161222/Realistic_Vision_V6.0_B1_noVAE": {
41
+ "name": "Realistic Vision V6.0 (Portraits)",
42
  "description": "Best for photorealistic faces, skin details, human portraits",
43
  "requires_vae": True,
44
  "vae_model": "stabilityai/sd-vae-ft-mse",
 
48
  }
49
  }
50
 
 
51
  SAFETENSORS_MODELS = ["runwayml/stable-diffusion-v1-5"]
 
 
52
  current_model_id = "runwayml/stable-diffusion-v1-5"
53
 
54
  # === AUTOMATISCHE NEGATIVE PROMPT GENERIERUNG ===
55
  def auto_negative_prompt(positive_prompt):
 
56
  p = positive_prompt.lower()
57
  negatives = []
58
 
 
59
  if any(w in p for w in [
60
+ "person", "man", "woman", "face", "portrait", "team", "employee",
61
+ "people", "crowd", "character", "figure", "human", "child", "baby",
62
+ "girl", "boy", "lady", "gentleman", "fairy", "elf", "dwarf", "santa claus",
63
+ "mermaid", "angel", "demon", "witch", "wizard", "creature", "being",
64
+ "model", "actor", "actress", "celebrity", "avatar", "group"
65
+ ]):
66
  negatives.append(
67
  "blurry face, lowres face, deformed pupils, bad anatomy, malformed hands, extra fingers, uneven eyes, distorted face, "
68
  "unrealistic skin, mutated, ugly, disfigured, poorly drawn face, "
69
  "missing limbs, extra limbs, fused fingers, too many fingers, bad teeth, "
70
+ "mutated hands, long neck, extra wings, multiple wings, grainy face, noisy face, "
71
  "compression artifacts, rendering artifacts, digital artifacts, overprocessed face, oversmoothed face "
72
  )
73
+
 
74
  if any(w in p for w in ["office", "business", "team", "meeting", "corporate", "company", "workplace"]):
75
+ negatives.append("overexposed, oversaturated, harsh lighting, watermark, text, logo, brand")
 
 
76
 
 
77
  if any(w in p for w in ["product", "packshot", "mockup", "render", "3d", "cgi", "packaging"]):
78
+ negatives.append("plastic texture, noisy, overly reflective surfaces, watermark, text, low poly")
 
 
79
 
 
80
  if any(w in p for w in ["landscape", "nature", "mountain", "forest", "outdoor", "beach", "sky"]):
81
+ negatives.append("blurry, oversaturated, unnatural colors, distorted horizon, floating objects")
 
 
82
 
 
83
  if any(w in p for w in ["logo", "symbol", "icon", "typography", "badge", "emblem"]):
84
+ negatives.append("watermark, signature, username, text, writing, scribble, messy")
 
 
85
 
 
86
  if any(w in p for w in ["building", "architecture", "house", "interior", "room", "facade"]):
87
+ negatives.append("deformed, distorted perspective, floating objects, collapsing structure")
 
 
88
 
 
89
  base_negatives = "low quality, worst quality, blurry, jpeg artifacts, ugly, deformed"
90
 
91
+ return base_negatives + ", " + ", ".join(negatives) if negatives else base_negatives
92
+
93
+ # === PERSONEN-ERKENNUNG (für Face-Fix) ===
94
# Substrings whose presence marks a prompt as depicting a person-like subject.
_PERSON_KEYWORDS = (
    "person", "man", "woman", "face", "portrait", "people", "child", "girl", "boy",
    "fairy", "elf", "witch", "santa", "nikolaus", "human", "character", "figure",
)


def is_person_prompt(prompt: str) -> bool:
    """Return True if the prompt mentions a person-like subject.

    Matching is a case-insensitive substring test against a fixed keyword
    list, so a keyword embedded in a longer word also counts. ``None`` and
    the empty string return False instead of raising.
    """
    if not prompt:
        return False
    p = prompt.lower()
    return any(w in p for w in _PERSON_KEYWORDS)
101
 
102
  # === GESICHTSMASKEN-FUNKTIONEN ===
103
  def create_face_mask(image, bbox_coords, face_preserve):
104
+ mask = Image.new("L", image.size, 0)
 
 
105
  if bbox_coords and all(coord is not None for coord in bbox_coords):
106
  x1, y1, x2, y2 = bbox_coords
107
  draw = ImageDraw.Draw(mask)
 
108
  if face_preserve:
109
+ draw.rectangle([0, 0, image.size[0], image.size[1]], fill=255)
110
+ draw.rectangle([x1, y1, x2, y2], fill=0)
 
 
111
  else:
112
+ draw.rectangle([x1, y1, x2, y2], fill=255)
 
 
 
113
  return mask
114
 
115
  def auto_detect_face_area(image):
 
116
  width, height = image.size
 
117
  face_size = min(width, height) * 0.4
 
118
  x1 = (width - face_size) / 2
119
+ y1 = (height - face_size) / 4
120
  x2 = x1 + face_size
121
+ y2 = y1 + face_size * 1.2
 
122
  x1, y1 = max(0, int(x1)), max(0, int(y1))
123
  x2, y2 = min(width, int(x2)), min(height, int(y2))
 
124
  return [x1, y1, x2, y2]
125
 
126
  # === PIPELINES ===
 
129
  pipe_img2img = None
130
 
131
  def load_txt2img(model_id):
 
132
  global pipe_txt2img, current_pipe_model_id
 
 
133
  if pipe_txt2img is not None and current_pipe_model_id == model_id:
 
134
  return pipe_txt2img
135
 
136
+ print(f"Lade Modell: {model_id}")
 
137
  config = MODEL_CONFIGS.get(model_id, MODEL_CONFIGS["runwayml/stable-diffusion-v1-5"])
 
 
138
 
139
  try:
 
140
  vae = None
141
  if config.get("requires_vae", False):
142
+ vae = AutoencoderKL.from_pretrained(config["vae_model"], torch_dtype=torch_dtype).to(device)
 
 
 
 
 
 
 
 
 
 
143
 
 
144
  model_params = {
145
  "torch_dtype": torch_dtype,
146
  "safety_checker": None,
147
  "requires_safety_checker": False,
 
 
148
  }
149
 
 
150
  if model_id in SAFETENSORS_MODELS:
151
  model_params["use_safetensors"] = True
 
 
 
 
152
 
 
153
  if config.get("supports_fp16", False) and torch_dtype == torch.float16:
154
  model_params["variant"] = "fp16"
 
 
 
155
 
 
156
  if vae is not None:
157
  model_params["vae"] = vae
158
 
159
+ pipe_txt2img = StableDiffusionPipeline.from_pretrained(model_id, **model_params).to(device)
160
+ pipe_txt2img.enable_attention_slicing()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
 
 
162
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
  pipe_txt2img.scheduler = DPMSolverMultistepScheduler.from_config(
164
+ pipe_txt2img.scheduler.config,
165
  use_karras_sigmas=True,
166
  algorithm_type="sde-dpmsolver++"
167
  )
168
+ except:
169
+ pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
 
171
  current_pipe_model_id = model_id
 
 
 
 
 
172
  return pipe_txt2img
173
 
174
  except Exception as e:
175
+ print(f"Fehler beim Laden, Fallback auf SD 1.5: {e}")
176
+ pipe_txt2img = StableDiffusionPipeline.from_pretrained(
177
+ "runwayml/stable-diffusion-v1-5", torch_dtype=torch_dtype, use_safetensors=True
178
+ ).to(device)
179
+ pipe_txt2img.enable_attention_slicing()
180
+ current_pipe_model_id = "runwayml/stable-diffusion-v1-5"
181
+ return pipe_txt2img
 
 
 
 
 
 
 
 
 
 
 
 
 
182
 
183
  def load_img2img():
184
  global pipe_img2img
185
  if pipe_img2img is None:
186
+ pipe_img2img = StableDiffusionInpaintPipeline.from_pretrained(
187
+ "runwayml/stable-diffusion-inpainting", torch_dtype=torch_dtype, safety_checker=None
188
+ ).to(device)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
  pipe_img2img.enable_attention_slicing()
190
  pipe_img2img.enable_vae_tiling()
 
 
 
191
  return pipe_img2img
192
 
193
+ # === CALLBACKS ===
194
  class TextToImageProgressCallback:
195
  def __init__(self, progress, total_steps):
196
  self.progress = progress
197
  self.total_steps = total_steps
 
 
198
  def __call__(self, pipe, step, timestep, callback_kwargs):
199
+ self.progress(step / self.total_steps, desc="Generierung läuft...")
 
 
200
  return callback_kwargs
201
 
202
  class ImageToImageProgressCallback:
203
  def __init__(self, progress, total_steps, strength):
204
  self.progress = progress
205
  self.total_steps = total_steps
 
206
  self.strength = strength
207
+ self.actual_steps = None
 
208
  def __call__(self, pipe, step, timestep, callback_kwargs):
209
+ if self.actual_steps is None:
210
+ self.actual_steps = int(self.total_steps * self.strength)
211
+ progress_val = step / self.actual_steps
212
+ self.progress(progress_val, desc="Generierung läuft...")
 
 
 
 
 
 
 
 
213
  return callback_kwargs
214
 
215
+ # === HAUPTFUNKTION: TEXT ZU BILD MIT AUTOMATISCHEM FACE-FIX ===
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
216
  def text_to_image(prompt, model_id, steps, guidance_scale, progress=gr.Progress()):
217
  try:
218
  if not prompt or not prompt.strip():
219
  return None, "Bitte einen Prompt eingeben"
220
 
221
+ print(f"Generierung mit Modell: {model_id}")
 
 
 
222
  auto_negatives = auto_negative_prompt(prompt)
 
 
223
  start_time = time.time()
 
224
 
225
+ # Qualitäts-Boost nur wenn nicht vorhanden
226
+ quality_keywords = ['masterpiece', 'best quality', 'raw', 'highly detailed', 'ultra realistic']
227
+ has_quality = any(kw in prompt.lower() for kw in quality_keywords)
228
+ has_weights = bool(re.search(r':\d+\.\d+|\([^)]+:\d', prompt))
229
+
230
+ enhanced_prompt = f"masterpiece, raw, best quality, highly detailed, {prompt}" if not (has_quality or has_weights) else prompt
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
231
 
 
 
232
  progress(0, desc="Lade Modell...")
233
  pipe = load_txt2img(model_id)
234
 
235
  seed = random.randint(0, 2**32 - 1)
236
  generator = torch.Generator(device=device).manual_seed(seed)
237
+
 
 
 
 
 
238
  image = pipe(
239
  prompt=enhanced_prompt,
240
  negative_prompt=auto_negatives,
241
+ height=512, width=512,
 
242
  num_inference_steps=int(steps),
243
  guidance_scale=guidance_scale,
244
  generator=generator,
245
+ callback_on_step_end=TextToImageProgressCallback(progress, steps),
246
  callback_on_step_end_tensor_inputs=[],
247
  ).images[0]
248
+
249
+ # AUTOMATISCHER FACE-FIX NUR BEI PERSONEN
250
+ if FACEFIX_AVAILABLE and is_person_prompt(enhanced_prompt):
251
+ print("Person erkannt Starte 20-Sekunden Face-Fix...")
252
+ progress(0.92, desc="Perfektioniere Gesicht & Hände...")
253
+ try:
254
+ image = apply_facefix(
255
+ image=image,
256
+ prompt=enhanced_prompt,
257
+ negative_prompt=auto_negatives,
258
+ seed=seed,
259
+ model_id=model_id
260
+ )
261
+ print("Face-Fix abgeschlossen!")
262
+ except Exception as e:
263
+ print(f"Face-Fix fehlgeschlagen (ignoriert): {e}")
264
+
265
+ duration = time.time() - start_time
266
+ config = MODEL_CONFIGS.get(model_id, {"name": model_id})
267
+ status_msg = f"Generiert mit {config.get('name', model_id)} in {duration:.1f}s"
268
  return image, status_msg
269
 
270
  except Exception as e:
271
+ print(f"Fehler in text_to_image: {e}")
 
272
  import traceback
273
  traceback.print_exc()
274
+ return None, f"Fehler: {str(e)}"
275
+
276
 
277
  def img_to_image(image, prompt, neg_prompt, strength, steps, guidance_scale,
278
  face_preserve, bbox_x1, bbox_y1, bbox_x2, bbox_y2,