| |
| |
| |
| import gradio as gr |
| from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline |
| from diffusers import StableDiffusionInpaintPipeline |
| from controlnet_module import controlnet_processor |
| import torch |
| from PIL import Image, ImageDraw |
| import time |
| import os |
| import tempfile |
| import random |
|
|
|
|
|
|
| |
# Select the compute device and the matching tensor precision once at import
# time: half precision on CUDA, full precision on CPU.
if torch.cuda.is_available():
    device, torch_dtype = "cuda", torch.float16
else:
    device, torch_dtype = "cpu", torch.float32

# Edge length (pixels) of the square images the pipelines work on.
IMG_SIZE = 512

print(f"Running on: {device}")
|
|
| |
def create_face_mask(image, bbox_coords, face_preserve):
    """Build a rectangular inpainting mask for *image*.

    WHITE (255) pixels are changed by the inpainting step, BLACK (0) pixels
    are kept.

    Args:
        image: PIL image; only its ``size`` is read.
        bbox_coords: ``[x1, y1, x2, y2]`` in pixel coordinates of *image*.
            If it is empty/None or contains a ``None`` entry, the returned
            mask is entirely black.
        face_preserve: When true the box is protected (black) and everything
            around it is white; otherwise only the box is white.

    Returns:
        A new mode-"L" image with the same size as *image*.
    """
    mask = Image.new("L", image.size, 0)

    if not bbox_coords or any(coord is None for coord in bbox_coords):
        return mask

    x1, y1, x2, y2 = bbox_coords
    # The corners come from free-form number inputs and may be swapped.
    # Recent Pillow releases reject a rectangle whose second corner lies
    # before the first, so order them before drawing.
    left, right = sorted((x1, x2))
    top, bottom = sorted((y1, y2))
    box = [left, top, right, bottom]

    draw = ImageDraw.Draw(mask)
    if face_preserve:
        # Paint everything white, then cut the protected box back to black.
        draw.rectangle([0, 0, image.size[0], image.size[1]], fill=255)
        draw.rectangle(box, fill=0)
        print("Gesicht wird GESCHÜTZT - Umgebung wird verändert (rechteckige Maske)")
    else:
        # Only the box itself is handed to the inpainting step.
        draw.rectangle(box, fill=255)
        print("Nur Gesicht wird verändert - Umgebung bleibt erhalten (rechteckige Maske)")

    return mask
|
|
def auto_detect_face_area(image):
    """Suggest a face bounding box from the image dimensions alone.

    No detection is performed. The box is 40% of the shorter image side wide,
    1.2 times that tall, horizontally centred and placed a quarter of the
    free vertical space from the top. The result is truncated to integers and
    clamped to the image bounds.

    Args:
        image: Object exposing ``size`` as ``(width, height)``.

    Returns:
        ``[x1, y1, x2, y2]`` as a list of ints.
    """
    width, height = image.size
    side = min(width, height) * 0.4

    raw_left = (width - side) / 2
    raw_top = (height - side) / 4
    raw_right = raw_left + side
    raw_bottom = raw_top + side * 1.2

    # Truncate first, then clamp into the image rectangle.
    x1 = max(0, int(raw_left))
    y1 = max(0, int(raw_top))
    x2 = min(width, int(raw_right))
    y2 = min(height, int(raw_bottom))

    print(f"Geschätzte Gesichtskoordinaten: [{x1}, {y1}, {x2}, {y2}]")
    return [x1, y1, x2, y2]
|
|
| |
# Module-level pipeline singletons. Both start unset and are filled in on
# first use by load_txt2img() / load_img2img().
pipe_txt2img = pipe_img2img = None
|
|
def load_txt2img():
    """Return the shared text-to-image pipeline, creating it on first call.

    The pipeline is stored in the module-level ``pipe_txt2img`` so later
    calls reuse it. On creation the scheduler is replaced by a
    DPMSolverMultistepScheduler built from the pipeline's own scheduler
    config, and attention slicing is enabled.
    """
    global pipe_txt2img

    # Already built: hand back the cached instance.
    if pipe_txt2img is not None:
        return pipe_txt2img

    print("Loading Text-to-Image model...")
    load_options = {
        "torch_dtype": torch_dtype,
        "use_safetensors": True,
        "safety_checker": None,
        "requires_safety_checker": False,
    }
    pipe_txt2img = StableDiffusionPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5",
        **load_options,
    ).to(device)

    from diffusers import DPMSolverMultistepScheduler

    current_config = pipe_txt2img.scheduler.config
    pipe_txt2img.scheduler = DPMSolverMultistepScheduler.from_config(current_config)
    pipe_txt2img.enable_attention_slicing()
    return pipe_txt2img
|
|
def load_img2img():
    """Return the shared inpainting pipeline, building it on first use.

    Despite the name, this loads a ``StableDiffusionInpaintPipeline``. The
    finished pipeline is cached in the module-level ``pipe_img2img``.

    Returns:
        The configured inpainting pipeline.

    Raises:
        Exception: Any error raised while loading the model is printed and
            re-raised unchanged.
    """
    global pipe_img2img

    if pipe_img2img is not None:
        return pipe_img2img

    print("Loading Inpainting model...")
    try:
        # `use_safetensors=True` already restricts loading to safetensors
        # weights; the former `allow_pickle` keyword is not a
        # `from_pretrained` option and has been dropped.
        pipe = StableDiffusionInpaintPipeline.from_pretrained(
            "stabilityai/stable-diffusion-2-inpainting",
            torch_dtype=torch_dtype,
            use_safetensors=True,
            safety_checker=None,
        ).to(device)
    except Exception as e:
        print(f"Fehler beim Laden des Modells: {e}")
        raise

    from diffusers import DPMSolverMultistepScheduler

    pipe.scheduler = DPMSolverMultistepScheduler.from_config(
        pipe.scheduler.config,
        algorithm_type="sde-dpmsolver++",
        use_karras_sigmas=True,
        timestep_spacing="trailing",
    )

    pipe.enable_attention_slicing()
    pipe.enable_vae_tiling()
    # Assigning `pipe.vae_slicing = True` only creates an unused attribute;
    # the pipeline method is what actually switches VAE slicing on.
    pipe.enable_vae_slicing()

    # Publish the pipeline only once it is fully configured, so a failure
    # above never leaves a half-initialised object in the cache.
    pipe_img2img = pipe
    return pipe_img2img
|
|
| |
class TextToImageProgressCallback:
    """Step-end callback that reports text-to-image progress.

    Instances are passed to the pipeline as ``callback_on_step_end`` and
    forward the completed fraction to *progress* after every step.

    Args:
        progress: Callable invoked as ``progress(fraction, desc=...)``.
        total_steps: Number of inference steps requested from the pipeline.
    """

    def __init__(self, progress, total_steps):
        self.progress = progress
        self.total_steps = total_steps
        self.current_step = 0

    def __call__(self, pipe, step, timestep, callback_kwargs):
        """Report progress for the step that just finished.

        Args:
            pipe: The calling pipeline (unused).
            step: Zero-based index of the step that just completed.
            timestep: Scheduler timestep of that step (unused).
            callback_kwargs: Passed through untouched.

        Returns:
            *callback_kwargs*, unchanged.
        """
        # `step` is zero-based, so the number of finished steps is step + 1.
        self.current_step = step + 1

        # Never divide by zero, and never report more than 100%.
        total = max(1, int(self.total_steps))
        fraction = min(1.0, self.current_step / total)

        self.progress(fraction, desc="Generierung läuft - CPU benötigt bis zu 20 Minuten!")
        return callback_kwargs
|
|
class ImageToImageProgressCallback:
    """Step-end callback that reports progress of a strength-limited run.

    The number of steps actually executed is derived from the requested step
    count and the strength, so the reported fraction is relative to that
    reduced count.

    Args:
        progress: Callable invoked as ``progress(fraction, desc=...)``.
        total_steps: Number of inference steps requested from the pipeline.
        strength: Strength passed to the pipeline.
    """

    def __init__(self, progress, total_steps, strength):
        self.progress = progress
        self.total_steps = total_steps
        self.current_step = 0
        self.strength = strength
        # Computed lazily on the first callback invocation.
        self.actual_total_steps = None

    def __call__(self, pipe, step, timestep, callback_kwargs):
        """Report progress for the step that just finished.

        Args:
            pipe: The calling pipeline (unused).
            step: Zero-based index of the step that just completed.
            timestep: Scheduler timestep of that step (unused).
            callback_kwargs: Passed through untouched.

        Returns:
            *callback_kwargs*, unchanged.
        """
        # `step` is zero-based, so the number of finished steps is step + 1.
        self.current_step = step + 1

        if self.actual_total_steps is None:
            if self.strength < 1.0:
                planned = int(self.total_steps * self.strength)
            else:
                planned = int(self.total_steps)
            # A small steps * strength product truncates to 0; keep at least
            # one step so the ratio below is always defined.
            self.actual_total_steps = max(1, planned)

            print(f"🎯 INTERNE STEP-AUSGABE: Strength {self.strength} → {self.actual_total_steps} tatsächliche Denoising-Schritte")

        fraction = min(1.0, self.current_step / self.actual_total_steps)
        self.progress(fraction, desc="Generierung läuft - CPU benötigt bis zu 20 Minuten!")
        return callback_kwargs
|
|
| |
def text_to_image(prompt, steps, guidance_scale, progress=gr.Progress()):
    """Generate one IMG_SIZE x IMG_SIZE image from a text prompt.

    Args:
        prompt: Text prompt; an empty or whitespace-only prompt yields None.
        steps: Requested number of inference steps (converted to int).
        guidance_scale: Guidance scale forwarded to the pipeline.
        progress: Gradio progress tracker; injected by Gradio when the
            function is used as an event handler.

    Returns:
        The generated image, or None if the prompt is empty or any error
        occurred (the error and its traceback are printed).
    """
    try:
        if not prompt or not prompt.strip():
            return None

        print(f"Starting generation for: {prompt}")
        start_time = time.time()

        progress(0, desc="Generierung läuft - CPU benötigt bis zu 20 Minuten!")

        pipe = load_txt2img()

        # A fresh random seed per request; printed so a run can be traced.
        seed = random.randint(0, 2**32 - 1)
        generator = torch.Generator(device=device).manual_seed(seed)
        print(f"Using seed: {seed}")

        # Normalise once so the progress callback and the pipeline agree on
        # the step count.
        num_steps = int(steps)
        callback = TextToImageProgressCallback(progress, num_steps)

        image = pipe(
            prompt=prompt,
            height=IMG_SIZE,
            width=IMG_SIZE,
            num_inference_steps=num_steps,
            guidance_scale=guidance_scale,
            generator=generator,
            callback_on_step_end=callback,
            # The callback needs no tensors from the pipeline.
            callback_on_step_end_tensor_inputs=[],
        ).images[0]

        end_time = time.time()
        print(f"Bild generiert in {end_time - start_time:.2f} Sekunden")

        return image

    except Exception as e:
        # Deliberate best-effort: log the failure and return an empty result
        # to the UI instead of raising.
        print(f"Fehler: {e}")
        import traceback
        traceback.print_exc()
        return None
|
|
|
|
def _scale_bbox(bbox, source_size, target_size):
    """Rescale ``[x1, y1, x2, y2]`` from source pixels to a square canvas.

    Returns None when any coordinate is None; otherwise the scaled box as
    ints with its corners ordered (left <= right, top <= bottom).
    """
    if any(coord is None for coord in bbox):
        return None

    source_width, source_height = source_size
    scale_x = target_size / source_width
    scale_y = target_size / source_height

    x1, y1, x2, y2 = bbox
    left, right = sorted((int(x1 * scale_x), int(x2 * scale_x)))
    top, bottom = sorted((int(y1 * scale_y), int(y2 * scale_y)))
    return [left, top, right, bottom]


def img_to_image(image, prompt, neg_prompt, strength, steps, guidance_scale, face_preserve, bbox_x1, bbox_y1, bbox_x2, bbox_y2, progress=gr.Progress()):
    """Transform an uploaded image in two stages: ControlNet, then inpainting.

    The image is first sent through
    ``controlnet_processor.generate_with_controlnet``; the second value it
    returns is resized to IMG_SIZE x IMG_SIZE and inpainted using a
    rectangular mask built from the bounding box.

    Args:
        image: Uploaded PIL image; None yields None.
        prompt: Prompt for both stages.
        neg_prompt: Negative prompt for both stages.
        strength: UI strength; the effective strength is
            ``min(0.85, strength * 1.3)``.
        steps: Requested number of inference steps (converted to int).
        guidance_scale: Guidance scale; capped at 12.0 for inpainting.
        face_preserve: If true the box is protected and its surroundings are
            repainted, otherwise only the box is repainted. Also forwarded to
            the ControlNet stage as ``keep_environment``.
        bbox_x1, bbox_y1, bbox_x2, bbox_y2: Box in pixel coordinates of the
            uploaded image. If any value is None the whole image is repainted.
        progress: Gradio progress tracker; injected by Gradio when the
            function is used as an event handler.

    Returns:
        The transformed image, or None if no image was supplied or any error
        occurred (the error and its traceback are printed).
    """
    try:
        if image is None:
            return None

        print(f"Img2Img Start → Strength: {strength}, Steps: {steps}, Guidance: {guidance_scale}")
        print(f"Prompt: {prompt}")
        print(f"Negativ-Prompt: {neg_prompt}")
        print(f"Gesicht beibehalten: {face_preserve}")
        start_time = time.time()

        progress(0, desc="Generierung läuft - CPU benötigt bis zu 20 Minuten!")

        # ---- Stage 1: ControlNet -------------------------------------------
        progress(0.05, desc="ControlNet: Pose-Erkennung...")

        num_steps = int(steps)
        adj_strength = min(0.85, strength * 1.3)
        actual_steps_from_strength = int(num_steps * adj_strength)
        controlnet_steps = min(25, actual_steps_from_strength)

        print(f"🎯 ControlNet Step-Kalkulation: UI={steps}, Adj-Strength={adj_strength:.3f}, Echte Steps={actual_steps_from_strength}, ControlNet-Steps={controlnet_steps}")

        # ControlNet runs at half of the inpainting strength.
        controlnet_strength = adj_strength * 0.5
        print(f"🎯 ControlNet Strength: {controlnet_strength:.3f} (50% von Inpaint-Strength {adj_strength:.3f})")

        controlnet_output, inpaint_input = controlnet_processor.generate_with_controlnet(
            image=image,
            prompt=prompt,
            negative_prompt=neg_prompt,
            steps=controlnet_steps,
            guidance_scale=guidance_scale,
            controlnet_strength=controlnet_strength,
            progress=progress,
            keep_environment=face_preserve,
        )

        print(f"✅ ControlNet Output erhalten: {type(controlnet_output)}")
        print(f"✅ Inpaint Input erhalten: {type(inpaint_input)}")

        progress(0.3, desc="ControlNet fertig, starte Inpaint...")

        # ---- Stage 2: inpainting -------------------------------------------
        pipe = load_img2img()

        img_resized = inpaint_input.convert("RGB").resize((IMG_SIZE, IMG_SIZE))

        adj_guidance = min(guidance_scale, 12.0)

        seed = random.randint(0, 2**32 - 1)
        generator = torch.Generator(device=device).manual_seed(seed)
        print(f"Using seed: {seed}")

        # The box is given in pixels of the uploaded image; the mask has to
        # match the resized working image.
        bbox_coords = _scale_bbox(
            [bbox_x1, bbox_y1, bbox_x2, bbox_y2], image.size, IMG_SIZE
        )

        if bbox_coords is not None:
            print(f"Skalierte Koordinaten: {bbox_coords}")
            mask = create_face_mask(img_resized, bbox_coords, face_preserve)
            print("Maske erfolgreich erstellt")
        else:
            print("Keine gültigen Koordinaten - keine Maske angewendet")
            # The inpainting pipeline cannot run without a mask. An all-white
            # mask repaints the whole image, which is what "no mask" means.
            mask = Image.new("L", img_resized.size, 255)

        print(f"⚙️ PIPELINE-KONFIGURATION:")
        print(f" - Angefordert: {num_steps} Steps")
        print(f" - Strength: {adj_strength:.3f}")
        print(f" - Scheduler: {pipe.scheduler.__class__.__name__}")

        print(f"🎯 KORREKTE INTERNE STEP-AUSGABE: {num_steps} Steps × Strength {adj_strength:.3f} = {actual_steps_from_strength} tatsächliche Schritte")

        def inpaint_progress(fraction, desc=None):
            # Stage 1 already moved the bar to 30%; map the inpainting stage
            # onto the remaining 30-100% so the bar never jumps backwards.
            progress(0.3 + 0.7 * fraction, desc=desc)

        callback = ImageToImageProgressCallback(inpaint_progress, num_steps, adj_strength)

        result = pipe(
            prompt=prompt,
            negative_prompt=neg_prompt,
            image=img_resized,
            mask_image=mask,
            strength=adj_strength,
            num_inference_steps=num_steps,
            guidance_scale=adj_guidance,
            generator=generator,
            callback_on_step_end=callback,
            # The callback needs no tensors from the pipeline.
            callback_on_step_end_tensor_inputs=[],
        )

        # Purely diagnostic; a failure here must not discard the result.
        try:
            scheduler = pipe.scheduler
            print(f"🔧 SCHEDULER-INFO: {scheduler.__class__.__name__}")
            print(f"📊 TATSÄCHLICHE STEP-KONFIGURATION: {num_steps} Schritte mit Strength {adj_strength:.3f}")

            if hasattr(scheduler, 'timesteps'):
                actual_steps = len(scheduler.timesteps)
                print(f"🎯 BESTÄTIGTE INTERNE STEP-AUSGABE: Scheduler verwendete {actual_steps} tatsächliche Denoising-Schritte")

        except Exception as e:
            print(f"⚠️ Konnte Scheduler-Info nicht auslesen: {e}")

        end_time = time.time()
        print(f"Bild transformiert in {end_time - start_time:.2f} Sekunden")

        generated_image = result.images[0]
        return generated_image

    except Exception as e:
        # Deliberate best-effort: log the failure and return an empty result
        # to the UI instead of raising.
        print(f"Fehler: {e}")
        import traceback
        traceback.print_exc()
        return None
|
|
|
|
|
|
def update_bbox_from_image(image):
    """Return bounding-box field values for a newly uploaded image.

    Args:
        image: The uploaded image, or None when the input was cleared.

    Returns:
        ``(x1, y1, x2, y2)`` as suggested by auto_detect_face_area, or four
        None values when there is no image.
    """
    if image is not None:
        x1, y1, x2, y2 = auto_detect_face_area(image)
        return x1, y1, x2, y2

    return (None,) * 4
|
|
# Custom stylesheet handed to gr.Blocks by main_ui().
_APP_CSS = """
.info-box {
    background-color: #f8f4f0;
    padding: 15px;
    border-radius: 8px;
    border-left: 4px solid #8B7355;
    margin: 20px 0;
}
.clickable-file {
    color: #1976d2;
    cursor: pointer;
    text-decoration: none;
    font-family: 'Monaco', 'Consolas', monospace;
    background: #e3f2fd;
    padding: 2px 6px;
    border-radius: 4px;
    border: 1px solid #bbdefb;
}
.clickable-file:hover {
    background: #bbdefb;
    text-decoration: underline;
}
#start-button {
    background-color: #0080FF !important;
    border: none !important;
    margin: 50px auto !important;
    display: block !important;
    font-weight: 600;
    width: 280px;
}
#start-button:hover {
    background-color: #D3D3D3 !important;
}
.hint-box {
    margin-top: 20px;
}
.custom-text {
    font-size: 25px !important;
}
.image-upload .svelte-1p4f8co {
    display: block !important;
}
"""


def _build_text_tab():
    """Lay out the 'Text zu Bild' tab and wire its button to text_to_image."""
    with gr.Tab("Text zu Bild"):
        gr.Markdown("**Beschreibe dein gewünschtes Bild (maximal 77 Token):**")

        with gr.Row():
            prompt_box = gr.Textbox(
                placeholder="z.B. ultra realistic mountain landscape at sunrise, soft mist over the valley, detailed foliage, crisp textures, depth of field, sunlight rays through clouds, shot on medium format camera, 8k, HDR, hyper-detailed, natural lighting, masterpiece, Eingabe unten:(Schritt Inferenz:35, Prompt-Stärke:9)",
                lines=2,
                label="Prompt (Englisch)",
                info="Beschreibe detailliert, was du sehen möchtest. Verwende Kommas zur Trennung.",
            )

        with gr.Row():
            with gr.Column():
                steps_slider = gr.Slider(
                    minimum=10,
                    maximum=100,
                    value=35,
                    step=1,
                    label="Inferenz-Schritte",
                    info="Mehr Schritte = bessere Qualität, aber langsamer (20-50 empfohlen)",
                )
            with gr.Column():
                guidance_slider = gr.Slider(
                    minimum=1.0,
                    maximum=20.0,
                    value=7.5,
                    step=0.5,
                    label="Prompt-Stärke",
                    info="Wie stark der Prompt befolgt wird (7-12 für gute Balance)",
                )

        run_button = gr.Button("Bild generieren", variant="primary")
        result_view = gr.Image(
            label="Generiertes Bild",
            show_download_button=True,
            type="pil",
        )

        # One generation at a time.
        run_button.click(
            fn=text_to_image,
            inputs=[prompt_box, steps_slider, guidance_slider],
            outputs=result_view,
            concurrency_limit=1,
        )


def _build_image_tab():
    """Lay out the 'Bild zu Bild' tab and wire upload and transform events."""
    with gr.Tab("Bild zu Bild"):
        gr.Markdown("**Lade ein Bild hoch und beschreibe die gewünschte Veränderung:**")

        with gr.Row():
            source_image = gr.Image(
                type="pil",
                label="Eingabebild",
                height=300,
                sources=["upload"],
            )

        with gr.Row():
            with gr.Column():
                prompt_box = gr.Textbox(
                    placeholder="change background to beach with palm trees, keep person unchanged, sunny day",
                    lines=2,
                    label="Transformations-Prompt (Englisch - maximal 77 Token)",
                    info="Was soll verändert werden? Sei spezifisch.",
                )
            with gr.Column():
                negative_box = gr.Textbox(
                    placeholder="blurry, deformed, ugly, bad anatomy, extra limbs, poorly drawn hands",
                    lines=2,
                    label="Negativ-Prompt (Englisch - maximal 77 Token)",
                    info="Was soll vermieden werden? Unerwünschte Elemente auflisten.",
                )

        with gr.Row():
            with gr.Column():
                strength_slider = gr.Slider(
                    minimum=0.1,
                    maximum=0.9,
                    value=0.4,
                    step=0.05,
                    label="Veränderungs-Stärke",
                    info="0.1-0.3: Leichte Anpassungen, 0.4-0.6: Mittlere Veränderungen, 0.7-0.9: Starke Umgestaltung",
                )
            with gr.Column():
                steps_slider = gr.Slider(
                    minimum=10,
                    maximum=100,
                    value=35,
                    step=1,
                    label="Inferenz-Schritte",
                    info="Anzahl der Verarbeitungsschritte (25-45 für gute Ergebnisse)",
                )
            with gr.Column():
                guidance_slider = gr.Slider(
                    minimum=1.0,
                    maximum=20.0,
                    value=7.5,
                    step=0.5,
                    label="Prompt-Stärke",
                    info="Einfluss des Prompts auf das Ergebnis (6-10 für natürliche Ergebnisse)",
                )

        with gr.Row():
            preserve_box = gr.Checkbox(
                label="Gesicht, Tier, Gegenstand beibehalten",
                value=True,
                info="Aktiviert: Bildelement bleibt erhalten, Hintergrund wird verändert | Deaktiviert: Nur Bildelement wird verändert",
            )

        with gr.Row():
            gr.Markdown("**Bildelementbereich anpassen**")

        # The four bounding-box inputs, created left to right in the order
        # x1, y1, x2, y2.
        bbox_specs = (
            ("Links (x1)", 100, "Linke Kante des Gesichtsbereichs"),
            ("Oben (y1)", 100, "Obere Kante des Gesichtsbereichs"),
            ("Rechts (x2)", 300, "Rechte Kante des Gesichtsbereichs"),
            ("Unten (y2)", 300, "Untere Kante des Gesichtsbereichs"),
        )
        with gr.Row():
            bbox_fields = [
                gr.Number(label=caption, value=default, precision=0, info=hint)
                for caption, default, hint in bbox_specs
            ]

        with gr.Row():
            gr.Markdown(
                "\n".join(
                    [
                        "**Achtung:**",
                        "• **Automatische Bildelementerkennung** setzt Koordinaten beim Upload",
                        "• **Koordinaten nur bei erkennbaren Verzerrungen anpassen** (Bereiche leicht verschieben)",
                    ]
                )
            )

        run_button = gr.Button("Bild transformieren", variant="primary")

        with gr.Row():
            result_view = gr.Image(
                label="Transformiertes Bild",
                show_download_button=True,
                type="pil",
            )

        # Refresh the suggested box whenever the input image changes.
        source_image.change(
            fn=update_bbox_from_image,
            inputs=[source_image],
            outputs=bbox_fields,
        )

        # One transformation at a time.
        run_button.click(
            fn=img_to_image,
            inputs=[
                source_image,
                prompt_box,
                negative_box,
                strength_slider,
                steps_slider,
                guidance_slider,
                preserve_box,
                *bbox_fields,
            ],
            outputs=result_view,
            concurrency_limit=1,
        )


def main_ui():
    """Build and return the Gradio Blocks app with its two tabs.

    The app contains a text-to-image tab driven by text_to_image and an
    image-to-image tab driven by img_to_image. Nothing is launched here.
    """
    app = gr.Blocks(
        title="AI Image Generator",
        theme=gr.themes.Base(),
        css=_APP_CSS,
    )
    with app:
        with gr.Column(visible=True):
            _build_text_tab()
            _build_image_tab()

    return app
|
|
|
|
if __name__ == "__main__":
    # Settings for the built-in server: listen on all interfaces on port
    # 7860, limit uploads to 10MB, show errors in the UI, no public share
    # link.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "max_file_size": "10MB",
        "show_error": True,
        "share": False,
    }

    demo = main_ui()
    demo.queue()
    demo.launch(**launch_options)