video-ffmpeg

Sleeping

App Files Files Community

Tim13ekd committed on Dec 14, 2025

Commit

c5cfcb5

verified ·

1 Parent(s): 4ce04cf

Update app.py

Browse files

Files changed (1) hide show

app.py +92 -86

app.py CHANGED Viewed

@@ -4,20 +4,29 @@ from pathlib import Path
 import uuid
 import subprocess
 import shutil
-import shlex  # Für sicheres Escapen von Text
 # Erlaubte Dateiformate
 allowed_medias = [".png", ".jpg", ".jpeg", ".bmp", ".gif", ".tiff"]
 allowed_audios = [".mp3", ".wav", ".m4a", ".ogg"]
 def save_temp_audio(audio_file):
-    """
-    Speichert die hochgeladene Datei sicher in einem temporären Verzeichnis.
-    """
     if isinstance(audio_file, str):
         ext = Path(audio_file).suffix
         if ext.lower() not in allowed_audios:
-            ext = ".mp3"  # Standard, falls Endung fehlt
         temp_audio = Path(tempfile.mkdtemp()) / f"input{ext}"
         with open(temp_audio, "wb") as f:
             f.write(audio_file.encode())
@@ -31,142 +40,139 @@ def save_temp_audio(audio_file):
         with open(temp_audio, "wb") as f:
             shutil.copyfileobj(audio_file, f)
         return temp_audio
-    else:
-        raise ValueError("Das übergebene Audio ist kein gültiges Dateiformat oder NamedString.")
-def generate_slideshow_with_audio(images, input_text, duration_per_word=0.5, duration_per_image=3, fade_duration=0.7, font_size=60, y_pos=0.5, audio_file=None):
     if not images:
         return None, "❌ Keine Bilder ausgewählt"
     temp_dir = tempfile.mkdtemp()
-    clips = []
     # Text in Wörter aufteilen
-    words = input_text.split()
-    total_words = len(words)
-    # Falls Audio vorhanden ist, speichern wir es einfach als temporäre Datei
     temp_audio_file = None
     if audio_file:
         temp_audio_file = save_temp_audio(audio_file)
-    # Clips für jedes Bild erstellen
-    clips_with_text = []
-    word_index = 0
     for i, img_path in enumerate(images):
-        img_path = Path(img_path.name)  # Sicherstellen, dass es den richtigen Pfad hat
         clip_path_with_text = Path(temp_dir) / f"clip_with_text_{i}.mp4"
-        # Berechne Start- und Endzeit für jedes Wort
-        start_time = i * duration_per_image
-        end_time = (i + 1) * duration_per_image
-        # Text-Filters für jedes Wort
         if word_index < len(words):
             text = words[word_index]
             word_index += 1
         else:
-            text = ""  # Falls keine weiteren Wörter, leeres Text
         vf_filters = (
             "scale=w=1280:h=720:force_original_aspect_ratio=decrease,"
             "pad=1280:720:(ow-iw)/2:(oh-ih)/2:color=black,"
-            "fps=25,format=yuv420p"
-            f",drawtext=text='{shlex.quote(text)}':fontcolor=white:fontsize={font_size}:borderw=2:"
             f"x=(w-text_w)/2:y=(h-text_h)*{y_pos}:"
             f"alpha='if(lt(t,{fade_duration}), t/{fade_duration}, if(lt(t,{duration_per_image}-{fade_duration}), 1, ({duration_per_image}-t)/{fade_duration}))'"
         )
-        # Bild als Video mit Text Overlay erstellen
         cmd = [
-            "ffmpeg",
-            "-y",
-            "-loop", "1",
-            "-i", str(img_path),
             "-t", str(duration_per_image),
             "-vf", vf_filters,
             str(clip_path_with_text)
         ]
         try:
             subprocess.run(cmd, check=True, capture_output=True, text=True)
         except subprocess.CalledProcessError as e:
-            return None, f"❌ FFmpeg Fehler bei Text Overlay für Bild {i+1}:\n{e.stderr}"
-        clips_with_text.append(clip_path_with_text)
-    # Zusammenfügen der Clips mit Text
-    filelist_with_text_path = Path(temp_dir) / "filelist_with_text.txt"
-    with open(filelist_with_text_path, "w") as f:
         for clip in clips_with_text:
             f.write(f"file '{clip}'\n")
-    output_with_text_file = Path(temp_dir) / f"slideshow_with_text_{uuid.uuid4().hex}.mp4"
-    cmd_concat_with_text = [
-        "ffmpeg",
-        "-y",
-        "-f", "concat",
-        "-safe", "0",
-        "-i", str(filelist_with_text_path),
-        "-c:v", "libx264",
-        "-pix_fmt", "yuv420p",
-        str(output_with_text_file)
     ]
-    try:
-        subprocess.run(cmd_concat_with_text, check=True, capture_output=True, text=True)
-    except subprocess.CalledProcessError as e:
-        return None, f"❌ FFmpeg Fehler beim Zusammenfügen der Clips mit Text:\n{e.stderr}"
-    # Audio hinzufügen, falls vorhanden
     if temp_audio_file:
-        final_output = Path(temp_dir) / f"slideshow_with_audio_{uuid.uuid4().hex}.mp4"
         cmd_audio = [
-            "ffmpeg",
-            "-y",
-            "-i", str(output_with_text_file),
-            "-i", str(temp_audio_file),
-            "-c:v", "copy",
-            "-c:a", "aac",
-            "-shortest",
             str(final_output)
         ]
-        try:
-            subprocess.run(cmd_audio, check=True, capture_output=True, text=True)
-            return str(final_output), "✅ Slideshow mit Audio und Text Overlay erstellt"
-        except subprocess.CalledProcessError as e:
-            return None, f"❌ FFmpeg Fehler beim Hinzufügen des Audios:\n{e.stderr}"
-    return str(output_with_text_file), "✅ Slideshow mit Text Overlay erstellt (ohne Audio)"
 # Gradio UI
 with gr.Blocks() as demo:
-    gr.Markdown("# Slideshow mit Audio und Text Overlay")
-    img_input = gr.Files(label="Bilder auswählen (mehrere)", file_types=allowed_medias)
-    text_input = gr.Textbox(
-        label="Text eingeben",
-        placeholder="Gib hier den Text ein, der Wort für Wort eingeblendet werden soll",
-        lines=5
-    )
-    duration_word_input = gr.Number(value=0.5, label="Dauer pro Wort in Sekunden", precision=1)
-    duration_image_input = gr.Number(value=3, label="Dauer pro Bild in Sekunden", precision=1)
-    fade_input = gr.Number(value=0.7, label="Fade Dauer in Sekunden", precision=1)
-    ypos_input = gr.Slider(minimum=0.0, maximum=0.9, step=0.01, value=0.5, label="Y-Position für alle Texte (0=oben, 0.5=mitte, 0.9=unten)")
-    font_size_input = gr.Number(value=60, label="Textgröße (px)")
-    audio_input = gr.File(
-        label="Audio hinzufügen (optional)",
-        file_types=allowed_audios
-    )
-    out_video = gr.Video(interactive=False, label="Generiertes Video")
-    status = gr.Textbox(interactive=False, label="Status")
-    btn = gr.Button("Video erstellen")
     btn.click(
         fn=generate_slideshow_with_audio,
-        inputs=[img_input, text_input, duration_word_input, duration_image_input, ypos_input, fade_input, font_size_input, audio_input],
         outputs=[out_video, status]
     )
-demo.launch()

 import uuid
 import subprocess
 import shutil
+import os
 # Erlaubte Dateiformate
 allowed_medias = [".png", ".jpg", ".jpeg", ".bmp", ".gif", ".tiff"]
 allowed_audios = [".mp3", ".wav", ".m4a", ".ogg"]
def get_font_path(candidates=None):
    """Return the path of the first available font file, or None.

    The result is meant to be passed to FFmpeg's drawtext filter as an
    explicit ``fontfile``. When None is returned the caller leaves font
    selection to FFmpeg itself, which (per the original author's note)
    does not always work.

    Args:
        candidates: Optional iterable of font file paths to probe, in order
            of preference. When omitted, three common Linux locations of
            bold sans-serif TrueType fonts are tried.

    Returns:
        The first candidate that exists as a regular file (returned
        unchanged), or None if none of them does.
    """
    if candidates is None:
        candidates = (
            "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
            "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
            "/usr/share/fonts/truetype/freefont/FreeSansBold.ttf",
        )
    # isfile() instead of exists(): a directory sitting at one of these paths
    # must not be handed to FFmpeg as a font file.
    return next((font for font in candidates if os.path.isfile(font)), None)
 def save_temp_audio(audio_file):
     if isinstance(audio_file, str):
         ext = Path(audio_file).suffix
         if ext.lower() not in allowed_audios:
+            ext = ".mp3"
         temp_audio = Path(tempfile.mkdtemp()) / f"input{ext}"
         with open(temp_audio, "wb") as f:
             f.write(audio_file.encode())
         with open(temp_audio, "wb") as f:
             shutil.copyfileobj(audio_file, f)
         return temp_audio
+    return None
def _run_ffmpeg(cmd):
    """Run one FFmpeg command; return None on success or an error text on failure."""
    try:
        subprocess.run(cmd, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        return e.stderr or str(e)
    except OSError as e:
        # Raised when the ffmpeg executable itself cannot be started.
        return str(e)
    return None


def generate_slideshow_with_audio(images, input_text, duration_per_word, duration_per_image, fade_duration, font_size, y_pos, audio_file):
    """Build a 1280x720, 25 fps slideshow with one overlaid word per image.

    Each image becomes a clip of ``duration_per_image`` seconds. The n-th
    whitespace-separated word of ``input_text`` is drawn centred horizontally
    on the n-th image and fades in and out over ``fade_duration`` seconds;
    images beyond the last word get no text. The clips are concatenated and,
    if audio is supplied, it is muxed in with ``-shortest`` so the output
    ends with the shorter of the two streams.

    Args:
        images: Uploaded images, either path strings / ``Path`` objects or
            file-like objects exposing the path as ``.name``.
        input_text: Text whose words are distributed over the images; may be
            empty or None.
        duration_per_word: Accepted for interface compatibility; not used.
        duration_per_image: Length of each image clip in seconds.
        fade_duration: Length of the text fade-in and fade-out in seconds.
        font_size: Font size passed to the drawtext filter.
        y_pos: Vertical text position as a factor of ``(h - text_h)``.
        audio_file: Optional audio input, handed to ``save_temp_audio``.

    Returns:
        A ``(video_path, status_message)`` tuple. ``video_path`` is None
        when there are no images or when any FFmpeg step fails; the message
        then carries the FFmpeg error output.
    """
    # Debug output to confirm the UI values arrive in the expected order.
    print(f"DEBUG: Font Size: {font_size}, Y-Pos: {y_pos}, Fade: {fade_duration}")

    if not images:
        return None, "❌ Keine Bilder ausgewählt"

    temp_dir = Path(tempfile.mkdtemp())
    words = input_text.split() if input_text else []

    temp_audio_file = save_temp_audio(audio_file) if audio_file else None

    font_path = get_font_path()
    font_option = f":fontfile='{font_path}'" if font_path else ""

    clips_with_text = []
    for i, img in enumerate(images):
        # Accept plain paths as well as upload objects that expose .name
        # (which form arrives presumably depends on the Gradio version).
        img_path = Path(img) if isinstance(img, (str, Path)) else Path(img.name)
        clip_path_with_text = temp_dir / f"clip_with_text_{i}.mp4"

        # One word per image; images past the last word get an empty overlay.
        text = words[i] if i < len(words) else ""

        # The text goes through a file so that quotes, colons and other
        # characters never need escaping inside the filter string.
        text_file_path = temp_dir / f"text_{i}.txt"
        with open(text_file_path, "w", encoding="utf-8") as f:
            f.write(text)

        vf_filters = (
            "scale=w=1280:h=720:force_original_aspect_ratio=decrease,"
            "pad=1280:720:(ow-iw)/2:(oh-ih)/2:color=black,"
            "fps=25,format=yuv420p,"
            f"drawtext=textfile='{text_file_path}'{font_option}:fontcolor=white:fontsize={font_size}:borderw=2:bordercolor=black:"
            f"x=(w-text_w)/2:y=(h-text_h)*{y_pos}:"
            f"alpha='if(lt(t,{fade_duration}), t/{fade_duration}, if(lt(t,{duration_per_image}-{fade_duration}), 1, ({duration_per_image}-t)/{fade_duration}))'"
        )
        cmd = [
            "ffmpeg", "-y", "-loop", "1", "-i", str(img_path),
            "-t", str(duration_per_image),
            "-vf", vf_filters,
            str(clip_path_with_text)
        ]
        error = _run_ffmpeg(cmd)
        if error is not None:
            return None, f"❌ FFmpeg Fehler bei Bild {i+1}:\n{error}"
        clips_with_text.append(clip_path_with_text)

    # Concatenate the per-image clips via FFmpeg's concat demuxer.
    filelist_path = temp_dir / "filelist.txt"
    with open(filelist_path, "w", encoding="utf-8") as f:
        f.writelines(f"file '{clip}'\n" for clip in clips_with_text)

    output_video = temp_dir / f"slideshow_{uuid.uuid4().hex}.mp4"
    cmd_concat = [
        "ffmpeg", "-y", "-f", "concat", "-safe", "0",
        "-i", str(filelist_path),
        "-c:v", "libx264", "-pix_fmt", "yuv420p",
        str(output_video)
    ]
    # Report failures through the status tuple instead of raising, matching
    # the per-image step above.
    error = _run_ffmpeg(cmd_concat)
    if error is not None:
        return None, f"❌ FFmpeg Fehler beim Zusammenfügen der Clips:\n{error}"

    if not temp_audio_file:
        return str(output_video), "✅ Video erstellt (ohne Audio)"

    # Mux the audio track; the video stream is copied, the audio encoded to AAC.
    final_output = temp_dir / f"final_{uuid.uuid4().hex}.mp4"
    cmd_audio = [
        "ffmpeg", "-y", "-i", str(output_video), "-i", str(temp_audio_file),
        "-c:v", "copy", "-c:a", "aac", "-shortest",
        str(final_output)
    ]
    error = _run_ffmpeg(cmd_audio)
    if error is not None:
        return None, f"❌ FFmpeg Fehler beim Hinzufügen des Audios:\n{error}"
    return str(final_output), "✅ Video mit Audio erstellt!"
 # Gradio UI
 # Declares the input widgets, the result widgets, and wires the button to
 # generate_slideshow_with_audio; the app is launched at the end.
 with gr.Blocks() as demo:
+    gr.Markdown("# Slideshow Generator")
+    with gr.Row():
+        img_input = gr.Files(label="Bilder", file_types=allowed_medias)
+        text_input = gr.Textbox(label="Text", lines=5, placeholder="Wörter werden auf Bilder verteilt")
+    with gr.Row():
+        duration_image_input = gr.Number(value=3, label="Dauer pro Bild (s)")
+        fade_input = gr.Number(value=0.5, label="Fade Dauer (s)")
+        font_size_input = gr.Number(value=80, label="Schriftgröße (px)")
+        ypos_input = gr.Slider(0.0, 1.0, value=0.5, label="Y-Position (0=Oben, 1=Unten)")
+    # Dummy input for duration_per_word (currently unused by the script, but the function expects it)
+    duration_word_input = gr.Number(value=0.5, visible=False)
+    audio_input = gr.File(label="Audio (optional)", file_types=allowed_audios)
+    btn = gr.Button("Erstellen", variant="primary")
+    out_video = gr.Video(label="Ergebnis")
+    status = gr.Textbox(label="Status")
+    # CORRECT ORDER OF THE INPUTS (must match the function's parameter order):
+    # (images, input_text, duration_per_word, duration_per_image, fade_duration, font_size, y_pos, audio_file)
     btn.click(
         fn=generate_slideshow_with_audio,
+        inputs=[
+            img_input,
+            text_input,
+            duration_word_input,
+            duration_image_input,
+            fade_input,       # was previously swapped
+            font_size_input,  # was previously swapped
+            ypos_input,       # was previously swapped
+            audio_input
+        ],
         outputs=[out_video, status]
     )
+demo.launch()