video-ffmpeg

Sleeping

App Files Files Community

Tim13ekd committed on Dec 15, 2025

Commit

de9f244

verified ·

1 Parent(s): 2ae5fc5

Update app.py

Browse files

Files changed (1) hide show

app.py +55 -29

app.py CHANGED Viewed

@@ -47,15 +47,15 @@ def save_temp_audio(audio_file_path):
              shutil.rmtree(temp_audio_dir)
         return None, None
-def create_sentence_base_filter(full_text, duration_clip, font_option, font_size, y_pos, style):
     """
-    Erstellt den FFmpeg drawtext Filter für die Basisschicht (den gesamten Satzabschnitt).
-    Dies ist die "Hintergrund"-Untertitelspur.
     """
     # Standard-Stil
     base_params = {
         "fontcolor": "white",
-        "borderw": 2,
         "bordercolor": "black",
         "box": 0, "boxcolor": "",
         "fontsize": font_size
@@ -63,46 +63,49 @@ def create_sentence_base_filter(full_text, duration_clip, font_option, font_size
     style_lower = style.lower()
-    # SPEZIALFALL: Modern Style (graue Hintergrundbox, wie YouTube)
     if style_lower == "modern":
         base_params["box"] = 1
-        # Dunkelgrau (0x444444) mit 60% Transparenz (@0.6)
-        base_params["boxcolor"] = "0x444444@0.6"
-        base_params["borderw"] = 0 # Kein Text-Rand bei Hintergrundbox
         base_params["fontsize"] = font_size
-    # SPEZIALFALL: Pop Style (Box wird von der Basisschicht gezeichnet)
     elif style_lower == "pop":
         base_params["box"] = 1
         base_params["boxcolor"] = "0x000000@0.6"
         base_params["fontsize"] = font_size * 1.1
-    escaped_text = full_text.replace(':', FFMPEG_ESCAPE_CHAR + ':')
-    # Filter für den gesamten Satz, sichtbar für die gesamte Clip-Dauer
     drawtext_filter = (
         f"drawtext=text='{escaped_text}':"
         f"fontcolor={base_params['fontcolor']}:"
         f"fontsize={base_params['fontsize']}:"
         f"borderw={base_params['borderw']}:"
         f"bordercolor={base_params['bordercolor']}:"
-        # boxborderw=10 fügt etwas Polsterung um die Box hinzu
         + (f"box={base_params['box']}:boxcolor={base_params['boxcolor']}:boxborderw=10:" if base_params["box"] else "") +
         f"x=(w-text_w)/2:y=(h-text_h)*{y_pos}"
     )
-    # Fügt fontfile nur hinzu, wenn vorhanden und vermeidet doppelte Doppelpunkte
     if font_option:
         drawtext_filter += f":{font_option}"
-    drawtext_filter += f":enable='between(t, 0, {duration_clip})'"
     return drawtext_filter
-def create_highlight_word_filter(word, full_text, start_time, duration, font_option, font_size, y_pos, style):
     """
     Erstellt den FFmpeg drawtext Filter für die Highlight-Schicht (nur das aktive Wort).
-    Dies ist die "Hervorhebungs"-Spur.
     """
     word_end_time = start_time + duration
@@ -125,10 +128,11 @@ def create_highlight_word_filter(word, full_text, start_time, duration, font_opt
     style_lower = style.lower()
     if style_lower == "modern":
-        # Modern: Minimaler Highlight. Gelbe Schrift über der Box des Basissatzes.
         params["fontcolor"] = "yellow"
-        params["borderw"] = 0 # Kein Rand, da Box vorhanden
         params["fontsize_override"] = font_size * 1.05
     elif style_lower == "bold":
@@ -172,7 +176,6 @@ def create_highlight_word_filter(word, full_text, start_time, duration, font_opt
         f"x=(w-text_w)/2:y=(h-text_h)*{y_pos}"
     )
-    # Fügt fontfile nur hinzu, wenn vorhanden und vermeidet doppelte Doppelpunkte
     if font_option:
         drawtext_filter += f":{font_option}"
@@ -201,7 +204,6 @@ def generate_slideshow_with_audio(images, input_text, duration_per_word, duratio
     # Schriftart finden
     font_path = get_font_path()
-    # font_option enthält NUR den Parameter-Teil, OHNE führenden Doppelpunkt.
     font_option = f"fontfile='{font_path}'" if font_path else ""
     # Audio verarbeiten
@@ -227,15 +229,38 @@ def generate_slideshow_with_audio(images, input_text, duration_per_word, duratio
         drawtext_filters = []
         if full_text:
-            # ERSTE SCHICHT: Der gesamte Satz (als Basis)
-            base_filter = create_sentence_base_filter(full_text, duration_clip, font_option, font_size, y_pos, subtitle_style)
-            drawtext_filters.append(base_filter)
-            # ZWEITE SCHICHT: Highlight-Layer für jedes Wort
             word_start_time = 0.0
-            for word in word_segment:
-                highlight_filter = create_highlight_word_filter(word, full_text, word_start_time, duration_per_word, font_option, font_size, y_pos, subtitle_style)
                 drawtext_filters.append(highlight_filter)
                 word_start_time += duration_per_word
@@ -252,6 +277,7 @@ def generate_slideshow_with_audio(images, input_text, duration_per_word, duratio
         # 4. Kombiniere alle Filter
         if drawtext_filters:
             all_drawtext_filters = ",".join(drawtext_filters)
             vf_filters_clip = f"{base_filters},{all_drawtext_filters},{fade_img_filter}"
         else:
@@ -328,7 +354,7 @@ with gr.Blocks() as demo:
     with gr.Row():
         img_input = gr.Files(label="Bilder", file_types=allowed_medias)
-        text_input = gr.Textbox(label="Text (Wörter werden gleichmäßig auf alle Bilder verteilt)", lines=5, placeholder="Der Satzabschnitt ist pro Clip sichtbar. Das aktive Wort wird hervorgehoben.")
     with gr.Row():
         duration_image_input = gr.Number(value=3, label="Mindest-Dauer pro BILD (s)")

              shutil.rmtree(temp_audio_dir)
         return None, None
+def create_cumulative_base_filter(text_to_draw, start_time, font_option, font_size, y_pos, style):
     """
+    Erstellt den FFmpeg drawtext Filter für die Basisschicht des kumulierten Textes.
+    Dieser Text bleibt ab start_time bis zum Ende des Clips sichtbar.
     """
     # Standard-Stil
     base_params = {
         "fontcolor": "white",
+        "borderw": 0,
         "bordercolor": "black",
         "box": 0, "boxcolor": "",
         "fontsize": font_size
     style_lower = style.lower()
+    # --- STYLES FÜR DIE BASISSCHICHT (Der Satz selbst) ---
     if style_lower == "modern":
+        # Modern: Graue, semi-transparente Hintergrundbox (HINWEIS: FFmpeg unterstützt keine abgerundeten Ecken)
         base_params["box"] = 1
+        base_params["boxcolor"] = "0x444444@0.6" # Dunkelgrau mit 60% Transparenz
+        base_params["fontcolor"] = "white"
+        base_params["borderw"] = 0
         base_params["fontsize"] = font_size
     elif style_lower == "pop":
+        # Pop: Schwarze, semi-transparente Hintergrundbox
         base_params["box"] = 1
         base_params["boxcolor"] = "0x000000@0.6"
         base_params["fontsize"] = font_size * 1.1
+        base_params["borderw"] = 0
+    # Für andere Stile wird die Basisschicht ohne Box oder Rand gezeichnet (falls sie überhaupt gebraucht wird)
+    escaped_text = text_to_draw.replace(':', FFMPEG_ESCAPE_CHAR + ':')
+    # Filter für den gesamten Satz, der ab start_time sichtbar wird
     drawtext_filter = (
         f"drawtext=text='{escaped_text}':"
         f"fontcolor={base_params['fontcolor']}:"
         f"fontsize={base_params['fontsize']}:"
         f"borderw={base_params['borderw']}:"
         f"bordercolor={base_params['bordercolor']}:"
+        # boxborderw=10 fügt Polsterung hinzu
         + (f"box={base_params['box']}:boxcolor={base_params['boxcolor']}:boxborderw=10:" if base_params["box"] else "") +
         f"x=(w-text_w)/2:y=(h-text_h)*{y_pos}"
     )
     if font_option:
         drawtext_filter += f":{font_option}"
+    # enable='gt(t, {start_time - 0.05})' sorgt dafür, dass dieser Text ab kurz vor start_time (0,05 s früher) dauerhaft angezeigt wird
+    drawtext_filter += f":enable='gt(t, {start_time - 0.05})'" # -0.05 für nahtlosen Übergang
     return drawtext_filter
+def create_highlight_word_filter(word, start_time, duration, font_option, font_size, y_pos, style):
     """
     Erstellt den FFmpeg drawtext Filter für die Highlight-Schicht (nur das aktive Wort).
     """
     word_end_time = start_time + duration
     style_lower = style.lower()
+    # --- STYLES FÜR DIE HIGHLIGHT-SCHICHT (Das aktuell hervorgehobene Wort) ---
     if style_lower == "modern":
+        # Modern: Gelbe Schrift über dem Basissatz
         params["fontcolor"] = "yellow"
+        params["borderw"] = 0
         params["fontsize_override"] = font_size * 1.05
     elif style_lower == "bold":
         f"x=(w-text_w)/2:y=(h-text_h)*{y_pos}"
     )
     if font_option:
         drawtext_filter += f":{font_option}"
     # Schriftart finden
     font_path = get_font_path()
     font_option = f"fontfile='{font_path}'" if font_path else ""
     # Audio verarbeiten
         drawtext_filters = []
         if full_text:
+            cumulative_text_list = []
             word_start_time = 0.0
+            for j, word in enumerate(word_segment):
+                # Aktualisiere den kumulierten Text
+                cumulative_text_list.append(word)
+                current_cumulative_text = " ".join(cumulative_text_list)
+                # ERSTE SCHICHT: Kumulierter Basistext (wird ab diesem Wort permanent sichtbar)
+                base_cumulative_filter = create_cumulative_base_filter(
+                    current_cumulative_text,
+                    word_start_time,
+                    font_option,
+                    font_size,
+                    y_pos,
+                    subtitle_style
+                )
+                drawtext_filters.append(base_cumulative_filter)
+                # ZWEITE SCHICHT: Highlight-Layer (fadet ein und aus)
+                highlight_filter = create_highlight_word_filter(
+                    word,
+                    word_start_time,
+                    duration_per_word,
+                    font_option,
+                    font_size,
+                    y_pos,
+                    subtitle_style
+                )
                 drawtext_filters.append(highlight_filter)
                 word_start_time += duration_per_word
         # 4. Kombiniere alle Filter
         if drawtext_filters:
+            # Wichtig: Die Filter werden in der Reihenfolge angewendet, d.h. der letzte Filter liegt oben.
             all_drawtext_filters = ",".join(drawtext_filters)
             vf_filters_clip = f"{base_filters},{all_drawtext_filters},{fade_img_filter}"
         else:
     with gr.Row():
         img_input = gr.Files(label="Bilder", file_types=allowed_medias)
+        text_input = gr.Textbox(label="Text (Wörter werden gleichmäßig auf alle Bilder verteilt)", lines=5, placeholder="Jedes Wort im Basissatz wird nach und nach hinzugefügt.")
     with gr.Row():
         duration_image_input = gr.Number(value=3, label="Mindest-Dauer pro BILD (s)")