Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -47,15 +47,15 @@ def save_temp_audio(audio_file_path):
|
|
| 47 |
shutil.rmtree(temp_audio_dir)
|
| 48 |
return None, None
|
| 49 |
|
| 50 |
-
def create_sentence_base_filter(full_text, duration_clip, font_option, font_size, y_pos, style):
|
| 51 |
"""
|
| 52 |
-
Erstellt den FFmpeg drawtext Filter für die Basisschicht
|
| 53 |
-
|
| 54 |
"""
|
| 55 |
# Standard-Stil
|
| 56 |
base_params = {
|
| 57 |
"fontcolor": "white",
|
| 58 |
-
"borderw":
|
| 59 |
"bordercolor": "black",
|
| 60 |
"box": 0, "boxcolor": "",
|
| 61 |
"fontsize": font_size
|
|
@@ -63,46 +63,49 @@ def create_sentence_base_filter(full_text, duration_clip, font_option, font_size
|
|
| 63 |
|
| 64 |
style_lower = style.lower()
|
| 65 |
|
| 66 |
-
#
|
| 67 |
if style_lower == "modern":
|
|
|
|
| 68 |
base_params["box"] = 1
|
| 69 |
-
# Dunkelgrau
|
| 70 |
-
base_params["
|
| 71 |
-
base_params["borderw"] = 0
|
| 72 |
base_params["fontsize"] = font_size
|
| 73 |
|
| 74 |
-
# SPEZIALFALL: Pop Style (Box wird von der Basisschicht gezeichnet)
|
| 75 |
elif style_lower == "pop":
|
|
|
|
| 76 |
base_params["box"] = 1
|
| 77 |
base_params["boxcolor"] = "0x000000@0.6"
|
| 78 |
base_params["fontsize"] = font_size * 1.1
|
| 79 |
-
|
| 80 |
-
|
|
|
|
|
|
|
|
|
|
| 81 |
|
| 82 |
-
# Filter für den gesamten Satz,
|
| 83 |
drawtext_filter = (
|
| 84 |
f"drawtext=text='{escaped_text}':"
|
| 85 |
f"fontcolor={base_params['fontcolor']}:"
|
| 86 |
f"fontsize={base_params['fontsize']}:"
|
| 87 |
f"borderw={base_params['borderw']}:"
|
| 88 |
f"bordercolor={base_params['bordercolor']}:"
|
| 89 |
-
# boxborderw=10 fügt
|
| 90 |
+ (f"box={base_params['box']}:boxcolor={base_params['boxcolor']}:boxborderw=10:" if base_params["box"] else "") +
|
| 91 |
f"x=(w-text_w)/2:y=(h-text_h)*{y_pos}"
|
| 92 |
)
|
| 93 |
|
| 94 |
-
# Fügt fontfile nur hinzu, wenn vorhanden und vermeidet doppelte Doppelpunkte
|
| 95 |
if font_option:
|
| 96 |
drawtext_filter += f":{font_option}"
|
| 97 |
|
| 98 |
-
|
|
|
|
| 99 |
return drawtext_filter
|
| 100 |
|
| 101 |
|
| 102 |
-
def create_highlight_word_filter(word, full_text, start_time, duration, font_option, font_size, y_pos, style):
|
| 103 |
"""
|
| 104 |
Erstellt den FFmpeg drawtext Filter für die Highlight-Schicht (nur das aktive Wort).
|
| 105 |
-
Dies ist die "Hervorhebungs"-Spur.
|
| 106 |
"""
|
| 107 |
word_end_time = start_time + duration
|
| 108 |
|
|
@@ -125,10 +128,11 @@ def create_highlight_word_filter(word, full_text, start_time, duration, font_opt
|
|
| 125 |
|
| 126 |
style_lower = style.lower()
|
| 127 |
|
|
|
|
| 128 |
if style_lower == "modern":
|
| 129 |
-
# Modern:
|
| 130 |
params["fontcolor"] = "yellow"
|
| 131 |
-
params["borderw"] = 0
|
| 132 |
params["fontsize_override"] = font_size * 1.05
|
| 133 |
|
| 134 |
elif style_lower == "bold":
|
|
@@ -172,7 +176,6 @@ def create_highlight_word_filter(word, full_text, start_time, duration, font_opt
|
|
| 172 |
f"x=(w-text_w)/2:y=(h-text_h)*{y_pos}"
|
| 173 |
)
|
| 174 |
|
| 175 |
-
# Fügt fontfile nur hinzu, wenn vorhanden und vermeidet doppelte Doppelpunkte
|
| 176 |
if font_option:
|
| 177 |
drawtext_filter += f":{font_option}"
|
| 178 |
|
|
@@ -201,7 +204,6 @@ def generate_slideshow_with_audio(images, input_text, duration_per_word, duratio
|
|
| 201 |
|
| 202 |
# Schriftart finden
|
| 203 |
font_path = get_font_path()
|
| 204 |
-
# font_option enthält NUR den Parameter-Teil, OHNE führenden Doppelpunkt.
|
| 205 |
font_option = f"fontfile='{font_path}'" if font_path else ""
|
| 206 |
|
| 207 |
# Audio verarbeiten
|
|
@@ -227,15 +229,38 @@ def generate_slideshow_with_audio(images, input_text, duration_per_word, duratio
|
|
| 227 |
drawtext_filters = []
|
| 228 |
|
| 229 |
if full_text:
|
| 230 |
-
|
| 231 |
-
base_filter = create_sentence_base_filter(full_text, duration_clip, font_option, font_size, y_pos, subtitle_style)
|
| 232 |
-
drawtext_filters.append(base_filter)
|
| 233 |
-
|
| 234 |
-
# ZWEITE SCHICHT: Highlight-Layer für jedes Wort
|
| 235 |
word_start_time = 0.0
|
| 236 |
-
|
| 237 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 238 |
drawtext_filters.append(highlight_filter)
|
|
|
|
| 239 |
word_start_time += duration_per_word
|
| 240 |
|
| 241 |
|
|
@@ -252,6 +277,7 @@ def generate_slideshow_with_audio(images, input_text, duration_per_word, duratio
|
|
| 252 |
|
| 253 |
# 4. Kombiniere alle Filter
|
| 254 |
if drawtext_filters:
|
|
|
|
| 255 |
all_drawtext_filters = ",".join(drawtext_filters)
|
| 256 |
vf_filters_clip = f"{base_filters},{all_drawtext_filters},{fade_img_filter}"
|
| 257 |
else:
|
|
@@ -328,7 +354,7 @@ with gr.Blocks() as demo:
|
|
| 328 |
|
| 329 |
with gr.Row():
|
| 330 |
img_input = gr.Files(label="Bilder", file_types=allowed_medias)
|
| 331 |
-
text_input = gr.Textbox(label="Text (Wörter werden gleichmäßig auf alle Bilder verteilt)", lines=5, placeholder="
|
| 332 |
|
| 333 |
with gr.Row():
|
| 334 |
duration_image_input = gr.Number(value=3, label="Mindest-Dauer pro BILD (s)")
|
|
|
|
| 47 |
shutil.rmtree(temp_audio_dir)
|
| 48 |
return None, None
|
| 49 |
|
| 50 |
+
def create_cumulative_base_filter(text_to_draw, start_time, font_option, font_size, y_pos, style):
|
| 51 |
"""
|
| 52 |
+
Erstellt den FFmpeg drawtext Filter für die Basisschicht des kumulierten Textes.
|
| 53 |
+
Dieser Text bleibt ab start_time bis zum Ende des Clips sichtbar.
|
| 54 |
"""
|
| 55 |
# Standard-Stil
|
| 56 |
base_params = {
|
| 57 |
"fontcolor": "white",
|
| 58 |
+
"borderw": 0,
|
| 59 |
"bordercolor": "black",
|
| 60 |
"box": 0, "boxcolor": "",
|
| 61 |
"fontsize": font_size
|
|
|
|
| 63 |
|
| 64 |
style_lower = style.lower()
|
| 65 |
|
| 66 |
+
# --- STYLES FÜR DIE BASISSCHICHT (Der Satz selbst) ---
|
| 67 |
if style_lower == "modern":
|
| 68 |
+
# Modern: Graue, semi-transparente Hintergrundbox (HINWEIS: FFmpeg unterstützt keine abgerundeten Ecken)
|
| 69 |
base_params["box"] = 1
|
| 70 |
+
base_params["boxcolor"] = "0x444444@0.6" # Dunkelgrau mit 60% Transparenz
|
| 71 |
+
base_params["fontcolor"] = "white"
|
| 72 |
+
base_params["borderw"] = 0
|
| 73 |
base_params["fontsize"] = font_size
|
| 74 |
|
|
|
|
| 75 |
elif style_lower == "pop":
|
| 76 |
+
# Pop: Schwarze, semi-transparente Hintergrundbox
|
| 77 |
base_params["box"] = 1
|
| 78 |
base_params["boxcolor"] = "0x000000@0.6"
|
| 79 |
base_params["fontsize"] = font_size * 1.1
|
| 80 |
+
base_params["borderw"] = 0
|
| 81 |
+
|
| 82 |
+
# Für andere Stile wird die Basisschicht ohne Box oder Rand gezeichnet (falls sie überhaupt gebraucht wird)
|
| 83 |
+
|
| 84 |
+
escaped_text = text_to_draw.replace(':', FFMPEG_ESCAPE_CHAR + ':')
|
| 85 |
|
| 86 |
+
# Filter für den gesamten Satz, der ab start_time sichtbar wird
|
| 87 |
drawtext_filter = (
|
| 88 |
f"drawtext=text='{escaped_text}':"
|
| 89 |
f"fontcolor={base_params['fontcolor']}:"
|
| 90 |
f"fontsize={base_params['fontsize']}:"
|
| 91 |
f"borderw={base_params['borderw']}:"
|
| 92 |
f"bordercolor={base_params['bordercolor']}:"
|
| 93 |
+
# boxborderw=10 fügt Polsterung hinzu
|
| 94 |
+ (f"box={base_params['box']}:boxcolor={base_params['boxcolor']}:boxborderw=10:" if base_params["box"] else "") +
|
| 95 |
f"x=(w-text_w)/2:y=(h-text_h)*{y_pos}"
|
| 96 |
)
|
| 97 |
|
|
|
|
| 98 |
if font_option:
|
| 99 |
drawtext_filter += f":{font_option}"
|
| 100 |
|
| 101 |
+
# enable='gt(t, {start_time})' sorgt dafür, dass dieser Text dauerhaft ab start_time angezeigt wird
|
| 102 |
+
drawtext_filter += f":enable='gt(t, {start_time - 0.05})'" # -0.05 für nahtlosen Übergang
|
| 103 |
return drawtext_filter
|
| 104 |
|
| 105 |
|
| 106 |
+
def create_highlight_word_filter(word, start_time, duration, font_option, font_size, y_pos, style):
|
| 107 |
"""
|
| 108 |
Erstellt den FFmpeg drawtext Filter für die Highlight-Schicht (nur das aktive Wort).
|
|
|
|
| 109 |
"""
|
| 110 |
word_end_time = start_time + duration
|
| 111 |
|
|
|
|
| 128 |
|
| 129 |
style_lower = style.lower()
|
| 130 |
|
| 131 |
+
# --- STYLES FÜR DIE HIGHLIGHT-SCHICHT (Das aktuell hervorgehobene Wort) ---
|
| 132 |
if style_lower == "modern":
|
| 133 |
+
# Modern: Gelbe Schrift über dem Basissatz
|
| 134 |
params["fontcolor"] = "yellow"
|
| 135 |
+
params["borderw"] = 0
|
| 136 |
params["fontsize_override"] = font_size * 1.05
|
| 137 |
|
| 138 |
elif style_lower == "bold":
|
|
|
|
| 176 |
f"x=(w-text_w)/2:y=(h-text_h)*{y_pos}"
|
| 177 |
)
|
| 178 |
|
|
|
|
| 179 |
if font_option:
|
| 180 |
drawtext_filter += f":{font_option}"
|
| 181 |
|
|
|
|
| 204 |
|
| 205 |
# Schriftart finden
|
| 206 |
font_path = get_font_path()
|
|
|
|
| 207 |
font_option = f"fontfile='{font_path}'" if font_path else ""
|
| 208 |
|
| 209 |
# Audio verarbeiten
|
|
|
|
| 229 |
drawtext_filters = []
|
| 230 |
|
| 231 |
if full_text:
|
| 232 |
+
cumulative_text_list = []
|
|
|
|
|
|
|
|
|
|
|
|
|
| 233 |
word_start_time = 0.0
|
| 234 |
+
|
| 235 |
+
for j, word in enumerate(word_segment):
|
| 236 |
+
|
| 237 |
+
# Aktualisiere den kumulierten Text
|
| 238 |
+
cumulative_text_list.append(word)
|
| 239 |
+
current_cumulative_text = " ".join(cumulative_text_list)
|
| 240 |
+
|
| 241 |
+
# ERSTE SCHICHT: Kumulierter Basistext (wird ab diesem Wort permanent sichtbar)
|
| 242 |
+
base_cumulative_filter = create_cumulative_base_filter(
|
| 243 |
+
current_cumulative_text,
|
| 244 |
+
word_start_time,
|
| 245 |
+
font_option,
|
| 246 |
+
font_size,
|
| 247 |
+
y_pos,
|
| 248 |
+
subtitle_style
|
| 249 |
+
)
|
| 250 |
+
drawtext_filters.append(base_cumulative_filter)
|
| 251 |
+
|
| 252 |
+
# ZWEITE SCHICHT: Highlight-Layer (fadet ein und aus)
|
| 253 |
+
highlight_filter = create_highlight_word_filter(
|
| 254 |
+
word,
|
| 255 |
+
word_start_time,
|
| 256 |
+
duration_per_word,
|
| 257 |
+
font_option,
|
| 258 |
+
font_size,
|
| 259 |
+
y_pos,
|
| 260 |
+
subtitle_style
|
| 261 |
+
)
|
| 262 |
drawtext_filters.append(highlight_filter)
|
| 263 |
+
|
| 264 |
word_start_time += duration_per_word
|
| 265 |
|
| 266 |
|
|
|
|
| 277 |
|
| 278 |
# 4. Kombiniere alle Filter
|
| 279 |
if drawtext_filters:
|
| 280 |
+
# Wichtig: Die Filter werden in der Reihenfolge angewendet, d.h. der letzte Filter liegt oben.
|
| 281 |
all_drawtext_filters = ",".join(drawtext_filters)
|
| 282 |
vf_filters_clip = f"{base_filters},{all_drawtext_filters},{fade_img_filter}"
|
| 283 |
else:
|
|
|
|
| 354 |
|
| 355 |
with gr.Row():
|
| 356 |
img_input = gr.Files(label="Bilder", file_types=allowed_medias)
|
| 357 |
+
text_input = gr.Textbox(label="Text (Wörter werden gleichmäßig auf alle Bilder verteilt)", lines=5, placeholder="Jedes Wort im Basissatz wird nach und nach hinzugefügt.")
|
| 358 |
|
| 359 |
with gr.Row():
|
| 360 |
duration_image_input = gr.Number(value=3, label="Mindest-Dauer pro BILD (s)")
|