Tim13ekd committed on
Commit
c5cfcb5
·
verified ·
1 Parent(s): 4ce04cf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +92 -86
app.py CHANGED
@@ -4,20 +4,29 @@ from pathlib import Path
4
  import uuid
5
  import subprocess
6
  import shutil
7
- import shlex # Für sicheres Escapen von Text
8
 
9
  # Erlaubte Dateiformate
10
  allowed_medias = [".png", ".jpg", ".jpeg", ".bmp", ".gif", ".tiff"]
11
  allowed_audios = [".mp3", ".wav", ".m4a", ".ogg"]
12
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  def save_temp_audio(audio_file):
14
- """
15
- Speichert die hochgeladene Datei sicher in einem temporären Verzeichnis.
16
- """
17
  if isinstance(audio_file, str):
18
  ext = Path(audio_file).suffix
19
  if ext.lower() not in allowed_audios:
20
- ext = ".mp3" # Standard, falls Endung fehlt
21
  temp_audio = Path(tempfile.mkdtemp()) / f"input{ext}"
22
  with open(temp_audio, "wb") as f:
23
  f.write(audio_file.encode())
@@ -31,142 +40,139 @@ def save_temp_audio(audio_file):
31
  with open(temp_audio, "wb") as f:
32
  shutil.copyfileobj(audio_file, f)
33
  return temp_audio
34
- else:
35
- raise ValueError("Das übergebene Audio ist kein gültiges Dateiformat oder NamedString.")
 
 
 
36
 
37
- def generate_slideshow_with_audio(images, input_text, duration_per_word=0.5, duration_per_image=3, fade_duration=0.7, font_size=60, y_pos=0.5, audio_file=None):
38
  if not images:
39
  return None, "❌ Keine Bilder ausgewählt"
40
 
41
  temp_dir = tempfile.mkdtemp()
42
- clips = []
43
 
44
  # Text in Wörter aufteilen
45
- words = input_text.split()
46
- total_words = len(words)
47
 
48
- # Falls Audio vorhanden ist, speichern wir es einfach als temporäre Datei
49
  temp_audio_file = None
50
  if audio_file:
51
  temp_audio_file = save_temp_audio(audio_file)
52
 
53
- # Clips für jedes Bild erstellen
54
- clips_with_text = []
55
- word_index = 0
56
 
57
  for i, img_path in enumerate(images):
58
- img_path = Path(img_path.name) # Sicherstellen, dass es den richtigen Pfad hat
59
  clip_path_with_text = Path(temp_dir) / f"clip_with_text_{i}.mp4"
60
 
61
- # Berechne Start- und Endzeit für jedes Wort
62
- start_time = i * duration_per_image
63
- end_time = (i + 1) * duration_per_image
64
-
65
- # Text-Filters für jedes Wort
66
  if word_index < len(words):
67
  text = words[word_index]
68
  word_index += 1
69
  else:
70
- text = "" # Falls keine weiteren Wörter, leeres Text
71
 
 
 
 
 
 
 
 
72
  vf_filters = (
73
  "scale=w=1280:h=720:force_original_aspect_ratio=decrease,"
74
  "pad=1280:720:(ow-iw)/2:(oh-ih)/2:color=black,"
75
- "fps=25,format=yuv420p"
76
- f",drawtext=text='{shlex.quote(text)}':fontcolor=white:fontsize={font_size}:borderw=2:"
77
  f"x=(w-text_w)/2:y=(h-text_h)*{y_pos}:"
78
  f"alpha='if(lt(t,{fade_duration}), t/{fade_duration}, if(lt(t,{duration_per_image}-{fade_duration}), 1, ({duration_per_image}-t)/{fade_duration}))'"
79
  )
80
 
81
- # Bild als Video mit Text Overlay erstellen
82
  cmd = [
83
- "ffmpeg",
84
- "-y",
85
- "-loop", "1",
86
- "-i", str(img_path),
87
  "-t", str(duration_per_image),
88
  "-vf", vf_filters,
89
  str(clip_path_with_text)
90
  ]
 
91
  try:
92
  subprocess.run(cmd, check=True, capture_output=True, text=True)
 
93
  except subprocess.CalledProcessError as e:
94
- return None, f"❌ FFmpeg Fehler bei Text Overlay für Bild {i+1}:\n{e.stderr}"
95
 
96
- clips_with_text.append(clip_path_with_text)
97
-
98
- # Zusammenfügen der Clips mit Text
99
- filelist_with_text_path = Path(temp_dir) / "filelist_with_text.txt"
100
- with open(filelist_with_text_path, "w") as f:
101
  for clip in clips_with_text:
102
  f.write(f"file '{clip}'\n")
103
 
104
- output_with_text_file = Path(temp_dir) / f"slideshow_with_text_{uuid.uuid4().hex}.mp4"
105
- cmd_concat_with_text = [
106
- "ffmpeg",
107
- "-y",
108
- "-f", "concat",
109
- "-safe", "0",
110
- "-i", str(filelist_with_text_path),
111
- "-c:v", "libx264",
112
- "-pix_fmt", "yuv420p",
113
- str(output_with_text_file)
114
  ]
115
- try:
116
- subprocess.run(cmd_concat_with_text, check=True, capture_output=True, text=True)
117
- except subprocess.CalledProcessError as e:
118
- return None, f"❌ FFmpeg Fehler beim Zusammenfügen der Clips mit Text:\n{e.stderr}"
119
 
120
- # Audio hinzufügen, falls vorhanden
121
  if temp_audio_file:
122
- final_output = Path(temp_dir) / f"slideshow_with_audio_{uuid.uuid4().hex}.mp4"
123
  cmd_audio = [
124
- "ffmpeg",
125
- "-y",
126
- "-i", str(output_with_text_file),
127
- "-i", str(temp_audio_file),
128
- "-c:v", "copy",
129
- "-c:a", "aac",
130
- "-shortest",
131
  str(final_output)
132
  ]
133
- try:
134
- subprocess.run(cmd_audio, check=True, capture_output=True, text=True)
135
- return str(final_output), "✅ Slideshow mit Audio und Text Overlay erstellt"
136
- except subprocess.CalledProcessError as e:
137
- return None, f"❌ FFmpeg Fehler beim Hinzufügen des Audios:\n{e.stderr}"
138
 
139
- return str(output_with_text_file), "✅ Slideshow mit Text Overlay erstellt (ohne Audio)"
140
 
141
  # Gradio UI
142
  with gr.Blocks() as demo:
143
- gr.Markdown("# Slideshow mit Audio und Text Overlay")
144
 
145
- img_input = gr.Files(label="Bilder auswählen (mehrere)", file_types=allowed_medias)
146
- text_input = gr.Textbox(
147
- label="Text eingeben",
148
- placeholder="Gib hier den Text ein, der Wort für Wort eingeblendet werden soll",
149
- lines=5
150
- )
151
- duration_word_input = gr.Number(value=0.5, label="Dauer pro Wort in Sekunden", precision=1)
152
- duration_image_input = gr.Number(value=3, label="Dauer pro Bild in Sekunden", precision=1)
153
- fade_input = gr.Number(value=0.7, label="Fade Dauer in Sekunden", precision=1)
154
- ypos_input = gr.Slider(minimum=0.0, maximum=0.9, step=0.01, value=0.5, label="Y-Position für alle Texte (0=oben, 0.5=mitte, 0.9=unten)")
155
- font_size_input = gr.Number(value=60, label="Textgröße (px)")
156
 
157
- audio_input = gr.File(
158
- label="Audio hinzufügen (optional)",
159
- file_types=allowed_audios
160
- )
161
-
162
- out_video = gr.Video(interactive=False, label="Generiertes Video")
163
- status = gr.Textbox(interactive=False, label="Status")
 
 
 
 
 
 
 
164
 
165
- btn = gr.Button("Video erstellen")
 
166
  btn.click(
167
  fn=generate_slideshow_with_audio,
168
- inputs=[img_input, text_input, duration_word_input, duration_image_input, ypos_input, fade_input, font_size_input, audio_input],
 
 
 
 
 
 
 
 
 
169
  outputs=[out_video, status]
170
  )
171
 
172
- demo.launch()
 
4
  import uuid
5
  import subprocess
6
  import shutil
7
+ import os
8
 
9
  # Erlaubte Dateiformate
10
  allowed_medias = [".png", ".jpg", ".jpeg", ".bmp", ".gif", ".tiff"]
11
  allowed_audios = [".mp3", ".wav", ".m4a", ".ogg"]
12
 
13
def get_font_path():
    """Return the path of the first known bold sans-serif font that exists.

    A fixed list of common Linux font locations is checked in order.

    Returns:
        The first candidate path that exists on disk, or None when none of
        them exists (FFmpeg is then left to pick a font on its own, which
        does not always work).
    """
    candidates = (
        "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
        "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
        "/usr/share/fonts/truetype/freefont/FreeSansBold.ttf",
    )
    return next((path for path in candidates if os.path.exists(path)), None)
24
+
25
  def save_temp_audio(audio_file):
 
 
 
26
  if isinstance(audio_file, str):
27
  ext = Path(audio_file).suffix
28
  if ext.lower() not in allowed_audios:
29
+ ext = ".mp3"
30
  temp_audio = Path(tempfile.mkdtemp()) / f"input{ext}"
31
  with open(temp_audio, "wb") as f:
32
  f.write(audio_file.encode())
 
40
  with open(temp_audio, "wb") as f:
41
  shutil.copyfileobj(audio_file, f)
42
  return temp_audio
43
+ return None
44
+
45
def _run_ffmpeg(cmd):
    """Run one FFmpeg command; return None on success, else the error text."""
    try:
        subprocess.run(cmd, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        return e.stderr
    except OSError as e:
        # Raised when the ffmpeg executable is missing or cannot be started.
        return str(e)
    return None


def _build_video_filter(text_file_path, font_option, font_size, y_pos, fade_duration, duration_per_image):
    """Build the -vf filter chain for one still image, optionally with text."""
    base = (
        "scale=w=1280:h=720:force_original_aspect_ratio=decrease,"
        "pad=1280:720:(ow-iw)/2:(oh-ih)/2:color=black,"
        "fps=25,format=yuv420p"
    )
    if text_file_path is None:
        # No word for this image: nothing to draw, so skip drawtext entirely.
        return base

    if fade_duration and fade_duration > 0:
        # Fade in over fade_duration, hold, then fade out at the end of the clip.
        alpha = (
            f"alpha='if(lt(t,{fade_duration}), t/{fade_duration}, "
            f"if(lt(t,{duration_per_image}-{fade_duration}), 1, "
            f"({duration_per_image}-t)/{fade_duration}))'"
        )
    else:
        # A zero/empty fade would put a division by zero into the expression.
        alpha = "alpha=1"

    return (
        f"{base},"
        f"drawtext=textfile='{text_file_path}'{font_option}:fontcolor=white:"
        f"fontsize={font_size}:borderw=2:bordercolor=black:"
        f"x=(w-text_w)/2:y=(h-text_h)*{y_pos}:"
        f"{alpha}"
    )


def generate_slideshow_with_audio(images, input_text, duration_per_word, duration_per_image, fade_duration, font_size, y_pos, audio_file):
    """Render a slideshow video from images, one word of text per image.

    Each image becomes a 1280x720 clip of ``duration_per_image`` seconds with
    the n-th word of ``input_text`` drawn on the n-th image. The clips are
    concatenated and, if an audio file is given, the audio is muxed in.

    Args:
        images: Uploaded images; each item is either an object with a
            ``.name`` path attribute or a plain path string.
        input_text: Text that is split on whitespace into words.
        duration_per_word: Currently unused; kept so the UI wiring matches.
        duration_per_image: Length of each image clip in seconds.
        fade_duration: Fade-in/out time of the text in seconds; 0 disables it.
        font_size: Font size passed to FFmpeg's drawtext filter.
        y_pos: Vertical text position as a factor of the free height.
        audio_file: Optional audio upload, handed to ``save_temp_audio``.

    Returns:
        A ``(video_path, status_message)`` tuple. ``video_path`` is None when
        no images were given or any FFmpeg step failed; the message then
        carries the error text. The video is written to a fresh temporary
        directory that is not removed here.
    """
    # Debug output to check that the UI values arrive in the right order.
    print(f"DEBUG: Font Size: {font_size}, Y-Pos: {y_pos}, Fade: {fade_duration}")

    if not images:
        return None, "❌ Keine Bilder ausgewählt"

    temp_dir = Path(tempfile.mkdtemp())
    words = input_text.split() if input_text else []
    temp_audio_file = save_temp_audio(audio_file) if audio_file else None

    font_path = get_font_path()
    font_option = f":fontfile='{font_path}'" if font_path else ""

    clips_with_text = []
    for i, image in enumerate(images):
        # Accept both upload objects exposing .name and plain path strings.
        img_path = Path(getattr(image, "name", image))
        clip_path = temp_dir / f"clip_with_text_{i}.mp4"

        text = words[i] if i < len(words) else ""
        text_file_path = None
        if text:
            # Pass the word via a file so it needs no filter-string escaping.
            text_file_path = temp_dir / f"text_{i}.txt"
            text_file_path.write_text(text, encoding="utf-8")

        vf_filters = _build_video_filter(
            text_file_path, font_option, font_size, y_pos, fade_duration, duration_per_image
        )
        cmd = [
            "ffmpeg", "-y", "-loop", "1", "-i", str(img_path),
            "-t", str(duration_per_image),
            "-vf", vf_filters,
            str(clip_path),
        ]
        error = _run_ffmpeg(cmd)
        if error is not None:
            return None, f"❌ FFmpeg Fehler bei Bild {i+1}:\n{error}"
        clips_with_text.append(clip_path)

    # Concatenate the per-image clips via FFmpeg's concat demuxer.
    filelist_path = temp_dir / "filelist.txt"
    with open(filelist_path, "w") as f:
        f.writelines(f"file '{clip}'\n" for clip in clips_with_text)

    output_video = temp_dir / f"slideshow_{uuid.uuid4().hex}.mp4"
    cmd_concat = [
        "ffmpeg", "-y", "-f", "concat", "-safe", "0",
        "-i", str(filelist_path),
        "-c:v", "libx264", "-pix_fmt", "yuv420p",
        str(output_video),
    ]
    error = _run_ffmpeg(cmd_concat)
    if error is not None:
        return None, f"❌ FFmpeg Fehler beim Zusammenfügen der Clips:\n{error}"

    if not temp_audio_file:
        return str(output_video), "✅ Video erstellt (ohne Audio)"

    # Mux the audio in; -shortest stops at the shorter of the two streams.
    final_output = temp_dir / f"final_{uuid.uuid4().hex}.mp4"
    cmd_audio = [
        "ffmpeg", "-y", "-i", str(output_video), "-i", str(temp_audio_file),
        "-c:v", "copy", "-c:a", "aac", "-shortest",
        str(final_output),
    ]
    error = _run_ffmpeg(cmd_audio)
    if error is not None:
        return None, f"❌ FFmpeg Fehler beim Hinzufügen des Audios:\n{error}"
    return str(final_output), "✅ Video mit Audio erstellt!"
137
 
138
  # Gradio UI
139
with gr.Blocks() as demo:
    gr.Markdown("# Slideshow Generator")

    # Row 1: the images and the text whose words are spread over them.
    with gr.Row():
        img_input = gr.Files(label="Bilder", file_types=allowed_medias)
        text_input = gr.Textbox(
            label="Text",
            lines=5,
            placeholder="Wörter werden auf Bilder verteilt",
        )

    # Row 2: timing and text appearance settings.
    with gr.Row():
        duration_image_input = gr.Number(value=3, label="Dauer pro Bild (s)")
        fade_input = gr.Number(value=0.5, label="Fade Dauer (s)")
        font_size_input = gr.Number(value=80, label="Schriftgröße (px)")
        ypos_input = gr.Slider(
            minimum=0.0,
            maximum=1.0,
            value=0.5,
            label="Y-Position (0=Oben, 1=Unten)",
        )

    # Hidden placeholder for duration_per_word: the function does not use it
    # at the moment, but its signature still expects the argument.
    duration_word_input = gr.Number(value=0.5, visible=False)

    audio_input = gr.File(label="Audio (optional)", file_types=allowed_audios)
    btn = gr.Button("Erstellen", variant="primary")

    out_video = gr.Video(label="Ergebnis")
    status = gr.Textbox(label="Status")

    # The order must match the function signature:
    # (images, input_text, duration_per_word, duration_per_image,
    #  fade_duration, font_size, y_pos, audio_file)
    slideshow_inputs = [
        img_input,
        text_input,
        duration_word_input,
        duration_image_input,
        fade_input,
        font_size_input,
        ypos_input,
        audio_input,
    ]
    btn.click(
        fn=generate_slideshow_with_audio,
        inputs=slideshow_inputs,
        outputs=[out_video, status],
    )

demo.launch()