Update app.py
Browse files
app.py
CHANGED
|
@@ -8,6 +8,7 @@ import moviepy
|
|
| 8 |
from transformers import pipeline
|
| 9 |
from transformers.pipelines.audio_utils import ffmpeg_read
|
| 10 |
from moviepy.editor import (
|
|
|
|
| 11 |
VideoFileClip,
|
| 12 |
TextClip,
|
| 13 |
CompositeVideoClip,
|
|
@@ -16,6 +17,7 @@ from moviepy.editor import (
|
|
| 16 |
concatenate_videoclips,
|
| 17 |
concatenate_audioclips
|
| 18 |
)
|
|
|
|
| 19 |
from moviepy.audio.AudioClip import AudioArrayClip
|
| 20 |
import subprocess
|
| 21 |
import speech_recognition as sr
|
|
@@ -306,47 +308,83 @@ def update_translations(file, edited_table, mode):
|
|
| 306 |
except Exception as e:
|
| 307 |
raise ValueError(f"Error updating translations: {e}")
|
| 308 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 309 |
def process_entry(entry, i, video_width, video_height, add_voiceover, target_language, speaker_sample_paths=None):
|
| 310 |
logger.debug(f"Processing entry {i}: {entry}")
|
| 311 |
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
stroke_width=2,
|
| 319 |
-
fontsize=int(video_height // 20),
|
| 320 |
-
method='label',
|
| 321 |
-
).with_start(entry["start"]).with_duration(entry["end"] - entry["start"]).with_position(('bottom')).with_opacity(0.8)
|
| 322 |
|
| 323 |
audio_segment = None
|
| 324 |
if add_voiceover:
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
audio_clip = AudioFileClip(segment_audio_path)
|
| 332 |
-
# Get and log all methods in AudioFileClip
|
| 333 |
-
logger.info("Methods in AudioFileClip:")
|
| 334 |
-
for method in dir(audio_clip):
|
| 335 |
-
logger.info(method)
|
| 336 |
-
|
| 337 |
-
# Log duration of the audio clip and the desired duration for debugging.
|
| 338 |
-
logger.debug(f"Audio clip duration: {audio_clip.duration}, Desired duration: {desired_duration}")
|
| 339 |
|
| 340 |
-
|
| 341 |
-
# Pad with silence if audio is too short
|
| 342 |
-
silence_duration = desired_duration - audio_clip.duration
|
| 343 |
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 350 |
|
| 351 |
return i, txt_clip, audio_segment
|
| 352 |
|
|
|
|
| 8 |
from transformers import pipeline
|
| 9 |
from transformers.pipelines.audio_utils import ffmpeg_read
|
| 10 |
from moviepy.editor import (
|
| 11 |
+
ImageClip,
|
| 12 |
VideoFileClip,
|
| 13 |
TextClip,
|
| 14 |
CompositeVideoClip,
|
|
|
|
| 17 |
concatenate_videoclips,
|
| 18 |
concatenate_audioclips
|
| 19 |
)
|
| 20 |
+
from PIL import Image, ImageDraw, ImageFont
|
| 21 |
from moviepy.audio.AudioClip import AudioArrayClip
|
| 22 |
import subprocess
|
| 23 |
import speech_recognition as sr
|
|
|
|
| 308 |
except Exception as e:
|
| 309 |
raise ValueError(f"Error updating translations: {e}")
|
| 310 |
|
| 311 |
+
def _measure_text_width(draw, text, font):
    """Return the rendered pixel width of *text* for *font*.

    ``ImageDraw.textsize()`` was deprecated in Pillow 9.2 and removed in
    Pillow 10, so prefer ``textlength()`` and only fall back to the legacy
    call on Pillow builds that lack it.
    """
    if hasattr(draw, "textlength"):
        return draw.textlength(text, font=font)
    width, _ = draw.textsize(text, font=font)
    return width


def _wrap_subtitle_text(text, max_width, measure):
    """Greedily wrap *text* into lines whose measured width fits *max_width*.

    Args:
        text: The subtitle string to wrap.
        max_width: Maximum allowed line width, in the unit ``measure`` returns.
        measure: Callable mapping a string to its rendered width.

    Returns:
        A non-empty list of lines. Empty/whitespace-only input yields ``[""]``
        so the caller still produces a (blank) one-line image.
    """
    lines = []
    current = ""
    for word in text.split():
        candidate = f"{current} {word}".strip()
        if measure(candidate) <= max_width:
            current = candidate
            continue

        # The candidate does not fit: flush what we have, but never emit an
        # empty line (happens when the very first word is already too wide).
        if current:
            lines.append(current)
            current = ""

        if measure(word) <= max_width:
            current = word
            continue

        # A single token wider than the box (typical for CJK text, which has
        # no spaces to split on) is broken character by character.
        for ch in word:
            if current and measure(current + ch) > max_width:
                lines.append(current)
                current = ch
            else:
                current += ch

    lines.append(current)
    return lines


def create_subtitle_clip_pil(entry, video_width, video_height, font_path="./NotoSansSC-Regular.ttf"):
    """Create a PIL-rendered ImageClip for one subtitle (no ImageMagick needed).

    The text is wrapped to 80% of the video width, drawn in opaque yellow on a
    transparent RGBA canvas, and wrapped in an ``ImageClip`` positioned at the
    bottom centre with 0.8 opacity.

    Args:
        entry: Mapping with the keys ``"translated"`` (subtitle text),
            ``"start"`` and ``"end"`` (clip timing; presumably seconds --
            confirm against the caller).
        video_width: Width of the target video in pixels.
        video_height: Height of the target video in pixels; the font size is
            ``video_height // 20``.
        font_path: Path to a TrueType font. If it cannot be loaded, Pillow's
            default font is used instead.

    Returns:
        The positioned and timed ``ImageClip``.
    """
    subtitle_font_size = int(video_height // 20)
    subtitle_width = int(video_width * 0.8)
    text = entry["translated"]

    try:
        font = ImageFont.truetype(font_path, subtitle_font_size)
    except Exception as e:
        # Best effort: a missing font should degrade rendering, not abort it.
        logger.warning(f"⚠️ Could not load font from {font_path}, using default font: {e}")
        font = ImageFont.load_default()

    # A throwaway 1px-high canvas is enough to obtain a drawing context for
    # measuring text before the real image height is known.
    dummy_img = Image.new("RGBA", (subtitle_width, 1), (0, 0, 0, 0))
    measuring_draw = ImageDraw.Draw(dummy_img)
    lines = _wrap_subtitle_text(
        text,
        subtitle_width - 10,  # 5px padding on each side
        lambda s: _measure_text_width(measuring_draw, s, font),
    )

    line_height = subtitle_font_size + 4
    total_height = len(lines) * line_height + 10
    img = Image.new("RGBA", (subtitle_width, total_height), (0, 0, 0, 0))
    draw = ImageDraw.Draw(img)

    for idx, line_text in enumerate(lines):
        draw.text((5, 5 + idx * line_height), line_text, font=font, fill=(255, 255, 0, 255))

    np_img = np.array(img)

    txt_clip = (
        ImageClip(np_img, ismask=False)
        .set_position(("center", "bottom"))
        .set_start(entry["start"])
        .set_duration(entry["end"] - entry["start"])
        .set_opacity(0.8)
    )

    return txt_clip
|
| 354 |
+
|
| 355 |
def process_entry(entry, i, video_width, video_height, add_voiceover, target_language, speaker_sample_paths=None):
    """Build the subtitle clip and, optionally, the voice-over clip for one entry.

    Both steps are best effort: a failure in either is logged and that part
    of the result becomes ``None`` instead of propagating the exception.

    Args:
        entry: Mapping read for ``"start"`` and ``"end"`` and, optionally,
            ``"speaker"``; it is also passed on to the subtitle and
            voice-over helpers.
        i: Index of the entry; returned unchanged and used in the temporary
            audio file name ``segment_{i}_voiceover.wav``.
        video_width: Forwarded to ``create_subtitle_clip_pil``.
        video_height: Forwarded to ``create_subtitle_clip_pil``.
        add_voiceover: When falsy, no audio is generated.
        target_language: Forwarded to ``generate_voiceover_clone``.
        speaker_sample_paths: Optional mapping from speaker id to a reference
            wav path; a missing mapping or id yields ``None``.

    Returns:
        Tuple ``(i, txt_clip, audio_segment)`` where either clip may be
        ``None``.
    """
    logger.debug(f"Processing entry {i}: {entry}")

    # Subtitle clip via PIL (robust, no ImageMagick needed).
    subtitle = None
    try:
        subtitle = create_subtitle_clip_pil(entry, video_width, video_height)
    except Exception as e:
        logger.error(f"❌ Failed to create subtitle clip for entry {i}: {e}")

    if not add_voiceover:
        return i, subtitle, None

    try:
        wav_path = f"segment_{i}_voiceover.wav"
        target_len = entry["end"] - entry["start"]
        speaker = entry.get("speaker", "default")
        reference_wav = None
        if speaker_sample_paths:
            reference_wav = speaker_sample_paths.get(speaker, None)

        # NOTE(review): generate_voiceover_clone is defined outside this view;
        # it is assumed to write its output to wav_path -- confirm.
        generate_voiceover_clone([entry], target_len, target_language, reference_wav, wav_path)

        voice = AudioFileClip(wav_path)
        logger.debug(f"Audio clip duration: {voice.duration}, Desired duration: {target_len}")

        if voice.duration < target_len:
            # Too short: append silence so the clip spans the subtitle slot.
            # NOTE(review): `silence` is not defined in the visible part of
            # the file -- confirm it is in scope at runtime.
            gap = target_len - voice.duration
            voice = concatenate_audioclips([voice, silence(duration=gap)])
            logger.info(f"Padded audio with {gap:.2f}s silence.")

        voiceover = voice.set_start(entry["start"]).set_duration(target_len)
    except Exception as e:
        logger.error(f"❌ Failed to generate audio segment for entry {i}: {e}")
        voiceover = None

    return i, subtitle, voiceover
|
| 390 |
|