ex510 committed on
Commit
6ad2031
·
verified ·
1 Parent(s): c6c14f2

Update processor.py

Browse files
Files changed (1) hide show
  1. processor.py +165 -23
processor.py CHANGED
@@ -17,12 +17,17 @@ Fixes applied:
17
  { clip_index, start, end, segments, full_text }
18
  - ✅ NEW: process_video returns a dict with keys:
19
  output_files, transcripts, viral_segments, duration
20
- - ✅ NEW: mix_audio method added to VideoProcessor
21
- blends background music with original video audio
 
 
22
  """
23
  import os
24
  import gc
25
  import json
 
 
 
26
  import traceback
27
  import moviepy.editor as mpe
28
  import json_repair
@@ -75,49 +80,152 @@ class VideoProcessor:
75
  self.stt = STT(model_size=model_size)
76
  Config.setup_dirs()
77
 
78
- # ── Audio Mixing ──────────────────────────────────────────────────────────
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
 
80
    def mix_audio(self, video_clip, audio_path=None, bg_music_volume=0.1, original_volume=1.0):
        """
        Blends background music with the original video audio.

        video_clip      : MoviePy VideoFileClip or CompositeVideoClip
        audio_path      : path to music file (mp3/m4a/...) — None = skip
        bg_music_volume : background music level (0.0 → 1.0)
        original_volume : original video audio level (0.0 → 1.0)

        Returns: video_clip with mixed audio
        """
        # No music requested (or file missing) → return the clip untouched.
        if not audio_path or not os.path.exists(audio_path):
            return video_clip

        clip_duration = video_clip.duration
        logger.info(f"🎵 Mixing audio: {audio_path} | vol={bg_music_volume}")

        music = mpe.AudioFileClip(audio_path)

        # If the music is shorter than the clip → loop it
        if music.duration < clip_duration:
            loops = int(clip_duration / music.duration) + 1
            music = mpe.concatenate_audioclips([music] * loops)
            logger.info(f"🔁 Music looped x{loops}")

        # Trim the music to the clip's length (and attenuate it)
        music = music.subclip(0, clip_duration).volumex(bg_music_volume)

        original_audio = video_clip.audio

        # If there is no original audio → use the music alone
        if original_audio is None:
            logger.info("⚠️ No original audio — using music only")
            return video_clip.set_audio(music)

        # Mix the two audio tracks
        mixed = mpe.CompositeAudioClip([
            original_audio.volumex(original_volume),
            music,
        ])
        logger.info("✅ Audio mixed successfully")
        return video_clip.set_audio(mixed)
122
 
123
  # ── JSON helpers ──────────────────────────────────────────────────────────
@@ -298,6 +406,13 @@ class VideoProcessor:
298
  """
299
  Cuts, styles, captions, and exports each viral clip.
300
 
 
 
 
 
 
 
 
301
  ✅ Returns: (output_files, transcripts_per_clip)
302
  output_files : list of str — paths to rendered .mp4 files
303
  transcripts_per_clip : list of dicts, one per successfully rendered clip:
@@ -333,6 +448,10 @@ class VideoProcessor:
333
  if "." in style_str:
334
  style_str = style_str.split(".")[-1]
335
 
 
 
 
 
336
  # ── Main loop ─────────────────────────────────────────────────────────
337
  output_files = []
338
  transcripts_per_clip = []
@@ -421,15 +540,9 @@ class VideoProcessor:
421
  playground_path = kwargs.get("playground_path"),
422
  )
423
 
424
- # Mix background music ──────────────────────────────────────
425
- final_clip = self.mix_audio(
426
- final_clip,
427
- audio_path = kwargs.get("audio_path"),
428
- bg_music_volume = kwargs.get("bg_music_volume", 0.1),
429
- original_volume = 1.0,
430
- )
431
-
432
- # ── Export ────────────────────────────────────────────────────
433
  cpu_count = os.cpu_count() or 4
434
  logger.info(f"⚙️ Rendering with {cpu_count} thread(s) …")
435
 
@@ -441,6 +554,37 @@ class VideoProcessor:
441
  logger = None,
442
  )
443
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
444
  output_files.append(final_output)
445
  logger.info(f"✅ Saved: {final_output}")
446
 
@@ -484,7 +628,6 @@ class VideoProcessor:
484
  def process_video(video_path, style="cinematic_blur", model_size="base", **kwargs):
485
  """
486
  End-to-end pipeline: STT → AI analysis → clip export.
487
-
488
  ✅ Returns a dict with:
489
  {
490
  "output_files" : list[str],
@@ -493,7 +636,6 @@ def process_video(video_path, style="cinematic_blur", model_size="base", **kwarg
493
  "full_transcript": str,
494
  "duration" : float,
495
  }
496
-
497
  Important kwargs:
498
  source_language : language of the original video → passed to Whisper.
499
  language : desired output language (translation + captions).
 
17
  { clip_index, start, end, segments, full_text }
18
  - ✅ NEW: process_video returns a dict with keys:
19
  output_files, transcripts, viral_segments, duration
20
+ - ✅ NEW: mix_audio method — simple MoviePy blend (fallback / no-audio-path case)
21
+ - NEW: _apply_ducking_ffmpeg — FFmpeg sidechaincompress ducking (production).
22
+ Called as a post-process step after write_videofile to avoid
23
+ double-encoding. Falls back to simple mix_audio on FFmpeg failure.
24
  """
25
  import os
26
  import gc
27
  import json
28
+ import shutil
29
+ import subprocess
30
+ import tempfile
31
  import traceback
32
  import moviepy.editor as mpe
33
  import json_repair
 
80
  self.stt = STT(model_size=model_size)
81
  Config.setup_dirs()
82
 
83
+ # ── Audio: FFmpeg Ducking (Production) ────────────────────────────────────
84
+
85
+ def _apply_ducking_ffmpeg(
86
+ self,
87
+ video_path: str,
88
+ audio_path: str,
89
+ bg_music_volume: float = 0.1,
90
+ ) -> bool:
91
+ """
92
+ ✅ Production-grade audio ducking via FFmpeg sidechaincompress.
93
+
94
+ Works as a POST-PROCESS step on an already-rendered .mp4 file,
95
+ so there is NO double-encoding of the video stream (codec=copy).
96
+
97
+ Ducking parameters (tuned for speech-over-music):
98
+ threshold : 0.02 → ducking kicks in when speech RMS > ~-34 dBFS
99
+ ratio : 4 → music reduced to 1/4 of its level under speech
100
+ attack : 200ms → smooth fade-down when speech starts
101
+ release : 1000ms→ smooth fade-up when speech ends
102
+
103
+ Returns True on success, False on any FFmpeg error (caller falls back).
104
+ """
105
+ if not audio_path or not os.path.exists(audio_path):
106
+ return False
107
+
108
+ tmp_output = tempfile.mktemp(suffix=".mp4")
109
+
110
+ try:
111
+ logger.info(f"🎚️ FFmpeg ducking: {os.path.basename(audio_path)} | vol={bg_music_volume}")
112
+
113
+ # ── Build filter_complex ─────────────────────────────────────────
114
+ # [0:a] = original speech (from rendered video)
115
+ # [1:a] = background music (from audio_path)
116
+ #
117
+ # Step 1 – split original audio: one copy for sidechain detection,
118
+ # one copy for the final mix.
119
+ # Step 2 – apply volume to music.
120
+ # Step 3 – sidechaincompress: music ducks when speech is loud.
121
+ # Step 4 – amix: blend original speech + ducked music.
122
+ filter_complex = (
123
+ "[0:a]asplit=2[speech_sc][speech_mix];"
124
+ f"[1:a]volume={bg_music_volume},"
125
+ f"afade=t=in:ss=0:d=1.5,"
126
+ f"afade=t=out:st={{fade_start}}:d=2.0[music_in];"
127
+ "[music_in][speech_sc]"
128
+ "sidechaincompress="
129
+ "threshold=0.02:ratio=4:attack=200:release=1000"
130
+ "[music_ducked];"
131
+ "[speech_mix][music_ducked]amix=inputs=2:duration=first[aout]"
132
+ )
133
+
134
+ # Calculate fade-out start from video duration
135
+ try:
136
+ probe = subprocess.run(
137
+ [
138
+ "ffprobe", "-v", "error",
139
+ "-show_entries", "format=duration",
140
+ "-of", "default=noprint_wrappers=1:nokey=1",
141
+ video_path,
142
+ ],
143
+ capture_output=True, text=True, check=True,
144
+ )
145
+ duration = float(probe.stdout.strip())
146
+ fade_start = max(0.0, duration - 2.0)
147
+ except Exception:
148
+ fade_start = 0.0 # fallback: no fade-out
149
+
150
+ filter_complex = filter_complex.format(fade_start=fade_start)
151
+
152
+ cmd = [
153
+ "ffmpeg", "-y",
154
+ "-i", video_path, # input 0: rendered video (speech)
155
+ "-i", audio_path, # input 1: background music
156
+ "-filter_complex", filter_complex,
157
+ "-map", "0:v", # video stream: copy as-is (no re-encode)
158
+ "-map", "[aout]", # mixed audio
159
+ "-c:v", "copy", # ✅ NO video re-encoding
160
+ "-c:a", "aac",
161
+ "-b:a", "192k",
162
+ tmp_output,
163
+ ]
164
+
165
+ result = subprocess.run(cmd, capture_output=True, text=True)
166
+
167
+ if result.returncode != 0:
168
+ logger.error(f"❌ FFmpeg ducking failed:\n{result.stderr[-1000:]}")
169
+ return False
170
+
171
+ # Replace original file with ducked version
172
+ shutil.move(tmp_output, video_path)
173
+ logger.info("✅ FFmpeg ducking applied successfully")
174
+ return True
175
+
176
+ except FileNotFoundError:
177
+ logger.error("❌ FFmpeg not found — install ffmpeg and add to PATH")
178
+ return False
179
+ except Exception as e:
180
+ logger.error(f"❌ FFmpeg ducking error: {e}")
181
+ logger.error(traceback.format_exc())
182
+ return False
183
+ finally:
184
+ if os.path.exists(tmp_output):
185
+ try:
186
+ os.unlink(tmp_output)
187
+ except Exception:
188
+ pass
189
+
190
+ # ── Audio: Simple MoviePy Mix (Fallback) ──────────────────────────────────
191
 
192
  def mix_audio(self, video_clip, audio_path=None, bg_music_volume=0.1, original_volume=1.0):
193
  """
194
+ Simple MoviePy audio blend used as fallback when FFmpeg ducking fails,
195
+ or when no audio_path is provided.
196
 
197
  video_clip : MoviePy VideoFileClip or CompositeVideoClip
198
  audio_path : path to music file (mp3/m4a/...) — None = skip
199
  bg_music_volume : background music level (0.0 → 1.0)
200
  original_volume : original video audio level (0.0 → 1.0)
201
+ Returns: video_clip with mixed audio (or original clip unchanged)
 
202
  """
203
  if not audio_path or not os.path.exists(audio_path):
204
  return video_clip
205
 
206
  clip_duration = video_clip.duration
207
+ logger.info(f"🎵 Fallback mix: {audio_path} | vol={bg_music_volume}")
208
 
209
  music = mpe.AudioFileClip(audio_path)
210
 
 
211
  if music.duration < clip_duration:
212
  loops = int(clip_duration / music.duration) + 1
213
  music = mpe.concatenate_audioclips([music] * loops)
214
  logger.info(f"🔁 Music looped x{loops}")
215
 
 
216
  music = music.subclip(0, clip_duration).volumex(bg_music_volume)
217
 
218
  original_audio = video_clip.audio
219
 
 
220
  if original_audio is None:
221
  logger.info("⚠️ No original audio — using music only")
222
  return video_clip.set_audio(music)
223
 
 
224
  mixed = mpe.CompositeAudioClip([
225
  original_audio.volumex(original_volume),
226
  music,
227
  ])
228
+ logger.info("✅ Fallback audio mixed successfully")
229
  return video_clip.set_audio(mixed)
230
 
231
  # ── JSON helpers ──────────────────────────────────────────────────────────
 
406
  """
407
  Cuts, styles, captions, and exports each viral clip.
408
 
409
+ Audio strategy:
410
+ 1. MoviePy renders the styled clip with original audio only.
411
+ 2. _apply_ducking_ffmpeg() applies sidechaincompress as a post-process
412
+ on the written .mp4 (video stream copied, no re-encode).
413
+ 3. If FFmpeg is unavailable or fails, mix_audio() is called as fallback
414
+ and the file is re-written with the simple blend.
415
+
416
  ✅ Returns: (output_files, transcripts_per_clip)
417
  output_files : list of str — paths to rendered .mp4 files
418
  transcripts_per_clip : list of dicts, one per successfully rendered clip:
 
448
  if "." in style_str:
449
  style_str = style_str.split(".")[-1]
450
 
451
+ # ── kwargs ────────────────────────────────────────────────────────────
452
+ audio_path = kwargs.get("audio_path")
453
+ bg_music_volume = float(kwargs.get("bg_music_volume", 0.1))
454
+
455
  # ── Main loop ─────────────────────────────────────────────────────────
456
  output_files = []
457
  transcripts_per_clip = []
 
540
  playground_path = kwargs.get("playground_path"),
541
  )
542
 
543
+ # ── Step 1: Write clip with original audio only ───────────────
544
+ # Background music is NOT mixed here — FFmpeg handles it below
545
+ # as a post-process to avoid double video encoding.
 
 
 
 
 
 
546
  cpu_count = os.cpu_count() or 4
547
  logger.info(f"⚙️ Rendering with {cpu_count} thread(s) …")
548
 
 
554
  logger = None,
555
  )
556
 
557
+ # ── Step 2: Apply FFmpeg ducking as post-process ──────────────
558
+ if audio_path:
559
+ ducking_ok = self._apply_ducking_ffmpeg(
560
+ final_output,
561
+ audio_path,
562
+ bg_music_volume,
563
+ )
564
+
565
+ if not ducking_ok:
566
+ # ── Fallback: MoviePy simple blend ───────────────────
567
+ logger.warning("⚠️ Falling back to MoviePy simple audio blend")
568
+ fallback_clip = mpe.VideoFileClip(final_output)
569
+ fallback_mixed = self.mix_audio(
570
+ fallback_clip,
571
+ audio_path = audio_path,
572
+ bg_music_volume = bg_music_volume,
573
+ original_volume = 1.0,
574
+ )
575
+ fallback_mixed.write_videofile(
576
+ final_output,
577
+ codec = "libx264",
578
+ audio_codec = "aac",
579
+ threads = cpu_count,
580
+ logger = None,
581
+ )
582
+ try:
583
+ fallback_mixed.close()
584
+ fallback_clip.close()
585
+ except Exception:
586
+ pass
587
+
588
  output_files.append(final_output)
589
  logger.info(f"✅ Saved: {final_output}")
590
 
 
628
  def process_video(video_path, style="cinematic_blur", model_size="base", **kwargs):
629
  """
630
  End-to-end pipeline: STT → AI analysis → clip export.
 
631
  ✅ Returns a dict with:
632
  {
633
  "output_files" : list[str],
 
636
  "full_transcript": str,
637
  "duration" : float,
638
  }
 
639
  Important kwargs:
640
  source_language : language of the original video → passed to Whisper.
641
  language : desired output language (translation + captions).