garyuzair committed on
Commit
5e03617
·
verified ·
1 Parent(s): 3c52655

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +93 -126
app.py CHANGED
@@ -8,9 +8,8 @@ import soundfile as sf
8
  import os
9
  import tempfile
10
  import subprocess
11
- from pydub import AudioSegment
12
  import moviepy.editor as mpy
13
- from functools import lru_cache
14
 
15
  # Optional scene detection
16
  scene_detect_available = True
@@ -23,34 +22,7 @@ except ImportError:
23
  # Set page configuration
24
  st.set_page_config(page_title="Video Sound Effect Generator", layout="centered")
25
 
26
- # CSS for compact video preview
27
- st.markdown("""
28
- <style>
29
- .video-container {
30
- max-width: 640px;
31
- margin: auto;
32
- overflow: hidden;
33
- border-radius: 8px;
34
- box-shadow: 0 4px 8px rgba(0,0,0,0.1);
35
- }
36
- video {
37
- width: 100%;
38
- height: auto;
39
- display: block;
40
- }
41
- .stButton>button {
42
- background-color: #007bff;
43
- color: white;
44
- border-radius: 5px;
45
- padding: 10px 20px;
46
- }
47
- .stButton>button:hover {
48
- background-color: #0056b3;
49
- }
50
- </style>
51
- """, unsafe_allow_html=True)
52
-
53
- # Load BLIP model
54
  @st.cache_resource
55
  def load_blip_model():
56
  processor = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
@@ -61,86 +33,88 @@ def load_blip_model():
61
 
62
  # Load MusicGen model
63
  @st.cache_resource
64
- def load_musicgen_model(model_name="facebook/musicgen-small"):
65
  processor = AutoProcessor.from_pretrained(model_name)
66
  model = MusicgenForConditionalGeneration.from_pretrained(model_name)
67
  if torch.cuda.is_available():
68
- model = model.to("cuda")
69
  return processor, model
70
 
71
- # Optimized frame extraction
72
- def extract_frames(video_path, num_frames, method="uniform"):
73
  video = imageio.get_reader(video_path, "ffmpeg")
74
  meta = video.get_meta_data()
75
  fps = meta['fps']
76
  total_frames = int(meta['duration'] * fps)
77
 
 
 
 
 
 
 
 
78
  if method == "scene" and scene_detect_available:
79
  try:
80
  video_manager = VideoManager([video_path])
81
  scene_manager = SceneManager()
82
- scene_manager.add_detector(ContentDetector(threshold=25))
83
- video_manager.set_downscale_factor(4) # Aggressive downscaling
84
  video_manager.start()
85
  scene_manager.detect_scenes(frame_source=video_manager)
86
  scene_list = scene_manager.get_scene_list()
87
- frame_indices = [scene[0].get_frames() for scene in scene_list[:num_frames]]
 
 
 
 
 
88
  video_manager.release()
89
- if len(frame_indices) < num_frames:
90
- step = total_frames // (num_frames - len(frame_indices) + 1)
91
- frame_indices.extend(range(step, total_frames, step)[:num_frames - len(frame_indices)])
92
- except Exception:
93
- frame_indices = list(range(0, total_frames, total_frames // num_frames))[:num_frames]
94
- else:
95
- frame_indices = list(range(0, total_frames, total_frames // num_frames))[:num_frames]
96
-
97
- frames = []
98
- for idx in frame_indices[:num_frames]:
99
- try:
100
- frame = Image.fromarray(video.get_data(idx)).resize((320, 180), Image.BILINEAR) # Downscale frames
101
- frames.append(frame)
102
- except:
103
- continue
104
- video.close()
105
- return frames
106
 
107
- # Cached helper function to generate caption for a single frame
108
- @lru_cache(maxsize=100)
109
- def get_caption_for_frame(frame_bytes, mode, size, processor, model):
110
- frame = Image.frombytes(mode, size, frame_bytes)
111
- inputs = processor(images=frame, return_tensors="pt")
112
- if torch.cuda.is_available():
113
- inputs = {k: v.to("cuda") for k, v in inputs.items()}
114
- out = model.generate(**inputs, max_length=20, num_beams=3)
115
- return processor.decode(out[0], skip_special_tokens=True)
116
 
117
- # Generate captions using cached helper
118
  def generate_captions(frames, processor, model):
119
  descriptions = []
120
  for frame in frames:
121
- frame_bytes = frame.tobytes()
122
- mode = frame.mode
123
- size = frame.size
124
- description = get_caption_for_frame(frame_bytes, mode, size, processor, model)
 
125
  descriptions.append(description)
126
  return descriptions
127
 
128
  # Enhance prompts
129
  def enhance_prompt(descriptions, mood="default"):
130
  if not descriptions:
131
- return f"{mood} cinematic ambient sound with dynamic effects"
132
  combined = ". ".join(descriptions).lower()
133
  base_prompts = {
134
- "walk|run": "crisp footsteps on diverse surfaces, vivid movement sounds",
135
- "car|drive": "powerful engine roar, tire screeches, immersive road noise",
136
- "talk|person": "rich voices, layered crowd chatter, spatial depth",
137
- "wind|tree|forest": "whistling wind, rustling foliage, natural resonance",
138
- "crash|fall": "sharp crash impact, debris scatter, intense bursts"
139
  }
140
  for pattern, effect in base_prompts.items():
141
  if any(word in combined for word in pattern.split("|")):
142
- return f"{mood} {combined}, {effect}, high-fidelity cinematic quality"
143
- return f"{mood} {combined}, vibrant ambient soundscape with compelling effects, high-fidelity cinematic quality"
144
 
145
  # Generate audio
146
  def generate_audio(prompt, processor, model, duration, sample_rate=44100):
@@ -149,15 +123,14 @@ def generate_audio(prompt, processor, model, duration, sample_rate=44100):
149
  inputs = {k: v.to("cuda") for k, v in inputs.items()}
150
  audio_values = model.generate(
151
  **inputs,
152
- max_new_tokens=int(256 * (duration / 6)), # Optimized token scaling
153
  do_sample=True,
154
- guidance_scale=8.0,
155
- top_k=150,
156
- top_p=0.85,
157
- num_beams=2 # Beam search for quality
158
  )
159
  audio_array = audio_values[0].cpu().numpy()
160
- audio_array = audio_array / np.max(np.abs(audio_array)) * 0.98
161
  audio_array = np.clip(audio_array, -1.0, 1.0)
162
  return audio_array
163
 
@@ -165,18 +138,18 @@ def generate_audio(prompt, processor, model, duration, sample_rate=44100):
165
  def apply_audio_effects(audio_path, settings):
166
  sound = AudioSegment.from_wav(audio_path)
167
  if settings['reverb_ms'] > 0:
168
- sound = sound + AudioSegment.silent(duration=settings['reverb_ms']) - 8
169
  if settings['echo_ms'] > 0:
170
- echo = sound - 12
171
  sound = sound.overlay(echo, position=settings['echo_ms'])
172
  if settings['highpass'] > 0:
173
  sound = sound.high_pass_filter(settings['highpass'])
174
  if settings['lowpass'] < 20000:
175
  sound = sound.low_pass_filter(settings['lowpass'])
176
  if settings['compress']:
177
- sound = sound - 6 # Simulate compression
178
  sound = sound.pan(settings['stereo_pan'])
179
- sound = sound + 2 # Slight volume boost
180
  processed_path = tempfile.NamedTemporaryFile(delete=False, suffix=".wav").name
181
  sound.export(processed_path, format="wav")
182
  return processed_path
@@ -196,7 +169,6 @@ def sync_audio_video(video_path, audio_path, output_path, mix_original=False, or
196
  audio_path = mixed_path
197
  else:
198
  st.warning("No original audio found. Using generated audio only.")
199
- video_clip.close()
200
 
201
  cmd = [
202
  'ffmpeg',
@@ -215,79 +187,76 @@ def sync_audio_video(video_path, audio_path, output_path, mix_original=False, or
215
  # Main application
216
  def main():
217
  st.title("🎬 Video Sound Effect Generator")
218
- st.markdown("Create high-quality, cinematic sound effects for your videos with AI.")
219
 
220
  # User Guide
221
- with st.expander("📖 How to Use"):
222
  st.markdown("""
223
- 1. **Upload Video**: Select an MP4, MOV, or AVI file (keep under 1 minute for best performance).
224
- 2. **Choose Mode**:
225
- - **Automatic**: AI analyzes video frames to create sound prompts.
226
- - **Manual**: Write your own sound description.
227
- 3. **Adjust Settings**: Use the sidebar to tweak frame analysis, audio effects, and model size.
228
- 4. **Generate**: Click "Generate" to process and download the enhanced video.
 
 
229
 
230
  **Tips**:
231
- - 5+ frames ensure accurate sound effects.
232
- - Scene extraction (if available) enhances relevance.
233
- - Experiment with audio effects for a polished result.
234
  """)
235
 
236
  # Sidebar Settings
237
  with st.sidebar:
238
  st.header("⚙️ Settings")
239
- prompt_mode = st.selectbox("Mode", ["Automatic", "Manual"], help="Automatic uses AI to analyze video; Manual lets you describe the sound.")
240
- model_size = st.selectbox("Model Size", ["small", "medium"], index=0, help="Small is faster; Medium is higher quality.")
241
- mix_original = st.checkbox("Mix Original Audio", help="Blend with video's audio if available.")
242
  original_volume, generated_volume = 0.5, 0.5
243
  if mix_original:
244
- original_volume = st.slider("Original Volume", 0.0, 1.0, 0.5)
245
- generated_volume = st.slider("Generated Volume", 0.0, 1.0, 0.5)
246
 
247
  st.subheader("Frame Analysis")
248
- num_frames = st.slider("Frames to Analyze", 5, 8, 5, help="More frames improve sound accuracy but slow processing.")
249
- frame_method = st.selectbox("Extraction Method", ["Uniform", "Scene"] if scene_detect_available else ["Uniform"], help="Scene is more accurate but slower.")
250
 
251
  st.subheader("Audio Effects")
252
  effects_settings = {
253
- 'reverb_ms': st.slider("Reverb (ms)", 0, 300, 50, help="Adds depth to sound."),
254
- 'echo_ms': st.slider("Echo (ms)", 0, 500, 100, help="Creates repeating sound effects."),
255
- 'highpass': st.slider("High-pass Filter (Hz)", 0, 2000, 50, help="Removes low frequencies."),
256
- 'lowpass': st.slider("Low-pass Filter (Hz)", 5000, 20000, 18000, help="Removes high frequencies."),
257
- 'compress': st.checkbox("Compression", value=True, help="Balances audio dynamics."),
258
- 'stereo_pan': st.slider("Stereo Pan", -1.0, 1.0, 0.0, help="-1 is left, 1 is right.")
259
  }
260
 
261
  # Main Content
262
- uploaded_file = st.file_uploader("Upload Video", type=["mp4", "mov", "avi"], help="Max 1 minute recommended.")
263
  if uploaded_file:
264
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp:
265
  tmp.write(uploaded_file.read())
266
  video_path = tmp.name
267
- st.markdown('<div class="video-container">', unsafe_allow_html=True)
268
  st.video(video_path)
269
- st.markdown('</div>', unsafe_allow_html=True)
270
 
271
  video_clip = mpy.VideoFileClip(video_path)
272
  duration = video_clip.duration
273
  video_clip.close()
274
- if duration > 60:
275
- st.warning("Videos over 1 minute may slow processing. Consider trimming.")
276
 
277
  if prompt_mode == "Automatic":
278
  with st.spinner("Analyzing frames..."):
279
  blip_processor, blip_model = load_blip_model()
280
  frames = extract_frames(video_path, num_frames, frame_method)
281
  if not frames:
282
- st.error("Failed to extract frames. Try another video or method.")
283
  return
284
- # Generate captions using the updated function
285
  descriptions = generate_captions(frames, blip_processor, blip_model)
286
- mood = st.selectbox("Sound Mood", ["default", "dramatic", "ambient", "action"], help="Sets the tone of sound effects.")
287
  text_prompt = enhance_prompt(descriptions, mood)
288
- text_prompt = st.text_area("Edit Prompt", text_prompt, height=80)
289
  else:
290
- text_prompt = st.text_area("Sound Description", "E.g., 'intense action with explosions'", height=80)
291
 
292
  if st.button("Generate Sound Effects", key="generate"):
293
  progress = st.progress(0)
@@ -302,22 +271,20 @@ def main():
302
  sf.write(temp_audio, audio_array, 44100)
303
  progress.progress(50)
304
 
305
- status.text("Applying effects...")
306
  processed_audio = apply_audio_effects(temp_audio, effects_settings)
307
  progress.progress(75)
308
 
309
- status.text("Syncing video...")
310
  output_video = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4").name
311
  sync_audio_video(video_path, processed_audio, output_video, mix_original, original_volume, generated_volume)
312
  progress.progress(100)
313
  status.text("Done!")
314
 
315
  st.success("Sound effects applied!")
316
- st.markdown('<div class="video-container">', unsafe_allow_html=True)
317
  st.video(output_video)
318
- st.markdown('</div>', unsafe_allow_html=True)
319
  with open(output_video, "rb") as f:
320
- st.download_button("Download Video", f, "enhanced_video.mp4", "video/mp4")
321
 
322
  # Cleanup
323
  for file in [video_path, temp_audio, processed_audio, output_video]:
 
8
  import os
9
  import tempfile
10
  import subprocess
11
+ from pydub import AudioSegment, effects
12
  import moviepy.editor as mpy
 
13
 
14
  # Optional scene detection
15
  scene_detect_available = True
 
22
  # Set page configuration
23
  st.set_page_config(page_title="Video Sound Effect Generator", layout="centered")
24
 
25
+ # Load BLIP model for captioning
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  @st.cache_resource
27
  def load_blip_model():
28
  processor = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
 
33
 
34
  # Load MusicGen model
35
@st.cache_resource
def load_musicgen_model(model_name="facebook/musicgen-medium"):
    """Load a MusicGen checkpoint once per session (cached by Streamlit).

    Args:
        model_name: Hugging Face model id of the MusicGen variant to load.

    Returns:
        A ``(processor, model)`` pair ready for generation.
    """
    proc = AutoProcessor.from_pretrained(model_name)
    net = MusicgenForConditionalGeneration.from_pretrained(model_name)
    # When a GPU is available, run in fp16 on CUDA to cut memory and latency.
    if torch.cuda.is_available():
        net = net.half().to("cuda")
    return proc, net
42
 
43
# Extract frames efficiently
def extract_frames(video_path, num_frames, method="uniform", segment_start=0, segment_end=None):
    """Extract up to ``num_frames`` PIL images from a time segment of a video.

    Args:
        video_path: Path to the video file (decoded via imageio/ffmpeg).
        num_frames: Maximum number of frames to return.
        method: "scene" to sample at detected scene cuts (when PySceneDetect
            is available), anything else for uniform sampling.
        segment_start: Segment start time in seconds (default: video start).
        segment_end: Segment end time in seconds (default: video end).

    Returns:
        A list of ``PIL.Image`` frames; may be shorter than ``num_frames``
        (empty when ``num_frames <= 0`` or the segment holds no frames).
    """
    if num_frames <= 0:
        return []  # guard: the uniform path below would divide by num_frames

    video = imageio.get_reader(video_path, "ffmpeg")
    try:
        meta = video.get_meta_data()
        fps = meta['fps']
        total_frames = int(meta['duration'] * fps)

        if segment_end is None:
            segment_end = total_frames / fps

        start_frame = int(segment_start * fps)
        end_frame = int(segment_end * fps)
        total_segment_frames = end_frame - start_frame

        if method == "scene" and scene_detect_available:
            try:
                video_manager = VideoManager([video_path])
                scene_manager = SceneManager()
                scene_manager.add_detector(ContentDetector(threshold=30))
                video_manager.set_downscale_factor(2)  # trade detection precision for speed
                video_manager.start()
                scene_manager.detect_scenes(frame_source=video_manager)
                scene_list = scene_manager.get_scene_list()
                # Keep only scene cuts that begin inside the requested segment.
                segment_scenes = [
                    scene for scene in scene_list
                    if segment_start <= scene[0].get_seconds() < segment_end
                ]
                frames = []
                for scene in segment_scenes[:num_frames]:
                    # NOTE(review): assumes VideoManager.get_frame returns an RGB
                    # ndarray (or None) — confirm against the scenedetect API.
                    frame = video_manager.get_frame(scene[0].get_frames())
                    if frame is not None:
                        frames.append(Image.fromarray(frame))
                video_manager.release()
                # Top up with uniformly spaced frames if scenes were too few.
                if len(frames) < num_frames and total_segment_frames > 0:
                    remaining = num_frames - len(frames)
                    # max(1, ...) prevents a zero step (duplicate indices) on
                    # very short segments.
                    step = max(1, total_segment_frames // (remaining + 1))
                    for i in range(1, remaining + 1):
                        frame_idx = start_frame + i * step
                        if frame_idx < end_frame:
                            frames.append(Image.fromarray(video.get_data(frame_idx)))
                return frames[:num_frames]
            except Exception as e:
                st.warning(f"Scene detection failed: {e}. Using uniform extraction.")

        # Uniform extraction
        step = max(1, total_segment_frames // num_frames)
        frame_indices = [start_frame + i * step for i in range(num_frames) if start_frame + i * step < end_frame]
        frames = [Image.fromarray(video.get_data(idx)) for idx in frame_indices]
        return frames[:num_frames]
    finally:
        # Always release the ffmpeg reader — the original leaked it on every
        # path (including the scene branch's early return).
        video.close()
 
 
 
 
89
 
90
# Generate captions
def generate_captions(frames, processor, model):
    """Caption each frame with the BLIP model.

    Args:
        frames: Iterable of PIL images.
        processor: BLIP processor producing model inputs.
        model: BLIP captioning model.

    Returns:
        One decoded caption string per input frame, in order.
    """
    use_cuda = torch.cuda.is_available()
    captions = []
    for image in frames:
        batch = processor(images=image, return_tensors="pt")
        if use_cuda:
            batch = {name: tensor.to("cuda") for name, tensor in batch.items()}
        token_ids = model.generate(**batch, max_length=30)
        captions.append(processor.decode(token_ids[0], skip_special_tokens=True))
    return captions
101
 
102
# Enhance prompts
def enhance_prompt(descriptions, mood="default"):
    """Turn frame captions into a sound-generation prompt.

    Args:
        descriptions: List of caption strings (may be empty).
        mood: Tone keyword prefixed to the prompt.

    Returns:
        A prompt string combining the mood, the joined lower-cased captions,
        and a keyword-matched effect description (or a generic ambient one).
    """
    if not descriptions:
        return f"{mood} ambient sound with subtle effects"

    combined = ". ".join(descriptions).lower()
    # Keyword groups are scanned in order; the first group with any substring
    # hit in the combined captions wins.
    keyword_effects = (
        (("walk", "run"), "crisp footsteps on varied surfaces, immersive movement sounds"),
        (("car", "drive"), "roaring engine, tire screeches, dynamic road noise"),
        (("talk", "person"), "lively voices, crowd murmur, spatial chatter"),
        (("wind", "tree", "forest"), "rustling leaves, gentle wind gusts, natural ambiance"),
        (("crash", "fall"), "intense crash impact, debris scattering, sharp effects"),
    )
    for keywords, effect in keyword_effects:
        if any(keyword in combined for keyword in keywords):
            return f"{mood} {combined}, {effect}"
    return f"{mood} {combined}, rich ambient soundscape with engaging effects"
118
 
119
  # Generate audio
120
  def generate_audio(prompt, processor, model, duration, sample_rate=44100):
 
123
  inputs = {k: v.to("cuda") for k, v in inputs.items()}
124
  audio_values = model.generate(
125
  **inputs,
126
+ max_new_tokens=int(512 * (duration / 8)), # Optimized for speed
127
  do_sample=True,
128
+ guidance_scale=7.0,
129
+ top_k=120,
130
+ top_p=0.9
 
131
  )
132
  audio_array = audio_values[0].cpu().numpy()
133
+ audio_array = audio_array / np.max(np.abs(audio_array)) * 0.95
134
  audio_array = np.clip(audio_array, -1.0, 1.0)
135
  return audio_array
136
 
 
138
def apply_audio_effects(audio_path, settings):
    """Apply the sidebar audio-effect settings to a WAV file.

    Args:
        audio_path: Path to the input WAV file.
        settings: Dict with keys ``reverb_ms``, ``echo_ms``, ``highpass``,
            ``lowpass``, ``compress``, ``stereo_pan``.

    Returns:
        Path to a newly written, processed WAV file.
    """
    sound = AudioSegment.from_wav(audio_path)
    # "Reverb": append a silent tail and drop the whole clip by 10 dB — a
    # cheap decay approximation, not a true reverb.
    if settings['reverb_ms'] > 0:
        sound = sound + AudioSegment.silent(duration=settings['reverb_ms']) - 10
    # Echo: overlay a 15 dB-quieter copy of the clip, delayed by echo_ms.
    if settings['echo_ms'] > 0:
        echo = sound - 15
        sound = sound.overlay(echo, position=settings['echo_ms'])
    if settings['highpass'] > 0:
        sound = sound.high_pass_filter(settings['highpass'])
    if settings['lowpass'] < 20000:  # 20 kHz == slider max, i.e. "off"
        sound = sound.low_pass_filter(settings['lowpass'])
    if settings['compress']:
        sound = effects.compress_dynamic_range(sound)
    sound = sound.pan(settings['stereo_pan'])
    sound = effects.normalize(sound)
    # mkstemp instead of NamedTemporaryFile(delete=False): the latter leaks an
    # open file handle, which can break re-opening the path on Windows.
    fd, processed_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    sound.export(processed_path, format="wav")
    return processed_path
 
169
  audio_path = mixed_path
170
  else:
171
  st.warning("No original audio found. Using generated audio only.")
 
172
 
173
  cmd = [
174
  'ffmpeg',
 
187
  # Main application
188
  def main():
189
  st.title("🎬 Video Sound Effect Generator")
190
+ st.markdown("Upload a video to create immersive, video-specific sound effects with AI.")
191
 
192
  # User Guide
193
+ with st.expander("📖 User Guide"):
194
  st.markdown("""
195
+ **How to Use:**
196
+ 1. **Upload a Video**: Choose an MP4, MOV, or AVI file.
197
+ 2. **Select Prompt Mode**:
198
+ - **Automatic**: Analyzes video frames to generate sound prompts.
199
+ - **Manual**: Enter your own sound description.
200
+ 3. **Configure Settings**: Adjust frame analysis, audio effects, and model size in the sidebar.
201
+ 4. **Generate**: Click "Generate Sound Effects" to process the video.
202
+ 5. **Download**: Save the enhanced video with sound effects.
203
 
204
  **Tips**:
205
+ - Use at least 5 frames for better sound relevance.
206
+ - Scene-based frame extraction (if available) improves accuracy.
207
+ - Adjust audio effects for a customized sound experience.
208
  """)
209
 
210
  # Sidebar Settings
211
  with st.sidebar:
212
  st.header("⚙️ Settings")
213
+ prompt_mode = st.selectbox("Prompt Mode", ["Automatic", "Manual"])
214
+ model_size = st.selectbox("Model Size", ["small", "medium", "large"], index=1)
215
+ mix_original = st.checkbox("Mix with Original Audio", value=False)
216
  original_volume, generated_volume = 0.5, 0.5
217
  if mix_original:
218
+ original_volume = st.slider("Original Audio Volume", 0.0, 1.0, 0.5)
219
+ generated_volume = st.slider("Generated Audio Volume", 0.0, 1.0, 0.5)
220
 
221
  st.subheader("Frame Analysis")
222
+ num_frames = st.slider("Frames to Analyze", 5, 10, 5, help="More frames improve sound relevance but increase processing time")
223
+ frame_method = st.selectbox("Frame Extraction Method", ["Uniform", "Scene"] if scene_detect_available else ["Uniform"])
224
 
225
  st.subheader("Audio Effects")
226
  effects_settings = {
227
+ 'reverb_ms': st.slider("Reverb (ms)", 0, 500, 100),
228
+ 'echo_ms': st.slider("Echo (ms)", 0, 1000, 200),
229
+ 'highpass': st.slider("High-pass Filter (Hz)", 0, 3000, 100),
230
+ 'lowpass': st.slider("Low-pass Filter (Hz)", 5000, 20000, 15000),
231
+ 'compress': st.checkbox("Dynamic Compression", value=True),
232
+ 'stereo_pan': st.slider("Stereo Pan (-1 left, 1 right)", -1.0, 1.0, 0.0)
233
  }
234
 
235
  # Main Content
236
+ uploaded_file = st.file_uploader("Upload Video", type=["mp4", "mov", "avi"])
237
  if uploaded_file:
238
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp:
239
  tmp.write(uploaded_file.read())
240
  video_path = tmp.name
 
241
  st.video(video_path)
 
242
 
243
  video_clip = mpy.VideoFileClip(video_path)
244
  duration = video_clip.duration
245
  video_clip.close()
 
 
246
 
247
  if prompt_mode == "Automatic":
248
  with st.spinner("Analyzing frames..."):
249
  blip_processor, blip_model = load_blip_model()
250
  frames = extract_frames(video_path, num_frames, frame_method)
251
  if not frames:
252
+ st.error("No frames extracted. Try a different video or settings.")
253
  return
 
254
  descriptions = generate_captions(frames, blip_processor, blip_model)
255
+ mood = st.selectbox("Sound Mood", ["default", "dramatic", "ambient", "action"])
256
  text_prompt = enhance_prompt(descriptions, mood)
257
+ text_prompt = st.text_area("Edit Prompt", text_prompt, height=100)
258
  else:
259
+ text_prompt = st.text_area("Enter Sound Description", "Describe the desired sound effects", height=100)
260
 
261
  if st.button("Generate Sound Effects", key="generate"):
262
  progress = st.progress(0)
 
271
  sf.write(temp_audio, audio_array, 44100)
272
  progress.progress(50)
273
 
274
+ status.text("Applying audio effects...")
275
  processed_audio = apply_audio_effects(temp_audio, effects_settings)
276
  progress.progress(75)
277
 
278
+ status.text("Syncing with video...")
279
  output_video = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4").name
280
  sync_audio_video(video_path, processed_audio, output_video, mix_original, original_volume, generated_volume)
281
  progress.progress(100)
282
  status.text("Done!")
283
 
284
  st.success("Sound effects applied!")
 
285
  st.video(output_video)
 
286
  with open(output_video, "rb") as f:
287
+ st.download_button("Download Enhanced Video", f, "enhanced_video.mp4", "video/mp4")
288
 
289
  # Cleanup
290
  for file in [video_path, temp_audio, processed_audio, output_video]: