mohitrai76 committed on
Commit
f9b4f82
·
verified ·
1 Parent(s): 031aac1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -101
app.py CHANGED
@@ -54,7 +54,7 @@ def separate_vocals(input_path):
54
  class AudioProcessor:
55
  def __init__(self, device="cpu"):
56
  self.whisper_model = WhisperModel("small", device=device)
57
- self.openrouter_api_key = "[REDACTED: leaked OpenRouter API key — key must be revoked; removing it from the current file does not purge it from git history]"
58
  self.client = OpenAI(
59
  base_url="https://openrouter.ai/api/v1",
60
  api_key=self.openrouter_api_key,
@@ -122,7 +122,7 @@ class AudioProcessor:
122
  messages=[
123
  {
124
  "role": "system",
125
- "content": f"You are a professional translator from English to {target_language}. Translate exactly as requested."
126
  },
127
  {
128
  "role": "user",
@@ -260,17 +260,13 @@ async def process_audio_chunks(input_audio_path, voice, target_language):
260
  final_mix.export(output_path, format="wav")
261
  print(f"✅ Output saved as: {output_path}")
262
 
263
- final_audio_path = output_path
264
- final_background_path = background_path
265
-
266
  cleanup_files(chunk_files)
267
  shutil.rmtree(temp_dir, ignore_errors=True)
268
- return final_audio_path, final_background_path
269
 
270
  # --- Gradio Interface ---
271
  def gradio_interface(video_file, voice, target_language):
272
  try:
273
- # Create temporary directory for processing
274
  temp_dir = Path(tempfile.mkdtemp())
275
  input_video_path = temp_dir / "input_video.mp4"
276
 
@@ -278,26 +274,21 @@ def gradio_interface(video_file, voice, target_language):
278
  if not os.path.splitext(video_file.name)[1].lower() in ['.mp4', '.mov', '.avi', '.mkv']:
279
  raise ValueError("Invalid file type. Please upload a video file.")
280
 
281
- # Save the uploaded file to the temporary directory
282
  shutil.copyfile(video_file.name, input_video_path)
283
 
284
- # Extract audio from video
285
  audio_path, audio_temp_dir = extract_audio_from_video(str(input_video_path))
286
  if not audio_path:
287
  return None
288
 
289
- # Process audio chunks
290
  audio_output_path, background_path = asyncio.run(process_audio_chunks(audio_path, voice, target_language))
291
 
292
  if audio_output_path is None or background_path is None:
293
  return None
294
 
295
- # Combine with original video
296
  output_video_path = temp_dir / "translated_video.mp4"
297
  success = combine_video_audio(str(input_video_path), audio_output_path, str(output_video_path))
298
 
299
  if success:
300
- # Return the path to the output video
301
  return str(output_video_path)
302
  else:
303
  return None
@@ -305,14 +296,8 @@ def gradio_interface(video_file, voice, target_language):
305
  except Exception as e:
306
  print(f"Error processing video: {e}")
307
  return None
308
- finally:
309
- # Cleanup temporary files
310
- # Commented out for debugging purposes
311
- # shutil.rmtree(temp_dir, ignore_errors=True)
312
- pass
313
 
314
  def extract_audio_from_video(video_path):
315
- """Extract audio from video file using ffmpeg"""
316
  temp_dir = tempfile.mkdtemp()
317
  audio_path = os.path.join(temp_dir, "extracted_audio.wav")
318
 
@@ -333,97 +318,30 @@ def extract_audio_from_video(video_path):
333
  return None, None
334
 
335
  def combine_video_audio(video_path, audio_path, output_path):
336
- """Combine original video with new audio track"""
337
  try:
338
  subprocess.run([
339
- "ffmpeg", "-y", "-i", video_path,
 
340
  "-i", audio_path,
341
- "-c:v", "copy", "-map", "0:v:0", "-map", "1:a:0",
342
- "-shortest", output_path
 
 
343
  ], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
344
  return True
345
  except Exception as e:
346
- print(f"Video combining error: {e}")
347
  return False
348
 
349
- # Voice options for each language
350
- voice_options = {
351
- "Hindi": [
352
- "hi-IN-MadhurNeural", # Male
353
- "hi-IN-SwaraNeural" # Female
354
- ],
355
- "English": [
356
- "en-US-GuyNeural", # Male
357
- "en-US-BenjaminRUS", # Male
358
- "en-US-ChristopherNeural", # Male
359
- "en-US-AriaNeural", # Female
360
- "en-US-JessaNeural", # Female
361
- "en-US-JennyNeural" # Female
362
- ],
363
- "Spanish": [
364
- "es-ES-AlvaroNeural", # Male
365
- "es-MX-JorgeNeural", # Male
366
- "es-US-AlonsoNeural", # Male
367
- "es-ES-ElviraNeural", # Female
368
- "es-MX-DaliaNeural", # Female
369
- "es-US-PalomaNeural" # Female
370
- ],
371
- "French": [
372
- "fr-FR-HenriNeural", # Male
373
- "fr-FR-RemyMultilingualNeural", # Male
374
- "fr-CA-AntoineNeural", # Male
375
- "fr-FR-DeniseNeural", # Female
376
- "fr-FR-JulieNeural", # Female
377
- "fr-FR-VivienneMultilingualNeural" # Female
378
- ],
379
- "Japanese": [
380
- "ja-JP-KeitaNeural", # Male
381
- "ja-JP-DaichiNeural", # Male
382
- "ja-JP-RikuNeural", # Male
383
- "ja-JP-AoiNeural", # Female
384
- "ja-JP-NanamiNeural", # Female
385
- "ja-JP-ShioriNeural" # Female
386
- ],
387
- "Korean": [
388
- "ko-KR-InJoonNeural", # Male
389
- "ko-KR-SunHiNeural" # Female
390
- ]
391
- }
392
-
393
-
394
- # Create Gradio interface
395
  with gr.Blocks() as demo:
396
- gr.Markdown("# DeepDub : Video Dubbing Application")
397
- gr.Markdown("Upload a video and get a dubbed version with translated audio")
398
-
399
- with gr.Row():
400
-
401
- video_input = gr.File(file_types=[".mp4", ".mov", ".avi", ".mkv", ".webm", ".ogg"], label="Upload Video")
402
-
403
- language_dropdown = gr.Dropdown(
404
- list(voice_options.keys()),
405
- label="Translate to",
406
- value="Hindi"
407
- )
408
- voice_dropdown = gr.Dropdown(
409
- voice_options["Hindi"],
410
- label="Select Voice",
411
- value="hi-IN-MadhurNeural"
412
- )
413
-
414
- output_video = gr.Video(label="Dubbed Video")
415
-
416
- submit_btn = gr.Button("Start Dubbing")
417
-
418
- def update_voice_options(language):
419
- return gr.update(choices=voice_options[language], value=voice_options[language][0])
420
-
421
- language_dropdown.change(update_voice_options, inputs=[language_dropdown], outputs=[voice_dropdown])
422
 
423
- submit_btn.click(
424
- gradio_interface,
425
- inputs=[video_input, voice_dropdown, language_dropdown],
426
- outputs=output_video
427
- )
428
 
429
- demo.queue().launch(server_name="0.0.0.0", debug=True)
 
54
  class AudioProcessor:
55
  def __init__(self, device="cpu"):
56
  self.whisper_model = WhisperModel("small", device=device)
57
+ self.openrouter_api_key = "your_openrouter_api_key_here"
58
  self.client = OpenAI(
59
  base_url="https://openrouter.ai/api/v1",
60
  api_key=self.openrouter_api_key,
 
122
  messages=[
123
  {
124
  "role": "system",
125
+ "content": f"You are a professional translator from Given language to {target_language}. Translate exactly as requested."
126
  },
127
  {
128
  "role": "user",
 
260
  final_mix.export(output_path, format="wav")
261
  print(f"✅ Output saved as: {output_path}")
262
 
 
 
 
263
  cleanup_files(chunk_files)
264
  shutil.rmtree(temp_dir, ignore_errors=True)
265
+ return output_path, background_path
266
 
267
  # --- Gradio Interface ---
268
  def gradio_interface(video_file, voice, target_language):
269
  try:
 
270
  temp_dir = Path(tempfile.mkdtemp())
271
  input_video_path = temp_dir / "input_video.mp4"
272
 
 
274
  if not os.path.splitext(video_file.name)[1].lower() in ['.mp4', '.mov', '.avi', '.mkv']:
275
  raise ValueError("Invalid file type. Please upload a video file.")
276
 
 
277
  shutil.copyfile(video_file.name, input_video_path)
278
 
 
279
  audio_path, audio_temp_dir = extract_audio_from_video(str(input_video_path))
280
  if not audio_path:
281
  return None
282
 
 
283
  audio_output_path, background_path = asyncio.run(process_audio_chunks(audio_path, voice, target_language))
284
 
285
  if audio_output_path is None or background_path is None:
286
  return None
287
 
 
288
  output_video_path = temp_dir / "translated_video.mp4"
289
  success = combine_video_audio(str(input_video_path), audio_output_path, str(output_video_path))
290
 
291
  if success:
 
292
  return str(output_video_path)
293
  else:
294
  return None
 
296
  except Exception as e:
297
  print(f"Error processing video: {e}")
298
  return None
 
 
 
 
 
299
 
300
  def extract_audio_from_video(video_path):
 
301
  temp_dir = tempfile.mkdtemp()
302
  audio_path = os.path.join(temp_dir, "extracted_audio.wav")
303
 
 
318
  return None, None
319
 
320
  def combine_video_audio(video_path, audio_path, output_path):
 
321
  try:
322
  subprocess.run([
323
+ "ffmpeg", "-y",
324
+ "-i", video_path,
325
  "-i", audio_path,
326
+ "-c:v", "copy",
327
+ "-c:a", "aac",
328
+ "-strict", "experimental",
329
+ output_path
330
  ], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
331
  return True
332
  except Exception as e:
333
+ print(f"Video/audio combine error: {e}")
334
  return False
335
 
336
+ # --- Gradio UI Setup ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
337
  with gr.Blocks() as demo:
338
+ gr.Markdown("# Video Dubbing & Translation App")
339
+ video_input = gr.File(label="Upload Video", file_types=['.mp4', '.mov', '.avi', '.mkv'])
340
+ voice_selector = gr.Dropdown(choices=["en-US-JennyNeural", "en-GB-RyanNeural", "hi-IN-SwaraNeural"], label="Select Voice", value="en-US-JennyNeural")
341
+ target_lang = gr.Textbox(label="Target Language (e.g. Hindi, French, Spanish)", value="Hindi")
342
+ translate_btn = gr.Button("Translate & Dub")
343
+ output_video = gr.Video(label="Output Video")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
344
 
345
+ translate_btn.click(fn=gradio_interface, inputs=[video_input, voice_selector, target_lang], outputs=output_video)
 
 
 
 
346
 
347
+ demo.launch()