mohitrai76 committed · Commit b071cfb · verified · 1 Parent(s): 56c4c63

Update app.py

Files changed (1)
app.py +69 -13
app.py CHANGED
@@ -82,7 +82,7 @@ class AudioProcessor:
 
         return results
 
-    def translate_segments_batch(self, segments):
+    def translate_segments_batch(self, segments, target_language):
         """Translate all text segments in a single batch request"""
         try:
             # Filter out None segments (pauses)
@@ -94,14 +94,14 @@ class AudioProcessor:
             print(f"Translating {len(text_segments)} segments in batch...")
 
             # Prepare the prompt with clear formatting instructions
-            prompt = f"""Translate the following Given language text segments to Hindi while maintaining EXACTLY the same format and order:
+            prompt = f"""Translate the following text segments to {target_language} while maintaining EXACTLY the same format and order:
 
 {chr(10).join(text_segments)}
 
 IMPORTANT INSTRUCTIONS:
 1. Maintain the EXACT same order and number of segments
 2. Each line must be a separate translation
-3. Use natural conversational Hindi
+3. Use natural conversational {target_language}
 4. Preserve meaning/context
 5. Leave proper nouns unchanged
 6. Match original word count where possible
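
Review note: this hunk relies on the model returning exactly one translated line per input segment, but the mapping back onto the original list (with its None pause markers) sits outside the hunk. A minimal sketch of that step, assuming the model honors the line-per-line format; the helper name split_batch_response is hypothetical:

def split_batch_response(response_text, original_segments):
    """Map a line-per-segment model reply back onto the segment list."""
    lines = [ln.strip() for ln in response_text.splitlines() if ln.strip()]
    expected = sum(1 for seg in original_segments if seg is not None)
    if len(lines) != expected:  # the model broke the format; fail loudly
        raise ValueError(f"expected {expected} translations, got {len(lines)}")
    it = iter(lines)
    # pauses (None) were filtered out before translation; re-insert them
    return [None if seg is None else next(it) for seg in original_segments]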
@@ -122,7 +122,7 @@ class AudioProcessor:
                 messages=[
                     {
                         "role": "system",
-                        "content": "You are a professional translator from Given language to Hindi. Translate exactly as requested."
+                        "content": f"You are a professional translator from English to {target_language}. Translate exactly as requested."
                     },
                     {
                         "role": "user",
@@ -165,7 +165,7 @@ def get_audio_duration(audio_path):
         print(f"Duration error: {e}")
         return None
 
-async def synthesize_tts_to_wav(text, voice):
+async def synthesize_tts_to_wav(text, voice, target_language):
     import edge_tts
     temp_mp3 = "temp_tts.mp3"
     communicate = edge_tts.Communicate(text, voice)
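
Review note: the rest of synthesize_tts_to_wav falls outside this hunk. A self-contained sketch of the likely shape, assuming edge-tts writes an MP3 and pydub (which requires ffmpeg) converts it to WAV; file names are illustrative:

import edge_tts
from pydub import AudioSegment

async def synthesize_tts_to_wav_sketch(text, voice, target_language):
    temp_mp3, temp_wav = "temp_tts.mp3", "temp_tts.wav"
    # edge-tts saves the synthesized speech as MP3
    communicate = edge_tts.Communicate(text, voice)
    await communicate.save(temp_mp3)
    # convert to WAV so the stretching/concatenation steps can consume it
    AudioSegment.from_mp3(temp_mp3).export(temp_wav, format="wav")
    # target_language mirrors the new signature; the voice already encodes it
    return temp_wav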
@@ -205,8 +205,8 @@ def cleanup_files(file_list):
         if os.path.exists(file):
             os.remove(file)
 
-# --- Main Gradio Interface ---
-async def process_audio_chunks(input_audio_path, voice="hi-IN-MadhurNeural"):
+# --- Main Process Function ---
+async def process_audio_chunks(input_audio_path, voice, target_language):
     audio_processor = AudioProcessor()
 
     print("🔎 Separating vocals and music using Demucs...")
@@ -222,7 +222,7 @@ async def process_audio_chunks(input_audio_path, voice="hi-IN-MadhurNeural"):
     segment_texts = [seg[2] if seg[2] is not None else None for seg in segments]
 
     # Batch translate all segments at once
-    translated_texts = audio_processor.translate_segments_batch(segment_texts)
+    translated_texts = audio_processor.translate_segments_batch(segment_texts, target_language)
 
     chunk_files = []
     chunk_idx = 0
@@ -239,7 +239,7 @@ async def process_audio_chunks(input_audio_path, voice="hi-IN-MadhurNeural"):
             print(f"🔤 {chunk_idx}: Translated: {translated}")
 
             # Synthesize TTS audio
-            raw_tts = await synthesize_tts_to_wav(translated, voice)
+            raw_tts = await synthesize_tts_to_wav(translated, voice, target_language)
 
             # Stretch the audio to match the target duration
             stretched = stretch_audio(raw_tts, duration)
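
Review note: stretch_audio is not part of this diff. Fitting TTS output into the original segment's duration is commonly done with phase-vocoder time-stretching; a sketch using librosa and soundfile, which are assumptions rather than what app.py actually imports:

import librosa
import soundfile as sf

def stretch_audio_sketch(wav_path, target_duration, out_path="stretched.wav"):
    y, sr = librosa.load(wav_path, sr=None)
    current_duration = len(y) / sr
    # rate > 1 shortens the clip: stretched length = current / rate
    rate = current_duration / target_duration
    stretched = librosa.effects.time_stretch(y, rate=rate)
    sf.write(out_path, stretched, sr)
    return out_path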
@@ -267,7 +267,8 @@ async def process_audio_chunks(input_audio_path, voice="hi-IN-MadhurNeural"):
     shutil.rmtree(temp_dir, ignore_errors=True)
     return final_audio_path, final_background_path
 
-def gradio_interface(video_file, voice):
+# --- Gradio Interface ---
+def gradio_interface(video_file, voice, target_language):
     try:
         # Create temporary directory for processing
         temp_dir = Path(tempfile.mkdtemp())
@@ -286,7 +287,7 @@ def gradio_interface(video_file, voice):
             return None
 
         # Process audio chunks
-        audio_output_path, background_path = asyncio.run(process_audio_chunks(audio_path, voice))
+        audio_output_path, background_path = asyncio.run(process_audio_chunks(audio_path, voice, target_language))
 
         if audio_output_path is None or background_path is None:
             return None
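
Review note: asyncio.run is a legitimate bridge here because Gradio invokes the handler in a worker thread with no running event loop. Gradio also accepts async handlers directly, which would remove the bridge entirely. A self-contained sketch of both options, with work standing in for process_audio_chunks:

import asyncio

async def work(x):
    await asyncio.sleep(0.1)  # stand-in for the real async pipeline
    return x

def sync_handler(x):          # option A: bridge from a sync handler, as the diff does
    return asyncio.run(work(x))

async def async_handler(x):   # option B: let Gradio await the coroutine itself
    return await work(x)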
@@ -345,6 +346,51 @@ def combine_video_audio(video_path, audio_path, output_path):
         print(f"Video combining error: {e}")
         return False
 
+# Voice options for each language
+voice_options = {
+    "Hindi": [
+        "hi-IN-MadhurNeural",  # Male
+        "hi-IN-SwaraNeural"    # Female
+    ],
+    "English": [
+        "en-US-GuyNeural",          # Male
+        "en-US-BenjaminRUS",        # Male
+        "en-US-ChristopherNeural",  # Male
+        "en-US-AriaNeural",         # Female
+        "en-US-JessaNeural",        # Female
+        "en-US-JennyNeural"         # Female
+    ],
+    "Spanish": [
+        "es-ES-AlvaroNeural",  # Male
+        "es-MX-JorgeNeural",   # Male
+        "es-US-AlonsoNeural",  # Male
+        "es-ES-ElviraNeural",  # Female
+        "es-MX-DaliaNeural",   # Female
+        "es-US-PalomaNeural"   # Female
+    ],
+    "French": [
+        "fr-FR-HenriNeural",                # Male
+        "fr-FR-RemyMultilingualNeural",     # Male
+        "fr-CA-AntoineNeural",              # Male
+        "fr-FR-DeniseNeural",               # Female
+        "fr-FR-JulieNeural",                # Female
+        "fr-FR-VivienneMultilingualNeural"  # Female
+    ],
+    "Japanese": [
+        "ja-JP-KeitaNeural",   # Male
+        "ja-JP-DaichiNeural",  # Male
+        "ja-JP-RikuNeural",    # Male
+        "ja-JP-AoiNeural",     # Female
+        "ja-JP-NanamiNeural",  # Female
+        "ja-JP-ShioriNeural"   # Female
+    ],
+    "Korean": [
+        "ko-KR-InJoonNeural",  # Male
+        "ko-KR-SunHiNeural"    # Female
+    ]
+}
+
+
 # Create Gradio interface
 with gr.Blocks() as demo:
     gr.Markdown("# Video Dubbing Application")
@@ -352,8 +398,13 @@ with gr.Blocks() as demo:
 
     with gr.Row():
         video_input = gr.File(label="Upload Video", file_types=[".mp4", ".mov", ".avi", ".mkv"])
+        language_dropdown = gr.Dropdown(
+            list(voice_options.keys()),
+            label="Translate to",
+            value="Hindi"
+        )
         voice_dropdown = gr.Dropdown(
-            ["hi-IN-MadhurNeural", "hi-IN-RekhaNeural", "hi-IN-SwaraNeural"],
+            voice_options["Hindi"],
             label="Select Voice",
             value="hi-IN-MadhurNeural"
         )
@@ -362,9 +413,14 @@ with gr.Blocks() as demo:
 
     submit_btn = gr.Button("Start Dubbing")
 
+    def update_voice_options(language):
+        return gr.update(choices=voice_options[language], value=voice_options[language][0])
+
+    language_dropdown.change(update_voice_options, inputs=[language_dropdown], outputs=[voice_dropdown])
+
     submit_btn.click(
         gradio_interface,
-        inputs=[video_input, voice_dropdown],
+        inputs=[video_input, voice_dropdown, language_dropdown],
         outputs=output_video
     )
 
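Review note: the language → voice wiring above is the standard dependent-dropdown pattern. A self-contained sketch of just that pattern, stripped of the dubbing pipeline; the two-language options dict is illustrative:

import gradio as gr

options = {"Hindi": ["hi-IN-MadhurNeural", "hi-IN-SwaraNeural"],
           "Korean": ["ko-KR-InJoonNeural", "ko-KR-SunHiNeural"]}

with gr.Blocks() as demo:
    lang = gr.Dropdown(list(options.keys()), label="Translate to", value="Hindi")
    voice = gr.Dropdown(options["Hindi"], label="Select Voice",
                        value="hi-IN-MadhurNeural")
    # gr.update swaps the choices and resets the value when the language changes
    lang.change(lambda lang_name: gr.update(choices=options[lang_name],
                                            value=options[lang_name][0]),
                inputs=[lang], outputs=[voice])

demo.launch()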