prasanacodes committed on
Commit
04cc0d9
·
verified ·
1 Parent(s): 73cb6f5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -9
app.py CHANGED
@@ -8,7 +8,9 @@ import re
8
  from deep_translator import MyMemoryTranslator
9
  import soundfile as sf
10
  from gradio_client import Client, handle_file
11
-
 
 
12
 
13
  # You only need to run this download command once
14
  nltk.download('punkt_tab')
@@ -181,18 +183,79 @@ def synthesize_speech(synth_text, target_lang, gender="Male", pace="normal", out
181
  print("synthesize_speech saved to ", result)
182
  return result
183
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
  def main_run(video_path,target_lang):
185
  original_audio_file = extract_audio_from_video(video_path)
186
  original_text , pace = transcribe_audio(original_audio_file)
187
  translated_text = translate_local(original_text,target_lang)
188
  translated_audio = synthesize_speech(translated_text, target_lang, "Male", pace)
189
- return translated_audio
 
 
 
190
 
191
  def audio_pipeline_run(audio_path,target_lang):
192
  original_text , pace = transcribe_audio(audio_path)
193
  translated_text = translate_local(original_text,target_lang)
194
  translated_audio = synthesize_speech(translated_text, target_lang, "Male", pace)
195
- return translated_audio
 
 
196
 
197
 
198
  # --- Gradio Interface Definition ---
@@ -219,6 +282,7 @@ app_interface = gr.Interface(
219
  allow_flagging="never"
220
  )
221
  '''
 
222
  with gr.Blocks(title="Audio/Video Translation Toolkit") as app_interface:
223
  gr.Markdown("# 🚀 Audio/Video Translation Toolkit")
224
  with gr.Tabs():
@@ -228,14 +292,9 @@ with gr.Blocks(title="Audio/Video Translation Toolkit") as app_interface:
228
  video_in = gr.Video(label="Input Video", height=500)
229
  lang_radio_vid = gr.Radio(choices=["Tamil", "Telugu", "Hindi"], label="Target Language", value="Tamil")
230
  submit_btn_vid = gr.Button("Translate Video", variant="primary")
231
-
232
- with gr.Column():
233
- video_out = gr.Audio(label="Output Audio", interactive=False)
234
-
235
- '''
236
  with gr.Column():
237
  video_out = gr.Video(label="Output Video", interactive=False, height=500)
238
- '''
239
 
240
  submit_btn_vid.click(fn=main_run, inputs=[video_in, lang_radio_vid], outputs=[video_out])
241
 
 
8
  from deep_translator import MyMemoryTranslator
9
  import soundfile as sf
10
  from gradio_client import Client, handle_file
11
+ from openvoice_cli.__main__ import tune_one
12
+ import pyrubberband as rb
13
+ import librosa
14
 
15
  # You only need to run this download command once
16
  nltk.download('punkt_tab')
 
183
  print("synthesize_speech saved to ", result)
184
  return result
185
 
186
+ def match_audio_duration(original_path, translated_path, output_path="temp_audio_synced.wav"):
187
+ """
188
+ Matches Synthesized Audio duration to Original Audio duration
189
+ """
190
+ print("\n[STEP 7/9] Syncing Audio durations")
191
+ # Load original audio
192
+ original_audio, original_sr = librosa.load(original_path, sr=None)
193
+ original_duration = librosa.get_duration(y=original_audio, sr=original_sr)
194
+ print(f"Original audio duration: {original_duration:.2f} seconds")
195
+
196
+ # Load translated audio
197
+ translated_audio, translated_sr = librosa.load(translated_path, sr=None)
198
+ translated_duration = librosa.get_duration(y=translated_audio, sr=translated_sr)
199
+ print(f"Translated audio duration: {translated_duration:.2f} seconds")
200
+
201
+ # Compute the speed-up/slow-down rate
202
+ # If rate > 1.0, audio is sped up. If rate < 1.0, audio is slowed down.
203
+ rate = translated_duration / original_duration
204
+ print(f"Stretch rate: {rate:.4f}")
205
+
206
+ # Apply time-stretch using the high-quality rubberband library
207
+ # The parameters are: audio_data, sample_rate, and the desired rate
208
+ adjusted_audio = rb.time_stretch(translated_audio, translated_sr, rate=rate)
209
+
210
+ # Save output
211
+ # The sample rate remains the same as the translated audio's original rate
212
+ sf.write(output_path, adjusted_audio, translated_sr)
213
+ print(f"✅ Adjusted audio saved as: {output_path}")
214
+ return output_path
215
+
216
def clone_voice(target_audio_path, reference_audio_path, target_lang, gender="Male", pace="normal", output_path="temp_audio_cloned.wav", device=None):
    """
    Apply OpenVoice tone-color conversion to the synthesized speech.

    Args:
        target_audio_path: Audio of the speaker whose voice should be cloned
            (the original extracted audio in this pipeline). Used as the
            tone-color reference when provided.
        reference_audio_path: Synthesized (duration-synced) translated audio
            that gets converted.
        target_lang: Language name used to locate the stock reference clip.
        gender: "Male"/"Female" preset selector for the stock reference clip.
        pace: Pace preset selector for the stock reference clip.
        output_path: Where the converted audio is written.
        device: Torch device string; defaults to 'cpu' ('cuda:0' for GPU).

    Returns:
        Path of the cloned audio file (output_path).
    """
    # Voice to imitate: prefer the caller-supplied speaker audio, falling
    # back to the stock preset clip, e.g. reference/TAMIL/MALE_NORMAL.wav.
    # NOTE(review): the previous code always used the stock clip and passed
    # an undefined name `input_file` to tune_one (a NameError at runtime) —
    # confirm the intended input/reference routing against the call sites.
    if target_audio_path:
        ref_file = target_audio_path
    else:
        ref_file = f"reference/{target_lang.upper()}/{gender.upper()}_{pace.upper()}.wav"

    # FIX: respect the caller's device choice instead of clobbering it.
    if device is None:
        device = 'cpu'  # or 'cuda:0' for GPU processing

    print("Cloning Voice")
    # Convert the tone color of the synthesized clip toward ref_file.
    # FIX: write to the `output_path` parameter (was a hard-coded filename).
    tune_one(input_file=reference_audio_path, ref_file=ref_file,
             output_file=output_path, device=device)

    return output_path
227
+
228
def merge_audio_video(video_path, audio_path, output_path="temp_merged.mp4"):
    """
    Mux an audio file together with a video file into one output video.

    The video stream is stream-copied (no re-encode) while the audio is
    encoded to AAC; `shortest` trims the result to the shorter input.

    Returns:
        output_path, the path of the merged video file.
    """
    print("\n[STEP] Merging audio and video...")
    video_stream = ffmpeg.input(video_path)
    audio_stream = ffmpeg.input(audio_path)
    mux_job = ffmpeg.output(
        video_stream.video,
        audio_stream.audio,
        output_path,
        vcodec='copy',
        acodec='aac',
        shortest=None,
    )
    mux_job.run(overwrite_output=True, quiet=True)
    print(f"✅ Merged video saved to {output_path}")
    return output_path
241
+
242
def main_run(video_path, target_lang):
    """
    Full video translation pipeline.

    Extracts the audio track, transcribes it, translates the text,
    synthesizes speech in the target language, syncs its duration to the
    original, clones the original speaker's voice onto it, and muxes the
    result back onto the video.

    Returns:
        Path of the merged video carrying the translated audio.
    """
    original_audio_file = extract_audio_from_video(video_path)
    original_text, pace = transcribe_audio(original_audio_file)
    translated_text = translate_local(original_text, target_lang)
    translated_audio = synthesize_speech(translated_text, target_lang, "Male", pace)
    synced_translated_audio = match_audio_duration(original_audio_file, translated_audio)
    # FIX: bare positional `pace` after the keyword `gender=` was a
    # SyntaxError; pass it as a keyword argument.
    cloned_synced_translated_audio = clone_voice(
        original_audio_file, synced_translated_audio, target_lang,
        gender="Male", pace=pace,
    )
    final_video_nobgm = merge_audio_video(video_path, cloned_synced_translated_audio)
    return final_video_nobgm
251
 
252
def audio_pipeline_run(audio_path, target_lang):
    """
    Audio-only translation pipeline.

    Transcribes the input audio, translates the text, synthesizes speech in
    the target language, syncs its duration to the input, and clones the
    input speaker's voice onto it.

    Returns:
        Path of the cloned, duration-synced translated audio file.
    """
    original_text, pace = transcribe_audio(audio_path)
    translated_text = translate_local(original_text, target_lang)
    translated_audio = synthesize_speech(translated_text, target_lang, "Male", pace)
    # FIX: `original_audio_file` was undefined in this function — the source
    # audio here is the `audio_path` argument itself.
    synced_translated_audio = match_audio_duration(audio_path, translated_audio)
    # FIX: bare positional `pace` after the keyword `gender=` was a
    # SyntaxError; pass it as a keyword argument.
    cloned_synced_translated_audio = clone_voice(
        audio_path, synced_translated_audio, target_lang,
        gender="Male", pace=pace,
    )
    return cloned_synced_translated_audio
259
 
260
 
261
  # --- Gradio Interface Definition ---
 
282
  allow_flagging="never"
283
  )
284
  '''
285
+
286
  with gr.Blocks(title="Audio/Video Translation Toolkit") as app_interface:
287
  gr.Markdown("# 🚀 Audio/Video Translation Toolkit")
288
  with gr.Tabs():
 
292
  video_in = gr.Video(label="Input Video", height=500)
293
  lang_radio_vid = gr.Radio(choices=["Tamil", "Telugu", "Hindi"], label="Target Language", value="Tamil")
294
  submit_btn_vid = gr.Button("Translate Video", variant="primary")
295
+
 
 
 
 
296
  with gr.Column():
297
  video_out = gr.Video(label="Output Video", interactive=False, height=500)
 
298
 
299
  submit_btn_vid.click(fn=main_run, inputs=[video_in, lang_radio_vid], outputs=[video_out])
300