prasanacodes committed on
Commit
94ae67a
·
verified ·
1 Parent(s): 98b094e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -15
app.py CHANGED
@@ -106,12 +106,13 @@ def transcribe_audio(audio_path):
106
  result = transcriber(audio_path)
107
  # The result is a dictionary, and we need the 'text' key
108
  transcription = result["text"]
109
- print(f"Transcription successful: {transcription}")
 
110
 
111
  duration_s = get_audio_duration(audio_path)
112
  wpm = compute_wpm(transcription, duration_s)
113
  pace = categorize_wpm(wpm)
114
- print(f" > Pace detected: {pace.upper()} ({wpm:.1f} WPM)")
115
 
116
  return transcription, pace
117
 
@@ -163,7 +164,9 @@ def translate_local(text_to_translate, target_lang='ta-IN', device=None):
163
  translated_chunks.append("") # Add an empty string on error
164
 
165
  translated_text = " ".join(translated_chunks)
166
-
 
 
167
  return translated_text
168
 
169
  def synthesize_speech(synth_text, target_lang, gender="Male", pace="normal", output_path="temp_audio_synthesized.wav", device="cpu"):
@@ -218,20 +221,15 @@ def match_audio_duration(original_path, translated_path, output_path="temp_audio
218
  # Save output
219
  # The sample rate remains the same as the translated audio's original rate
220
  sf.write(output_path, adjusted_audio, translated_sr)
221
- print(f"βœ… Adjusted audio saved as: {output_path}")
222
  return output_path
223
 
224
- def clone_voice(target_audio_path, reference_audio_path, target_lang, gender="Male", pace="normal", output_path="temp_audio_cloned.wav", device=None):
225
- # Set parameters for single file processing
226
- ref_file = str('reference/'+target_lang.upper()+'/'+gender.upper()+'_'+pace.upper()+'.wav')
227
- output_file = 'cloned_audio.wav'
228
- device = 'cpu' # or 'cuda:0' for GPU processing
229
-
230
  print("Cloning Voice")
231
  # Convert the tone color of a single audio file
232
- tune_one(input_file=target_audio_path, ref_file=ref_file, output_file=output_file, device=device)
233
-
234
- return output_file
235
 
236
  def merge_audio_video(video_path, audio_path, output_path="temp_merged.mp4"):
237
  """
@@ -253,8 +251,9 @@ def main_run(video_path,target_lang):
253
  translated_text = translate_local(original_text,target_lang)
254
  translated_audio = synthesize_speech(translated_text, target_lang, "Male", pace)
255
  synced_translated_audio = match_audio_duration(original_audio_file, translated_audio)
256
- cloned_synced_translated_audio = clone_voice(original_audio_file, synced_translated_audio, target_lang, "Male", pace)
257
  final_video_nobgm = merge_audio_video(video_path, cloned_synced_translated_audio)
 
258
  return final_video_nobgm
259
 
260
  def audio_pipeline_run(audio_path,target_lang):
@@ -262,7 +261,8 @@ def audio_pipeline_run(audio_path,target_lang):
262
  translated_text = translate_local(original_text,target_lang)
263
  translated_audio = synthesize_speech(translated_text, target_lang, "Male", pace)
264
  synced_translated_audio = match_audio_duration(audio_path, translated_audio)
265
- cloned_synced_translated_audio = clone_voice(audio_path, synced_translated_audio, target_lang, "Male", pace)
 
266
  return cloned_synced_translated_audio
267
 
268
 
 
106
  result = transcriber(audio_path)
107
  # The result is a dictionary, and we need the 'text' key
108
  transcription = result["text"]
109
+
110
+ print(f"βœ… Transcription successful: {transcription}")
111
 
112
  duration_s = get_audio_duration(audio_path)
113
  wpm = compute_wpm(transcription, duration_s)
114
  pace = categorize_wpm(wpm)
115
+ print(f"βœ… > Pace detected: {pace.upper()} ({wpm:.1f} WPM)")
116
 
117
  return transcription, pace
118
 
 
164
  translated_chunks.append("") # Add an empty string on error
165
 
166
  translated_text = " ".join(translated_chunks)
167
+
168
+ print(f"βœ… Translated Text to {target_lang} Successfully")
169
+
170
  return translated_text
171
 
172
  def synthesize_speech(synth_text, target_lang, gender="Male", pace="normal", output_path="temp_audio_synthesized.wav", device="cpu"):
 
221
  # Save output
222
  # The sample rate remains the same as the translated audio's original rate
223
  sf.write(output_path, adjusted_audio, translated_sr)
224
+ print(f"βœ… Duration Adjusted audio saved as: {output_path}")
225
  return output_path
226
 
227
+ def clone_voice(translated_audio_path, original_audio_path, output_path="temp_audio_cloned.wav", device="cpu"):
 
 
 
 
 
228
  print("Cloning Voice")
229
  # Convert the tone color of a single audio file
230
+ tune_one(input_file=translated_audio_path, ref_file=original_audio_path, output_file=output_path, device=device)
231
+ print(f"βœ… Voice cloned audio saved to {output_path}")
232
+ return output_path
233
 
234
  def merge_audio_video(video_path, audio_path, output_path="temp_merged.mp4"):
235
  """
 
251
  translated_text = translate_local(original_text,target_lang)
252
  translated_audio = synthesize_speech(translated_text, target_lang, "Male", pace)
253
  synced_translated_audio = match_audio_duration(original_audio_file, translated_audio)
254
+ cloned_synced_translated_audio = clone_voice(synced_translated_audio, original_audio_file)
255
  final_video_nobgm = merge_audio_video(video_path, cloned_synced_translated_audio)
256
+ print(f"βœ… Pipeline finished")
257
  return final_video_nobgm
258
 
259
  def audio_pipeline_run(audio_path,target_lang):
 
261
  translated_text = translate_local(original_text,target_lang)
262
  translated_audio = synthesize_speech(translated_text, target_lang, "Male", pace)
263
  synced_translated_audio = match_audio_duration(audio_path, translated_audio)
264
+ cloned_synced_translated_audio = clone_voice(synced_translated_audio, original_audio_file)
265
+ print(f"βœ… Pipeline finished")
266
  return cloned_synced_translated_audio
267
 
268