Spaces:

prasanacodes
/

Indic-Translation-Toolkit

Running

App Files Community

prasanacodes committed on Aug 20, 2025

Commit

94ae67a

verified ·

1 Parent(s): 98b094e

Update app.py

Browse files

Files changed (1) hide show

app.py +15 -15

app.py CHANGED Viewed

@@ -106,12 +106,13 @@ def transcribe_audio(audio_path):
     result = transcriber(audio_path)
     # The result is a dictionary, and we need the 'text' key
     transcription = result["text"]
-    print(f"Transcription successful: {transcription}")
     duration_s = get_audio_duration(audio_path)
     wpm = compute_wpm(transcription, duration_s)
     pace = categorize_wpm(wpm)
-    print(f"    > Pace detected: {pace.upper()} ({wpm:.1f} WPM)")
     return transcription, pace
@@ -163,7 +164,9 @@ def translate_local(text_to_translate, target_lang='ta-IN', device=None):
             translated_chunks.append("") # Add an empty string on error
     translated_text = " ".join(translated_chunks)
     return translated_text
 def synthesize_speech(synth_text, target_lang, gender="Male", pace="normal", output_path="temp_audio_synthesized.wav", device="cpu"):
@@ -218,20 +221,15 @@ def match_audio_duration(original_path, translated_path, output_path="temp_audio
     # Save output
     # The sample rate remains the same as the translated audio's original rate
     sf.write(output_path, adjusted_audio, translated_sr)
-    print(f"✅ Adjusted audio saved as: {output_path}")
     return output_path
-def clone_voice(target_audio_path, reference_audio_path, target_lang, gender="Male", pace="normal", output_path="temp_audio_cloned.wav", device=None):
-    # Set parameters for single file processing
-    ref_file =    str('reference/'+target_lang.upper()+'/'+gender.upper()+'_'+pace.upper()+'.wav')
-    output_file = 'cloned_audio.wav'
-    device = 'cpu'  # or 'cuda:0' for GPU processing
     print("Cloning Voice")
     # Convert the tone color of a single audio file
-    tune_one(input_file=target_audio_path, ref_file=ref_file, output_file=output_file, device=device)
-    return output_file
 def merge_audio_video(video_path, audio_path, output_path="temp_merged.mp4"):
     """
@@ -253,8 +251,9 @@ def main_run(video_path,target_lang):
     translated_text = translate_local(original_text,target_lang)
     translated_audio = synthesize_speech(translated_text, target_lang, "Male", pace)
     synced_translated_audio = match_audio_duration(original_audio_file, translated_audio)
-    cloned_synced_translated_audio = clone_voice(original_audio_file, synced_translated_audio, target_lang, "Male", pace)
     final_video_nobgm = merge_audio_video(video_path, cloned_synced_translated_audio)
     return final_video_nobgm
 def audio_pipeline_run(audio_path,target_lang):
@@ -262,7 +261,8 @@ def audio_pipeline_run(audio_path,target_lang):
     translated_text = translate_local(original_text,target_lang)
     translated_audio = synthesize_speech(translated_text, target_lang, "Male", pace)
     synced_translated_audio = match_audio_duration(audio_path, translated_audio)
-    cloned_synced_translated_audio = clone_voice(audio_path, synced_translated_audio, target_lang, "Male", pace)
     return cloned_synced_translated_audio

     result = transcriber(audio_path)
     # The result is a dictionary, and we need the 'text' key
     transcription = result["text"]
+    print(f"✅ Transcription successful: {transcription}")
     duration_s = get_audio_duration(audio_path)
     wpm = compute_wpm(transcription, duration_s)
     pace = categorize_wpm(wpm)
+    print(f"✅ > Pace detected: {pace.upper()} ({wpm:.1f} WPM)")
     return transcription, pace
             translated_chunks.append("") # Add an empty string on error
     translated_text = " ".join(translated_chunks)
+    print(f"✅ Translated Text to {target_lang} Successfully")
     return translated_text
 def synthesize_speech(synth_text, target_lang, gender="Male", pace="normal", output_path="temp_audio_synthesized.wav", device="cpu"):
     # Save output
     # The sample rate remains the same as the translated audio's original rate
     sf.write(output_path, adjusted_audio, translated_sr)
+    print(f"✅ Duration Adjusted audio saved as: {output_path}")
     return output_path
+def clone_voice(translated_audio_path, original_audio_path, output_path="temp_audio_cloned.wav", device="cpu"):
     print("Cloning Voice")
     # Convert the tone color of a single audio file
+    tune_one(input_file=translated_audio_path, ref_file=original_audio_path, output_file=output_path, device=device)
+    print(f"✅ Voice cloned audio saved to {output_path}")
+    return output_path
 def merge_audio_video(video_path, audio_path, output_path="temp_merged.mp4"):
     """
     translated_text = translate_local(original_text,target_lang)
     translated_audio = synthesize_speech(translated_text, target_lang, "Male", pace)
     synced_translated_audio = match_audio_duration(original_audio_file, translated_audio)
+    cloned_synced_translated_audio = clone_voice(synced_translated_audio, original_audio_file)
     final_video_nobgm = merge_audio_video(video_path, cloned_synced_translated_audio)
+    print(f"✅ Pipeline finished")
     return final_video_nobgm
 def audio_pipeline_run(audio_path,target_lang):
     translated_text = translate_local(original_text,target_lang)
     translated_audio = synthesize_speech(translated_text, target_lang, "Male", pace)
     synced_translated_audio = match_audio_duration(audio_path, translated_audio)
+    cloned_synced_translated_audio = clone_voice(synced_translated_audio, original_audio_file)
+    print(f"✅ Pipeline finished")
     return cloned_synced_translated_audio