Spaces:
Build error
Build error
| """ | |
| Script to translate a given single English audio file into the corresponding Hindi text | |
| Usage : python s2t_en2hi.py <audio_file_path> <averaged_checkpoints_file_path> | |
| """ | |
| import gradio as gr | |
| import sys | |
| import os | |
| import subprocess | |
| from pydub import AudioSegment | |
| from huggingface_hub import snapshot_download | |
# Install gTTS at startup so the import below can succeed even when the
# package is not already present. pip compares project names
# case-insensitively, so one install of "gTTS" covers both spellings.
# pip is invoked through the running interpreter so the package lands in the
# environment this process imports from.
subprocess.check_call([sys.executable, "-m", "pip", "install", "gTTS"])
from gtts import gTTS
def install_fairseq():
    """Install fairseq, sentencepiece and soundfile with pip at runtime.

    Returns:
        A status message: the success string when every pip command exits
        cleanly, otherwise a string describing the pip invocation that
        failed. Installation stops at the first failing package.
    """
    packages = ("fairseq", "sentencepiece", "soundfile")
    try:
        for package in packages:
            # Run pip via the current interpreter so the packages are
            # installed into the environment this process imports from,
            # not whichever `pip` happens to be first on PATH.
            subprocess.check_call(
                [sys.executable, "-m", "pip", "install", package]
            )
    except subprocess.CalledProcessError as e:
        return f"An error occurred while installing fairseq: {str(e)}"
    return "fairseq successfully installed!"
def convert_audio_to_16k_wav(audio_input):
    """Return a path to a mono, 16 kHz WAV rendering of ``audio_input``.

    The audio is loaded with pydub. When it has more than one channel, a
    frame rate other than 16000 Hz, or a file extension other than ``.wav``,
    it is downmixed/resampled as needed and exported as ``<stem>_16k.wav``
    in the current working directory; that file name is returned. When the
    input is already a mono 16 kHz ``.wav`` file nothing is written and the
    original path is returned unchanged.

    Args:
        audio_input: Path of the audio file to load.

    Returns:
        Path (str) of a file holding the mono 16 kHz audio.
    """
    target_rate = 16000
    sound = AudioSegment.from_file(audio_input)
    print("original file is at:", audio_input)

    stem, extension = os.path.splitext(os.path.basename(audio_input))
    needs_mono = sound.channels > 1
    needs_resample = sound.frame_rate != target_rate
    needs_wav = extension.lower() != ".wav"

    if not (needs_mono or needs_resample or needs_wav):
        # Already in the target format: return the real path rather than a
        # bare file name, which may not exist in the working directory.
        return audio_input

    if needs_mono:
        sound = sound.set_channels(1)
    if needs_resample:
        sound = sound.set_frame_rate(target_rate)

    # splitext drops only the trailing extension, so "clip.mp3" becomes
    # "clip_16k.wav" and a ".wav" inside the stem is left alone.
    filename = f"{stem}_16k.wav"
    sound.export(filename, format="wav")
    return filename
# Per-language settings keyed by the label shown in the UI:
# (fairseq checkpoint path, fairseq data directory, gTTS language code).
_LANGUAGE_SETTINGS = {
    "Hindi": ("./models/hi_m.pt", "./lang/hi/", "hi"),
    "Gujrati": ("./models/gj_m.pt", "./lang/gj/", "gu"),
    "Bengali": ("./models/bn_m.pt", "./lang/bn/", "bn"),
    "Nepali": ("./models/ne_m.pt", "./lang/ne/", "ne"),
    "Tamil": ("./models/tm_m.pt", "./lang/tm/", "ta"),
    "Marathi": ("./models/mt_m.pt", "./lang/mt/", "mr"),
}


def _first_hypothesis(fairseq_stdout):
    """Return the text field of the first usable ``D-0`` line, or ``""``."""
    for line in fairseq_stdout.split("\n"):
        if line.startswith("D-0"):
            fields = line.split("\t")
            # The text is read from the third tab-separated field; skip
            # lines that are too short instead of raising IndexError.
            if len(fields) > 2:
                return fields[2]
    return ""


def run_my_code(input_text, language):
    """Translate recorded English speech and synthesize the result as audio.

    The input audio is converted with ``convert_audio_to_16k_wav``, its path
    is written to ``input.txt``, and ``fairseq-interactive`` is run on that
    file with the checkpoint and data directory of the chosen language. The
    first ``D-0`` line of the output supplies the translated text, which is
    then spoken with gTTS and saved to ``output_audio.mp3``.

    Args:
        input_text: Path of the recorded audio file (despite the name, this
            is a file path, not text).
        language: One of the keys of ``_LANGUAGE_SETTINGS``.

    Returns:
        A ``(translated_text, audio_path)`` tuple.

    Raises:
        ValueError: If no audio path is given or ``language`` is not
            supported.
        RuntimeError: If fairseq-interactive yields no translation.
    """
    # Validate up front so a missing recording or an unselected language
    # gives a clear message instead of a failure deep inside fairseq/gTTS.
    if not input_text:
        raise ValueError("No audio input was provided.")
    settings = _LANGUAGE_SETTINGS.get(language)
    if settings is None:
        raise ValueError(f"Unsupported target language: {language!r}")
    model_checkpoint, data_root, lang = settings

    hi_wav = convert_audio_to_16k_wav(input_text)
    print(hi_wav)

    # fairseq-interactive reads its input from this file.
    input_list = "input.txt"
    with open(input_list, "w") as f:
        f.write(hi_wav)

    print("------Performing translation...")
    try:
        translation_result = subprocess.run(
            [
                "fairseq-interactive", data_root,
                "--config-yaml", "config_st.yaml",
                "--task", "speech_to_text",
                "--path", model_checkpoint,
                "--max-tokens", "50000",
                "--beam", "5",
                "--input", input_list,
            ],
            capture_output=True,
            text=True,
        )
    finally:
        # Always leave the list file empty, even if the subprocess could
        # not be started.
        with open(input_list, "w"):
            pass

    print("\n\n------Translation results are:")
    output_text = _first_hypothesis(translation_result.stdout)
    if not output_text:
        # gTTS refuses empty text; report the real cause instead.
        raise RuntimeError(
            "fairseq-interactive produced no translation "
            f"(exit code {translation_result.returncode}): "
            f"{translation_result.stderr.strip()}"
        )
    print(output_text)

    output_audio = "output_audio.mp3"
    gTTS(text=output_text, lang=lang).save(output_audio)
    return output_text, output_audio
# Install the translation dependencies at startup and log the outcome:
# install_fairseq() reports failure through its return value rather than an
# exception, so discarding it would hide a failed install.
print(install_fairseq())

# Define the output widget for the translated text.
# NOTE(review): gr.inputs / gr.outputs are Gradio's legacy component
# namespaces; confirm the Gradio version this app is deployed with still
# provides them.
output_textbox = gr.outputs.Textbox(label="Translated Text")

# Create a Gradio interface: microphone recording plus target-language
# choice in, translated text plus synthesized speech out.
iface = gr.Interface(
    fn=run_my_code,
    inputs=[
        gr.inputs.Audio(
            source="microphone",
            type="filepath",
            label="Record something (in American English accent)",
        ),
        gr.inputs.Radio(
            ["Hindi", "Gujrati", "Bengali", "Tamil", "Nepali", "Marathi"],
            label="Language",
        ),
    ],
    outputs=[
        output_textbox,
        gr.outputs.Audio(label="Output speech", type="filepath"),
    ],
    title="English to Indic Language Translator",
)

# Launch the interface
iface.launch()