| import gradio as gr |
| from os import getenv |
| from huggingface_hub import hf_hub_download |
| from torch import device as Device |
| from torch.cuda import is_available as cuda_is_available |
| from indicai_projects.indic_tts import Indic_TTS |
| from indicai_projects.sanskrit_tts import SansTTS |
| from indicai_projects.lite_tts import Lite_TTS |
| from indicai_projects.speech2text_translation import INDIC_SEAMLESS |
| from indicai_projects.speech2text_all import Indic_STT_ALL |
| from zipfile import ZipFile |
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = Device("cuda") if cuda_is_available() else Device("cpu")
|
|
# Value handed to Indic_TTS as its first argument (presumably a language
# code -- confirm against Indic_TTS). Read from the `indic_tts_lang`
# environment variable, defaulting to "hi" when it is unset.
indic_tts_lang = getenv("indic_tts_lang", "hi")

# Download `conformer_onnx.zip` from the `shethjenil/CONFORMER_INDIC_STT` Hub
# repo and unpack it into the local `conformer_onnx` directory. The `with`
# block closes the archive handle as soon as extraction finishes instead of
# leaving it open until the object happens to be garbage-collected.
with ZipFile(
    hf_hub_download("shethjenil/CONFORMER_INDIC_STT", "conformer_onnx.zip"), "r"
) as conformer_archive:
    conformer_archive.extractall("conformer_onnx")

# Build every model once at import time; all of them receive the same device.
# The speech-to-text model is pointed at the directory extracted just above.
indic_stt_all_model = Indic_STT_ALL("conformer_onnx", device)
indic_tts = Indic_TTS(indic_tts_lang, device)
sans_tts = SansTTS(device)
vits_tts = Lite_TTS(device)
indic_seamless_model = INDIC_SEAMLESS(device)
|
|
# Describe the demo as (tab title, interface) pairs so that each title sits
# next to the interface it labels. The pairs are split into the two parallel
# lists that gr.TabbedInterface takes right before launching.
tab_specs = [
    (
        f"{indic_tts.full_name} TTS",
        gr.Interface(
            fn=indic_tts.predict,
            inputs=[
                gr.Textbox(label="Enter Text"),
                gr.Dropdown(indic_tts.speakers, label="speaker"),
            ],
            outputs=gr.Audio(type="filepath", label="Speech"),
        ),
    ),
    (
        "Sanskrit TTS",
        gr.Interface(
            fn=sans_tts.predict,
            inputs=[
                gr.Textbox(value="उद्यमेन हि सिध्यन्ति कार्याणि न मनोरथैः"),
                gr.Dropdown(sans_tts.speakers, label='Speaker'),
                gr.Slider(minimum=0.5, maximum=2, value=1, step=0.1, label='Speaking Speed'),
            ],
            outputs=gr.Audio(label="Speech"),
        ),
    ),
    (
        # NOTE(review): this title ends with a dangling "With " -- kept verbatim;
        # confirm the intended wording.
        "Lite TTS With ",
        gr.Interface(
            fn=vits_tts.predict,
            inputs=[
                gr.Textbox(),
                gr.Dropdown(vits_tts.speakers, label='Speaker'),
                gr.Dropdown(vits_tts.styles, label='Style'),
            ],
            outputs=gr.Audio(label="Speech"),
        ),
    ),
    (
        "Audio Translation",
        gr.Interface(
            fn=indic_seamless_model.predict,
            inputs=[
                gr.Audio(type="filepath"),
                gr.Dropdown(
                    list(indic_seamless_model.lang_conf.keys()),
                    label="Target Language",
                ),
            ],
            outputs=gr.Text(label="Translations"),
            title="Audio Translation",
        ),
    ),
    (
        "All Indic Speech To Text",
        gr.Interface(
            fn=indic_stt_all_model.predict,
            inputs=[
                gr.Audio(type="filepath"),
                gr.Dropdown(indic_stt_all_model.supported_langs, label='Language'),
            ],
            # Two text outputs, labelled "CTC" and "RNNT".
            outputs=[gr.Text(label="CTC"), gr.Text(label="RNNT")],
        ),
    ),
]

tab_titles = [title for title, _ in tab_specs]
tab_interfaces = [interface for _, interface in tab_specs]

# Launching at module level is what starts the app when this file is run.
gr.TabbedInterface(tab_interfaces, tab_titles).launch()