| import gradio as gr |
| from asr import transcribe_multiple_files, ASR_LANGUAGES, model |
| from lid import identify, LID_EXAMPLES |
| from csv_processor import CSV_FILE_PATH |
| import logging |
| import soundfile as sf |
| import os |
|
|
| |
# Configure the root logger at DEBUG verbosity and create this module's logger.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)
|
|
def download_csv():
    """Return the path of the CSV file offered for download.

    The path configured in ``CSV_FILE_PATH`` is tried first. If it does not
    exist, the same path is looked up under ``/home/user/app`` (presumably
    the deployment's application directory -- confirm against the hosting
    setup).

    Returns:
        The first existing path as a string, or ``None`` when the file is
        found in neither location.
    """
    file_path = CSV_FILE_PATH
    if os.path.exists(file_path):
        return file_path

    logger.error("file %s not found!", file_path)

    # Fallback probe. Log the path that was actually checked (the original
    # logged the boolean result instead) and return it when it exists, so the
    # download still works from the fallback location.
    full_path = os.path.join("/home/user/app", file_path)
    if os.path.exists(full_path):
        logger.warning("file %s found!", full_path)
        return full_path

    logger.error("file %s not found!", full_path)
    return None
| |
| |
# Dropdown labels, one per entry of ASR_LANGUAGES, formatted "<key> (<value>)".
language_options = [f"{code} ({name})" for code, name in ASR_LANGUAGES.items()]

# Label pre-selected in the language dropdown; fall back to the first entry
# when this label is not among the options.
bam_val = "bam (Bamanankan)"
try:
    bam_index = language_options.index(bam_val)
except ValueError:
    bam_index = 0
|
|
# Input-less interface: running it calls download_csv() and exposes the
# returned path through a file component.
download_interface = gr.Interface(
    title="Download CSV file",
    description="Download file audio_plus_hash_uniq_07102024.csv",
    fn=download_csv,
    inputs=[],
    outputs=gr.File(label="Download CSV"),
)
|
|
|
|
# Speech-to-text interface: an uploaded file (handed over as a filesystem
# path), a language choice and an optional user-supplied transcription are
# passed to transcribe_multiple_files; the result is shown in a text box.
# NOTE(review): gr.File is created without file_count, while the title and
# description talk about multiple files -- confirm what
# transcribe_multiple_files expects.
mms_transcribe = gr.Interface(
    title="Speech-to-text",
    description="Transcribe multiple audio files in your desired language.",
    fn=transcribe_multiple_files,
    inputs=[
        gr.File(type="filepath"),
        gr.Dropdown(
            label="Language",
            choices=language_options,
            # Pre-select the entry at bam_index; no default if the list is empty.
            value=(language_options[bam_index] if language_options else None),
        ),
        gr.Textbox(label="Optional: Provide your own transcription"),
    ],
    outputs=gr.Textbox(label="Transcriptions", lines=10),
    allow_flagging="never",
)
|
|
# Language-identification interface: one audio input is passed to identify()
# and the result is rendered as a label showing up to ten classes.
mms_identify = gr.Interface(
    title="Language Identification",
    description="Identify the language of input audio.",
    fn=identify,
    inputs=[gr.Audio()],
    outputs=gr.Label(num_top_classes=10),
    examples=LID_EXAMPLES,
    allow_flagging="never",
)
|
|
# Group the three interfaces into tabs; each tab name lines up with the
# interface at the same position.
tabbed_interface = gr.TabbedInterface(
    interface_list=[
        mms_transcribe,
        mms_identify,
        download_interface,
    ],
    tab_names=[
        "Speech-to-text",
        "Language Identification",
        "Download CSV file",
    ],
)
|
|
# Full page layout: introductory banners, the tabbed demos, then a footer.
with gr.Blocks() as demo:
    # Headline with links to the blog post and the paper.
    gr.Markdown(
        "<p align='center' style='font-size: 20px;'>"
        "MMS: Scaling Speech Technology to 1000+ languages demo. "
        "See our <a href='https://ai.facebook.com/blog/multilingual-model-speech-recognition/'>blog post</a> "
        "and <a href='https://arxiv.org/abs/2305.13516'>paper</a>.</p>"
    )
    # Short usage hint.
    gr.HTML(
        "<center>Click on the appropriate tab to explore Speech-to-text (ASR) "
        "and Language identification (LID) demos.</center>"
    )
    # Links to fine-tuning recipes.
    gr.HTML(
        "<center>You can also finetune MMS models on your data using the recipes provided here - "
        "<a href='https://huggingface.co/blog/mms_adapters'>ASR</a> "
        "<a href='https://github.com/ylacombe/finetune-hf-vits'>TTS</a></center>"
    )
    # "Duplicate Space" badge.
    gr.HTML(
        '<center><a href="https://huggingface.co/spaces/facebook/MMS?duplicate=true" '
        'style="display: inline-block;margin-top: .5em;margin-right: .25em;" target="_blank">'
        '<img style="margin-bottom: 0em;display: inline;margin-top: -.25em;" '
        'src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>'
        " for more control and no queue.</center>"
    )

    # Mount the tabbed interface built above inside this page.
    tabbed_interface.render()

    # Footer with attribution.
    gr.HTML(
        """
        <div class="footer" style="text-align:center">
            <p>
                Model by <a href="https://ai.facebook.com" style="text-decoration: underline;" target="_blank">Meta AI</a> - Gradio Demo by 🤗 Hugging Face
            </p>
        </div>
        """
    )
|
|
if __name__ == "__main__":
    # Serve the assembled page (`demo`: banner, all three tabs, footer).
    # Launching only `mms_transcribe` left the Language Identification and
    # Download CSV tabs unreachable and the `demo` layout unused.
    demo.launch()