MMS

Runtime error

App Files Files Community

unijoh committed on Jun 6, 2024

Commit

ce8e849

verified ·

1 Parent(s): 237c3b9

Update app.py

Browse files

Files changed (1) hide show

app.py +99 -97

app.py CHANGED Viewed

@@ -1,87 +1,18 @@
 import gradio as gr
-import librosa
 from asr import transcribe, ASR_EXAMPLES, ASR_NOTE
 from tts import synthesize, TTS_EXAMPLES
 from lid import identify, LID_EXAMPLES
-demo = gr.Blocks()
-mms_select_source_trans = gr.Radio(
-    ["Record from Mic", "Upload audio"],
-    label="Audio input",
-    value="Record from Mic",
-)
-mms_mic_source_trans = gr.Audio(source="microphone", type="filepath", label="Use mic")
-mms_upload_source_trans = gr.Audio(
-    source="upload", type="filepath", label="Upload file", visible=False
-)
-mms_transcribe = gr.Interface(
-    fn=lambda audio_input, mic_input, upload_input: transcribe(audio_input, mic_input, upload_input, "fao (Faroese)"),
-    inputs=[
-        mms_select_source_trans,
-        mms_mic_source_trans,
-        mms_upload_source_trans,
-        # Hidden language input
-        gr.Textbox(value="fao (Faroese)", visible=False),
-        # gr.Checkbox(label="Use Language Model (if available)", default=True),
-    ],
-    outputs="text",
-    examples=ASR_EXAMPLES,
-    title="Speech-to-text",
-    description=(
-        "Transcribe audio from a microphone or input file in Faroese."
-    ),
-    article=ASR_NOTE,
-    allow_flagging="never",
-)
-mms_synthesize = gr.Interface(
-    fn=lambda text, speed: synthesize(text, "fao (Faroese)", speed),
-    inputs=[
-        gr.Text(label="Input text"),
-        # Hidden language input
-        gr.Textbox(value="fao (Faroese)", visible=False),
-        gr.Slider(minimum=0.1, maximum=4.0, value=1.0, step=0.1, label="Speed"),
-    ],
-    outputs=[
-        gr.Audio(label="Generated Audio", type="numpy"),
-        gr.Text(label="Filtered text after removing OOVs"),
-    ],
-    examples=TTS_EXAMPLES,
-    title="Text-to-speech",
-    description=("Generate audio in Faroese from input text."),
-    allow_flagging="never",
-)
-mms_select_source_iden = gr.Radio(
-    ["Record from Mic", "Upload audio"],
-    label="Audio input",
-    value="Record from Mic",
-)
-mms_mic_source_iden = gr.Audio(source="microphone", type="filepath", label="Use mic")
-mms_upload_source_iden = gr.Audio(
-    source="upload", type="filepath", label="Upload file", visible=False
-)
-mms_identify = gr.Interface(
-    fn=identify,
-    inputs=[
-        mms_select_source_iden,
-        mms_mic_source_iden,
-        mms_upload_source_iden,
-    ],
-    outputs=gr.Label(num_top_classes=10),
-    examples=LID_EXAMPLES,
-    title="Language Identification",
-    description=("Identity the language of input audio."),
-    allow_flagging="never",
-)
-tabbed_interface = gr.TabbedInterface(
-    [mms_transcribe, mms_synthesize, mms_identify],
-    ["Speech-to-text", "Text-to-speech", "Language Identification"],
-)
-with gr.Blocks() as demo:
     gr.Markdown(
         "<p align='center' style='font-size: 20px;'>MMS: Scaling Speech Technology to 1000+ languages demo. See our <a href='https://ai.facebook.com/blog/multilingual-model-speech-recognition/'>blog post</a> and <a href='https://arxiv.org/abs/2305.13516'>paper</a>.</p>"
     )
@@ -89,31 +20,102 @@ with gr.Blocks() as demo:
         """<center>Click on the appropriate tab to explore Speech-to-text (ASR), Text-to-speech (TTS) and Language identification (LID) demos.   </center>"""
     )
     gr.HTML(
-        """<center>You can also finetune MMS models on your data using the recipes provides here - <a href='https://huggingface.co/blog/mms_adapters'>ASR</a> <a href='https://github.com/ylacombe/finetune-hf-vits'>TTS</a>  </center>"""
     )
     gr.HTML(
         """<center><a href="https://huggingface.co/spaces/facebook/MMS?duplicate=true"  style="display: inline-block;margin-top: .5em;margin-right: .25em;" target="_blank"><img style="margin-bottom: 0em;display: inline;margin-top: -.25em;" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a> for more control and no queue.</center>"""
     )
-    tabbed_interface.render()
-    mms_select_source_trans.change(
-        lambda x: [
-            gr.update(visible=True if x == "Record from Mic" else False),
-            gr.update(visible=True if x == "Upload audio" else False),
-        ],
-        inputs=[mms_select_source_trans],
-        outputs=[mms_mic_source_trans, mms_upload_source_trans],
-        queue=False,
-    )
-    mms_select_source_iden.change(
-        lambda x: [
-            gr.update(visible=True if x == "Record from Mic" else False),
-            gr.update(visible=True if x == "Upload audio" else False),
-        ],
-        inputs=[mms_select_source_iden],
-        outputs=[mms_mic_source_iden, mms_upload_source_iden],
-        queue=False,
-    )
     gr.HTML(
         """
             <div class="footer" style="text-align:center">
@@ -122,7 +124,7 @@ with gr.Blocks() as demo:
                 </p>
             </div>
            """
-        )
 demo.queue(concurrency_count=3)
 demo.launch()

 import gradio as gr
 from asr import transcribe, ASR_EXAMPLES, ASR_NOTE
 from tts import synthesize, TTS_EXAMPLES
 from lid import identify, LID_EXAMPLES
+def wrapped_transcribe(select_source, mic_audio, upload_audio):
     # Callback for the speech-to-text Interface: chooses one of the two audio
     # inputs based on the radio selection and transcribes it with the language
     # fixed to "fao (Faroese)".
     #
     # select_source: value of the "Audio input" radio. Only the exact string
     #   "Record from Mic" selects mic_audio; any other value uses upload_audio.
     # mic_audio / upload_audio: values of the two gr.Audio inputs
     #   (both declared with type="filepath").
     # Returns whatever transcribe() returns.
+    audio_input = mic_audio if select_source == "Record from Mic" else upload_audio
     # NOTE(review): the code removed by this commit called
     # transcribe(source, mic, upload, language) with four arguments; this call
     # passes two — confirm that asr.transcribe accepts (audio, language).
+    return transcribe(audio_input, "fao (Faroese)")
+def wrapped_synthesize(text, speed):
     # Callback for the text-to-speech Interface: forwards the input text and
     # the speed slider value to synthesize() with the language argument fixed
     # to "fao (Faroese)", so the UI does not need a language input.
     # Returns whatever synthesize() returns.
+    return synthesize(text, "fao (Faroese)", speed)
+demo = gr.Blocks()
+with demo:
     gr.Markdown(
         "<p align='center' style='font-size: 20px;'>MMS: Scaling Speech Technology to 1000+ languages demo. See our <a href='https://ai.facebook.com/blog/multilingual-model-speech-recognition/'>blog post</a> and <a href='https://arxiv.org/abs/2305.13516'>paper</a>.</p>"
     )
         """<center>Click on the appropriate tab to explore Speech-to-text (ASR), Text-to-speech (TTS) and Language identification (LID) demos.   </center>"""
     )
     gr.HTML(
+        """<center>You can also finetune MMS models on your data using the recipes provided here - <a href='https://huggingface.co/blog/mms_adapters'>ASR</a> <a href='https://github.com/ylacombe/finetune-hf-vits'>TTS</a>  </center>"""
     )
     gr.HTML(
         """<center><a href="https://huggingface.co/spaces/facebook/MMS?duplicate=true"  style="display: inline-block;margin-top: .5em;margin-right: .25em;" target="_blank"><img style="margin-bottom: 0em;display: inline;margin-top: -.25em;" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a> for more control and no queue.</center>"""
     )
+    with gr.TabbedInterface(["Speech-to-text", "Text-to-speech", "Language Identification"]) as tabs:
         # NOTE(review): the code removed by this commit built gr.TabbedInterface
         # from a list of Interface objects plus a list of tab names. Here it is
         # given only the tab names and is used as a context manager indexed with
         # tabs[i] — confirm gr.TabbedInterface supports this usage; if it does
         # not, gr.Tabs() with nested gr.Tab(label) blocks may be what is intended.
         #
         # NOTE(review): in each tab below, the input components are created
         # inside the active Blocks context and then handed to gr.Interface, which
         # is rendered with .render(). Check that this does not render the same
         # component twice (the removed code created them outside any Blocks).
+        with tabs[0]:
             # --- Speech-to-text tab ---
             # Radio that selects which of the two audio widgets is in use.
+            mms_select_source_trans = gr.Radio(
+                ["Record from Mic", "Upload audio"],
+                label="Audio input",
+                value="Record from Mic",
+            )
+            mms_mic_source_trans = gr.Audio(source="microphone", type="filepath", label="Use mic")
             # The upload widget starts hidden because the radio defaults to the mic.
+            mms_upload_source_trans = gr.Audio(
+                source="upload", type="filepath", label="Upload file", visible=False
+            )
+            gr.Interface(
+                fn=wrapped_transcribe,
+                inputs=[
+                    mms_select_source_trans,
+                    mms_mic_source_trans,
+                    mms_upload_source_trans,
+                ],
+                outputs="text",
+                examples=ASR_EXAMPLES,
+                title="Speech-to-text",
+                description=(
+                    "Transcribe audio from a microphone or input file in Faroese."
+                ),
+                article=ASR_NOTE,
+                allow_flagging="never",
+            ).render()
             # When the radio changes, show only the audio widget that matches the
             # selected source and hide the other one.
+            mms_select_source_trans.change(
+                lambda x: [
+                    gr.update(visible=True if x == "Record from Mic" else False),
+                    gr.update(visible=True if x == "Upload audio" else False),
+                ],
+                inputs=[mms_select_source_trans],
+                outputs=[mms_mic_source_trans, mms_upload_source_trans],
+                queue=False,
+            )
+        with tabs[1]:
             # --- Text-to-speech tab ---
             # Two inputs (text, speed) matching wrapped_synthesize's parameters;
             # two outputs (audio and a text field).
+            gr.Interface(
+                fn=wrapped_synthesize,
+                inputs=[
+                    gr.Text(label="Input text"),
+                    gr.Slider(minimum=0.1, maximum=4.0, value=1.0, step=0.1, label="Speed"),
+                ],
+                outputs=[
+                    gr.Audio(label="Generated Audio", type="numpy"),
+                    gr.Text(label="Filtered text after removing OOVs"),
+                ],
+                examples=TTS_EXAMPLES,
+                title="Text-to-speech",
+                description=("Generate audio in Faroese from input text."),
+                allow_flagging="never",
+            ).render()
+        with tabs[2]:
             # --- Language identification tab ---
             # Same radio / mic / upload trio as the speech-to-text tab, but the
             # three values are passed straight to identify() without a wrapper.
+            mms_select_source_iden = gr.Radio(
+                ["Record from Mic", "Upload audio"],
+                label="Audio input",
+                value="Record from Mic",
+            )
+            mms_mic_source_iden = gr.Audio(source="microphone", type="filepath", label="Use mic")
+            mms_upload_source_iden = gr.Audio(
+                source="upload", type="filepath", label="Upload file", visible=False
+            )
+            gr.Interface(
+                fn=identify,
+                inputs=[
+                    mms_select_source_iden,
+                    mms_mic_source_iden,
+                    mms_upload_source_iden,
+                ],
+                outputs=gr.Label(num_top_classes=10),
+                examples=LID_EXAMPLES,
+                title="Language Identification",
+                description=("Identify the language of input audio."),
+                allow_flagging="never",
+            ).render()
             # Toggle mic / upload visibility to follow the radio selection.
+            mms_select_source_iden.change(
+                lambda x: [
+                    gr.update(visible=True if x == "Record from Mic" else False),
+                    gr.update(visible=True if x == "Upload audio" else False),
+                ],
+                inputs=[mms_select_source_iden],
+                outputs=[mms_mic_source_iden, mms_upload_source_iden],
+                queue=False,
+            )
     gr.HTML(
         """
             <div class="footer" style="text-align:center">
                 </p>
             </div>
            """
+    )
 demo.queue(concurrency_count=3)
 demo.launch()