Spaces:

mms-meta
/

mms-zeroshot

Running

mms-zeroshot / app.py

Vineel Pratap

update_model

a4107b1 over 1 year ago

2.63 kB

	import gradio as gr
	from zeroshot import process, ZS_EXAMPLES

	# Gradio UI for the MMS zero-shot ASR demo: collects an audio clip, a text
	# file and an optional language-model file, plus two decoding scores, and
	# shows the transcript returned by `process`.
	# NOTE(review): the nesting of the layout blocks below was reconstructed
	# from the component semantics because the indentation was lost in the
	# text as received -- confirm against the deployed layout.
	with gr.Blocks(css="style.css") as demo:
		gr.Markdown(
			"<p align='center' style='font-size: 20px;'>MMS Zero-shot ASR Demo. See our arXiv <a href='https://arxiv.org/'>paper</a> for model details.</p>"
		)
		gr.HTML(
			"""<center>The demo works on input audio in any language, as long as you provide a list of words or sentences for that language and an optional n-gram language model (even a simple 1-gram model will work!) to help with accuracy.<br>We recommend having a minimum of 5000 distinct words in the textfile to achieve a good performance.</center>"""
		)
		with gr.Row():
			# Input column: audio, data files, decoding settings, submit button.
			with gr.Column():
				audio = gr.Audio(label="Audio Input\n(use microphone or upload a file)")

				with gr.Row():
					words_file = gr.File(label="Text Data")
					lm_file = gr.File(label="Language Model\n(optional)")

				# Collapsed by default (open=False).
				with gr.Accordion("Advanced Settings", open=False):
					gr.Markdown(
						"The following parameters are used for beam-search decoding. Use the default values if you are not sure."
					)
					with gr.Row():
						wscore = gr.Slider(
							minimum=-10.0,
							maximum=10.0,
							value=0,
							step=0.1,
							interactive=True,
							label="Word Insertion Score",
						)
						lmscore = gr.Slider(
							minimum=-10.0,
							maximum=10.0,
							value=0,
							step=0.1,
							interactive=True,
							label="Language Model Score",
						)
					with gr.Row():
						# Both checkboxes start checked; how `process` combines
						# them with the slider values is defined in `zeroshot`.
						wscore_usedefault = gr.Checkbox(
							label="Use Default Word Insertion Score", value=True
						)
						lmscore_usedefault = gr.Checkbox(
							label="Use Default Language Model Score", value=True
						)
				btn = gr.Button("Submit", elem_id="submit")
			# Output column: the transcript.
			with gr.Column():
				text = gr.Textbox(label="Transcript")
		# The component values are handed to `process` in the order listed in
		# `inputs`; its return value is written to the transcript textbox.
		btn.click(
			process,
			inputs=[
				audio,
				words_file,
				lm_file,
				wscore,
				lmscore,
				wscore_usedefault,
				lmscore_usedefault,
			],
			outputs=text,
		)
		# Each example row fills the audio and text-data inputs only.
		examples = gr.Examples(examples=ZS_EXAMPLES, inputs=[audio, words_file])

	demo.launch()