from typing import Optional

import gradio as gr
from transformers import pipeline
# Build the speech-recognition pipeline once at import time so that every
# call to transcribe_audio() reuses the same loaded object.
model = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h")
def transcribe_audio(mic: Optional[str] = None, file: Optional[str] = None) -> str:
    """Transcribe a recording supplied by microphone or by file upload.

    The microphone input takes precedence; the uploaded file is only used
    when no microphone recording was given.

    Args:
        mic: Audio from the microphone component, or ``None`` if absent.
        file: Audio from the upload component, or ``None`` if absent.

    Returns:
        The ``"text"`` field of the pipeline result, or an explanatory
        message when neither input was provided.
    """
    # Prefer the microphone recording, falling back to the uploaded file.
    audio = mic if mic is not None else file
    if audio is None:
        return "You must either provide a mic recording or a file"
    return model(audio)["text"]
# Build the web UI and start serving it. The two audio inputs map, in order,
# to the `mic` and `file` parameters of transcribe_audio; both hand the
# function a file path (type="filepath").
# NOTE(review): `source=` and `optional=` appear to be older Gradio keyword
# arguments (newer releases use `sources=[...]`) — confirm against the
# pinned Gradio version before upgrading.
gr.Interface(
    fn=transcribe_audio,
    inputs=[
        gr.Audio(source="microphone", type="filepath", optional=True),
        gr.Audio(source="upload", type="filepath", optional=True),
    ],
    title="Automatic Speech Recognition",
    description="This application can convert speech to text using the best models in huggingface. Get your speech transcribed by using your microphone or uploading audio where someone is talking.",
    outputs="text",
).launch()