from transformers import pipeline
import gradio as gr

# Speech-recognition pipeline, built once at import time and shared by every
# call to the transcription handler below.
model = pipeline(
    task="automatic-speech-recognition",
    model="facebook/wav2vec2-base-960h",
)


def transcribe_audio(mic=None, file=None):
    """Transcribe a microphone recording or an uploaded audio file.

    Args:
        mic: Audio captured from the microphone, or ``None`` when absent.
        file: Uploaded audio, or ``None`` when absent.

    Returns:
        The ``"text"`` field of the module-level ``model`` pipeline's result
        for the chosen audio, or a fixed explanatory message when neither
        input was supplied.
    """
    # Candidates are tried in priority order: a microphone recording wins
    # over an uploaded file when both are present.
    for candidate in (mic, file):
        if candidate is not None:
            result = model(candidate)
            return result["text"]
    return "You must either provide a mic recording or a file"


# Build the web UI around transcribe_audio and start serving it right away.
# The two Audio inputs map positionally onto the handler's (mic, file)
# parameters; both are configured with type="filepath".
# NOTE(review): `source=` and `optional=` are version-sensitive Gradio
# arguments - confirm they match the Gradio release this app runs on.
gr.Interface(
    title="Automatic Speech Recognition",
    description=(
        "This application can convert speech to text using the best models in "
        "huggingface. Get your speech transcribed by using your microphone or "
        "uploading audio where someone is talking."
    ),
    fn=transcribe_audio,
    inputs=[
        gr.Audio(type="filepath", source="microphone", optional=True),
        gr.Audio(type="filepath", source="upload", optional=True),
    ],
    outputs="text",
).launch()