# test / app.py — Whisper audio-transcription demo (Gradio app)
# Author: Konstantin Dorichev
# Launched with share=True; commit d2a9540 (unverified)
import gradio as gr
from transformers import pipeline
import numpy as np
# Whisper (base, English-only) ASR pipeline, loaded once at module import.
# NOTE(review): return_timestamps=True is presumably set to enable long-form
# (>30 s) transcription in transformers — confirm against the pipeline docs.
transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en", return_timestamps=True)
def transcribe(stream, new_chunk=None):
    """Transcribe an uploaded audio clip to text with the Whisper pipeline.

    Args:
        stream: ``(sample_rate, samples)`` tuple as produced by ``gr.Audio``,
            or ``None`` when no audio has been provided.
        new_chunk: Unused. Kept (now with a default) for backward
            compatibility with the original streaming-style signature;
            without the default, ``btn.click(..., inputs=audio, ...)`` —
            which passes a single value — raised TypeError on every click.

    Returns:
        The transcribed text, or ``""`` when there is no audio.
    """
    if stream is None:
        return ""
    sr, y = stream
    # Collapse stereo/multi-channel audio to mono.
    if y.ndim > 1:
        y = y.mean(axis=1)
    y = y.astype(np.float32)
    # Peak-normalize to [-1, 1]; guard against division by zero on
    # all-silent input, which previously produced NaNs.
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak
    text = transcriber({"sampling_rate": sr, "raw": y})["text"]
    return text
def clear(audio, transcribed):
    """Reset the audio input and the transcription output.

    Returns a ``(None, None)`` pair so Gradio blanks the two components
    wired as outputs of the Clear button.
    """
    return None, None
with gr.Blocks() as demo:
    # Page heading.
    gr.HTML(value="<h1>Transcribe Audio to Text Demo</h1>")
    with gr.Row():
        with gr.Column():
            # Uploaded (non-streaming) audio clip to transcribe.
            audio = gr.Audio(sources=["upload"], streaming=False, label="wav")
            with gr.Row():
                clear_btn = gr.Button(value="Clear", variant="huggingface")
                transcribe_btn = gr.Button(value="Transcribe", variant="primary")
        # Transcription result, shown beside the input column.
        transcribed = gr.TextArea(label="Transcribed", lines=9)
    # Wire the buttons to their callbacks.
    transcribe_btn.click(fn=transcribe, inputs=audio, outputs=transcribed)
    clear_btn.click(fn=clear, inputs=[audio, transcribed], outputs=[audio, transcribed])
demo.launch(share=True)