| | |
| |
|
| | import gradio as gr |
| | from transformers import pipeline |
| | import numpy as np |
| |
|
# Hub identifier of the speech-recognition checkpoint served by this demo.
ASR_MODEL_ID = "bartelds/gos-gpu6-cp1_adp0_192m_no_test_1e-5_cp-12000"

# Build the pipeline once at import time so every request reuses the same
# loaded model instead of re-creating it per call.
transcriber = pipeline(task="automatic-speech-recognition", model=ASR_MODEL_ID)
| |
|
def transcribe(audio):
    """Transcribe one uploaded audio clip with the module-level ASR pipeline.

    Args:
        audio: A ``(sampling_rate, samples)`` pair, or ``None`` when nothing
            was uploaded. ``samples`` is converted with ``np.asarray``; a
            multi-dimensional array is assumed to be laid out as
            ``(samples, channels)`` (the layout Gradio documents for its
            numpy audio output -- confirm if the input component changes).

    Returns:
        The ``"text"`` field of the pipeline result, or an empty string when
        there is no audio to transcribe.
    """
    # Submitting the form without a file hands us None; unpacking it would
    # raise TypeError, so answer with an empty transcription instead.
    if audio is None:
        return ""

    sr, y = audio
    y = np.asarray(y)

    # np.max() raises ValueError on an empty array.
    if y.size == 0:
        return ""

    # The ASR pipeline expects single-channel input, so average the channels
    # of stereo (or wider) recordings down to mono.
    if y.ndim > 1:
        y = y.mean(axis=1)

    # astype() returns a copy, so the in-place scaling below never touches
    # the caller's buffer.
    y = y.astype(np.float32)

    # Peak-normalise to [-1, 1]. A silent clip has peak 0; dividing by it
    # would fill the signal with NaNs, so leave such clips untouched.
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak

    return transcriber({"sampling_rate": sr, "raw": y})["text"]
| |
|
| |
|
# Input widget: the user supplies a recording by uploading a file.
audio_input = gr.Audio(source="upload")

# Wire the transcription function to an audio input and a plain-text output.
demo = gr.Interface(
    fn=transcribe,
    inputs=audio_input,
    outputs="text",
    title="Speech-to-text for Gronings",
    description=(
        "Upload an audio file (in 16 kHz) with Gronings speech to obtain its "
        "transcription. Example files are in our "
        "[gos-demo](https://huggingface.co/datasets/bartelds/gos-demo) dataset."
    ),
)

# Start the web server for the interface.
demo.launch()
| |
|