Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import numpy as np | |
| from librosa import resample | |
| from transformers import pipeline | |
# Speech-recognition pipeline backed by the "openai/whisper-base.en"
# checkpoint. It is created once at import time so every call to
# `transcribe` reuses the same loaded model; `chunk_length_s=30` asks the
# pipeline to process long recordings in 30-second chunks.
pipe = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-base.en",
    chunk_length_s=30,
)
def transcribe(audio_in):
    """Transcribe a Gradio audio clip to lower-case text.

    The samples are min-max normalized to the range [-1, 1], resampled to
    16 kHz and passed to the module-level ``pipe`` speech-recognition
    pipeline.

    Args:
        audio_in: ``(sample_rate, samples)`` tuple as produced by
            ``gr.Audio(type="numpy")``, or ``None`` when no audio was
            recorded or uploaded. ``samples`` may be 1-D (mono) or 2-D;
            2-D input is assumed to be ``(n_samples, n_channels)`` as
            documented by Gradio and is mixed down to mono.

    Returns:
        The recognized text, stripped of surrounding whitespace and
        lower-cased. An empty string is returned when there is no audio
        to transcribe.
    """
    if audio_in is None:
        return ""

    orig_sr, samples = audio_in

    # Convert to float *before* any arithmetic. Gradio delivers integer PCM
    # (typically int16), and `max - min` on fixed-width integer scalars wraps
    # around for full-scale audio (32767 - (-32768) does not fit in int16).
    samples = np.asarray(samples, dtype=np.float64)
    if samples.ndim > 1:
        # Multi-channel clip: average the channels so the pipeline receives
        # the single-channel signal it expects.
        samples = samples.mean(axis=1)
    if samples.size == 0:
        return ""

    # Vectorized min/max (the builtins iterate element by element and fail
    # outright on 2-D arrays).
    min_s = samples.min()
    range_in = samples.max() - min_s

    if range_in > 0:
        # Min-max scaling: min -> -1.0, max -> +1.0.
        samples_f = 2.0 * (samples - min_s) / range_in - 1.0
    else:
        # Constant (e.g. digitally silent) signal: there is no range to
        # scale by, so feed silence instead of dividing by zero.
        samples_f = np.zeros_like(samples)

    resamples = resample(samples_f, orig_sr=orig_sr, target_sr=16000)
    # Pass a fresh copy so the pipeline owns its input buffer.
    prediction = pipe(resamples.copy(), batch_size=8)
    return prediction["text"].strip().lower()
# Web UI: a Markdown heading followed by an audio-in / text-out interface
# wired to `transcribe`.
with gr.Blocks() as demo:
    # The heading is assembled from adjacent string literals instead of a
    # backslash-continued triple-quoted string: a backslash-newline inside a
    # string literal joins the two lines with nothing in between, which
    # rendered "english modelto help". The explicit trailing space keeps the
    # words apart regardless of how the source is indented.
    gr.Markdown(
        "\n"
        "# 5020 Audio Transcription.\n"
        "## API for [whisper-base.en](https://huggingface.co/openai/whisper-base.en) english model "
        "to help with Audio Analysis exercises.\n"
    )
    gr.Interface(
        transcribe,
        # type="numpy" hands `transcribe` a (sample_rate, samples) tuple.
        inputs=gr.Audio(type="numpy"),
        outputs="text",
        flagging_mode="never",
        cache_examples=True,
        examples=[["./audio/plain_01.wav"]],
    )
# Start the Gradio server only when this file is executed as a script, not
# when it is imported as a module.
if __name__ == "__main__":
    demo.launch(ssr_mode=False)