Spaces:
Runtime error
Runtime error
| from transformers import pipeline, Wav2Vec2ProcessorWithLM | |
| from librosa import to_mono, resample | |
| import numpy as np | |
| import gradio as gr | |
# Bilingual (English / Ukrainian) description passed to the interface.
# Each piece ends with an explicit newline, so the value is identical to a
# two-line triple-quoted block with a trailing newline.
DESC = (
    "Ukrainian speech recognition app/\n"
    "Розпізнавання голосу для української мови\n"
)
# Checkpoint identifier used for both the processor and the pipeline.
model_id = "arampacha/wav2vec2-xls-r-1b-uk"

# The processor is loaded here so that its feature extractor and decoder
# can be handed to the pipeline below.
processor = Wav2Vec2ProcessorWithLM.from_pretrained(model_id)

# Speech-recognition pipeline built from the same checkpoint.
# NOTE(review): device=-1 presumably selects CPU execution — confirm
# against the transformers pipeline documentation.
asr = pipeline(
    "automatic-speech-recognition",
    model=model_id,
    device=-1,
    feature_extractor=processor.feature_extractor,
    decoder=processor.decoder,
)
def run_asr(audio):
    """Transcribe one recording with the module-level ``asr`` pipeline.

    Args:
        audio: ``(sample_rate, samples)`` pair, or ``None`` when no audio
            was provided. ``samples`` is a NumPy array, either 1-D or 2-D
            with the channel count in ``shape[1]`` (one or two channels).

    Returns:
        The ``"text"`` entry of the pipeline result, or an empty string
        when there is no audio or the recording contains no samples.

    Raises:
        ValueError: If a non-empty multi-dimensional array has a channel
            count other than 1 or 2.
    """
    # Nothing recorded: return an empty transcript instead of failing on
    # the tuple unpacking below.
    if audio is None:
        return ""

    sr, audio_array = audio
    audio_array = audio_array.astype(np.float32)

    # A zero-length recording has nothing to resample or decode.
    if audio_array.size == 0:
        return ""

    if audio_array.ndim > 1:
        n_channels = audio_array.shape[1]
        if n_channels == 1:
            # Pick the only channel explicitly; squeeze() would collapse a
            # (1, 1) array to 0-d instead of a 1-D waveform.
            audio_array = audio_array[:, 0]
        elif n_channels == 2:
            # to_mono is given the transposed (channels, samples) array.
            audio_array = to_mono(audio_array.T)
        else:
            raise ValueError("Audio with > 2 channels not supported")

    # The pipeline is always fed 16 kHz audio.
    if sr != 16_000:
        audio_array = resample(audio_array, orig_sr=sr, target_sr=16_000)

    res = asr(audio_array, chunk_length_s=20, stride_length_s=2)
    return res["text"]
# Output widget: a text box labelled "transcript".
# NOTE(review): `gr.outputs.Textbox`, `layout=` and `theme="huggingface"`
# look like an older Gradio API — confirm they exist in the Gradio version
# this app is deployed with.
text_out = gr.outputs.Textbox(label="transcript")

# Wire the microphone input through run_asr into the transcript box.
interface = gr.Interface(
    fn=run_asr,
    inputs="microphone",
    outputs=text_out,
    title="Speech-to-text Ukrainian",
    description=DESC,
    layout="horizontal",
    theme="huggingface",
    examples=["examples/dobryi_ranok.wav"],
    flagging_options=["incorrect"],
)

# Start the app; debug=True is passed through to launch().
interface.launch(debug=True)