Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC | |
| import torch | |
# Checkpoint shared by the processor and the acoustic model. Both are loaded
# a single time at import so that every request reuses the same objects.
MODEL_ID = "facebook/wav2vec2-large-960h-lv60-self"
processor = Wav2Vec2Processor.from_pretrained(MODEL_ID)
model = Wav2Vec2ForCTC.from_pretrained(MODEL_ID)
def transcribe(audio):
    """Transcribe one Gradio audio clip to text with the Wav2Vec2 CTC model.

    Args:
        audio: ``(sample_rate, waveform)`` tuple as produced by a
            ``gr.Audio(type="numpy")`` input, or ``None`` when the user
            submitted without recording anything.

    Returns:
        The decoded transcription string, or ``""`` when there is no audio.
    """
    # Gradio passes None when the form is submitted with no recording.
    if audio is None:
        return ""

    sample_rate, waveform = audio

    samples = torch.as_tensor(waveform)
    if samples.numel() == 0:
        return ""

    # Convert to float32. Integer PCM is additionally scaled to [-1, 1) by the
    # full-scale value of its dtype (32768 for 16-bit audio).
    if samples.is_floating_point():
        samples = samples.to(torch.float32)
    else:
        full_scale = float(torch.iinfo(samples.dtype).max) + 1.0
        samples = samples.to(torch.float32) / full_scale

    # Mix multi-channel audio down to mono; a 2-D array would otherwise be
    # interpreted by the feature extractor as a batch of clips.
    # NOTE(review): assumes the (samples, channels) layout Gradio documents.
    if samples.dim() > 1:
        samples = samples.mean(dim=-1)

    # The feature extractor rejects any rate other than the one the checkpoint
    # was trained with, while microphones usually record at 44.1/48 kHz.
    target_rate = processor.feature_extractor.sampling_rate
    if sample_rate != target_rate:
        resampled_len = max(1, round(samples.numel() * target_rate / sample_rate))
        # Plain linear interpolation: dependency-free, but not band-limited.
        samples = torch.nn.functional.interpolate(
            samples.reshape(1, 1, -1),
            size=resampled_len,
            mode="linear",
            align_corners=False,
        ).reshape(-1)

    input_values = processor(
        samples.numpy(),
        sampling_rate=target_rate,
        return_tensors="pt",
        padding="longest",
    ).input_values

    with torch.no_grad():
        logits = model(input_values).logits

    # Greedy CTC decoding: most likely token per frame, collapsed by the
    # tokenizer inside batch_decode.
    predicted_ids = torch.argmax(logits, dim=-1)
    return processor.batch_decode(predicted_ids)[0]
# Build Gradio interface
def _build_audio_input():
    """Create the audio input component on both Gradio 3.x and Gradio 4+."""
    try:
        # Gradio 3.x API: one `source` string.
        return gr.Audio(source="microphone", type="numpy")
    except TypeError:
        # Gradio 4+ removed `source` in favour of a `sources` list; the old
        # keyword raises TypeError there. Enable both inputs so the UI matches
        # the "Record or upload" description below.
        return gr.Audio(sources=["microphone", "upload"], type="numpy")


iface = gr.Interface(
    fn=transcribe,
    inputs=_build_audio_input(),
    outputs="text",
    title="Wav2Vec2 ASR",
    description="Record or upload audio, and get transcription using Wav2Vec2 large model.",
)
iface.launch()