# fuller-wav2vec2 / app.py
# Last change: "Update app.py" by jayarizco101 (commit 5ddbd3e, verified), 798 bytes.
import gradio as gr
import librosa
import numpy as np
import torch
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
# Hub repository that provides both the processor and the CTC checkpoint.
MODEL_ID = "jayarizco101/fuller-finetuned-wav2vec2"

# Loaded once at import time so every request reuses the same objects.
processor = Wav2Vec2Processor.from_pretrained(MODEL_ID)
model = Wav2Vec2ForCTC.from_pretrained(MODEL_ID)
model.eval()  # inference mode
# Sampling rate passed to the processor for every request.
TARGET_SAMPLE_RATE = 16000


def _prepare_waveform(audio):
    """Convert Gradio audio into a mono float32 waveform at 16 kHz.

    Args:
        audio: Either the ``(sample_rate, samples)`` tuple produced by
            ``gr.Audio(type="numpy")`` or a bare waveform, which is assumed
            to already be sampled at ``TARGET_SAMPLE_RATE``.

    Returns:
        A 1-D ``float32`` NumPy array, or ``None`` when there are no samples.
    """
    if isinstance(audio, tuple):
        sample_rate, samples = audio
    else:
        sample_rate, samples = TARGET_SAMPLE_RATE, audio

    samples = np.asarray(samples)
    if samples.size == 0:
        return None

    if np.issubdtype(samples.dtype, np.integer):
        # Integer PCM (Gradio usually hands over int16): scale by the
        # dtype's largest magnitude so the waveform lands in [-1.0, 1.0].
        info = np.iinfo(samples.dtype)
        samples = samples.astype(np.float32) / float(max(-info.min, info.max))
    else:
        samples = samples.astype(np.float32)

    if samples.ndim > 1:
        # Multi-channel data is assumed to be (samples, channels), as the
        # Gradio docs describe -- average the channels down to mono.
        samples = samples.mean(axis=1)

    if sample_rate != TARGET_SAMPLE_RATE:
        samples = librosa.resample(
            samples, orig_sr=sample_rate, target_sr=TARGET_SAMPLE_RATE
        )
    return samples


def transcribe(audio):
    """Transcribe an uploaded audio clip with the module-level CTC model.

    ``gr.Audio(type="numpy")`` does not deliver 16 kHz float32 audio; it
    passes a ``(sample_rate, samples)`` tuple at the file's own sampling
    rate, so the clip is normalised, down-mixed and resampled before it is
    given to the processor.

    Args:
        audio: ``(sample_rate, samples)`` tuple from Gradio, a bare waveform
            already at 16 kHz, or ``None`` when nothing was uploaded.

    Returns:
        The decoded transcription, or an empty string when there is no audio.
    """
    if audio is None:
        return ""

    waveform = _prepare_waveform(audio)
    if waveform is None:
        return ""

    inputs = processor(
        waveform,
        sampling_rate=TARGET_SAMPLE_RATE,
        return_tensors="pt",
        padding=True,
    )
    with torch.no_grad():
        logits = model(**inputs).logits
    # Greedy decoding: pick the highest-scoring token id at each frame.
    pred_ids = torch.argmax(logits, dim=-1)
    return processor.batch_decode(pred_ids)[0]
# Upload-only audio widget; with type="numpy" the callback receives the
# clip as NumPy data rather than a file path.
audio_input = gr.Audio(sources=["upload"], type="numpy")

# One audio input, plain-text transcription output.
iface = gr.Interface(fn=transcribe, inputs=audio_input, outputs="text")
iface.launch()