KuyaToto committed on
Commit
cd70c07
·
verified ·
1 Parent(s): 245b979

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -0
app.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
4
+ import torchaudio
5
+
6
# Identifier of the pretrained Wav2Vec2 model that both loaders below fetch.
model_id = "facebook/wav2vec2-large-960h-lv60-self"

# Loaded once at module import so every transcription request reuses the
# same processor and model objects.
processor = Wav2Vec2Processor.from_pretrained(model_id)
model = Wav2Vec2ForCTC.from_pretrained(model_id)
10
+
11
def transcribe(audio):
    """Transcribe an audio file to text with the module-level Wav2Vec2 model.

    Args:
        audio: Path to an audio file, as supplied by
            ``gr.Audio(type="filepath")``, or ``None`` when the user
            submitted the form without recording or uploading anything.

    Returns:
        The decoded transcription string, or an empty string when ``audio``
        is ``None``.
    """
    # Nothing to transcribe: return early instead of letting the loader
    # fail on a missing path.
    if audio is None:
        return ""

    waveform, sample_rate = torchaudio.load(audio)

    # The loader yields a (channels, frames) tensor. Average the channels so
    # multi-channel input becomes one mono signal rather than being passed
    # to the processor as a 2-D array.
    if waveform.dim() > 1 and waveform.size(0) > 1:
        waveform = waveform.mean(dim=0, keepdim=True)

    # The processor call below is told the audio is 16 kHz, so resample
    # anything recorded at a different rate first.
    if sample_rate != 16000:
        resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
        waveform = resampler(waveform)

    # Flatten to 1-D (reshape keeps a one-sample signal 1-D, unlike a bare
    # squeeze()) and hand the processor a NumPy array.
    mono_signal = waveform.reshape(-1).numpy()
    input_values = processor(mono_signal, return_tensors="pt", sampling_rate=16000).input_values

    # Inference only: skip gradient tracking.
    with torch.no_grad():
        logits = model(input_values).logits

    # Greedy decoding: most likely token per frame, then collapse to text.
    predicted_ids = torch.argmax(logits, dim=-1)
    transcription = processor.batch_decode(predicted_ids)[0]
    return transcription
22
+
23
# Web UI: an audio input delivered to `transcribe` as a file path, and a
# text output showing the returned transcription.
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath"),
    outputs="text",
)

# Start serving the interface.
demo.launch()