nykodmar committed on
Commit
4f4a842
·
1 Parent(s): 91d8e15

Init commit

Browse files
Files changed (3) hide show
  1. app.py +57 -0
  2. packages.txt +2 -0
  3. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import gradio as gr
3
+ import os
4
+ import librosa
5
+ from transformers import Wav2Vec2ProcessorWithLM, AutoModelForCTC, Wav2Vec2CTCTokenizer, Wav2Vec2FeatureExtractor
6
+ import torch
7
+
8
+
9
# Model identifier and access token are both read from the environment.
model_name = os.environ.get("MODEL_NAME")
auth_token = os.environ.get("API_TOKEN")

# Load models
# The tokenizer is created explicitly with its BOS/EOS tokens disabled.
tokenizer = Wav2Vec2CTCTokenizer.from_pretrained(
    model_name,
    eos_token=None,
    bos_token=None,
    use_auth_token=auth_token,
)

# The pretrained processor is loaded only to obtain its decoder.
decoder = Wav2Vec2ProcessorWithLM.from_pretrained(
    model_name,
    use_auth_token=auth_token,
).decoder

feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(
    model_name,
    use_auth_token=auth_token,
)

# The processor actually used for inference is assembled from the pieces
# above, so it carries the tokenizer configured without BOS/EOS tokens.
processor = Wav2Vec2ProcessorWithLM(
    feature_extractor=feature_extractor,
    tokenizer=tokenizer,
    decoder=decoder,
)

model = AutoModelForCTC.from_pretrained(model_name, use_auth_token=auth_token)
19
+
20
def load_data(input_file, target_sr=16_000):
    """Load an audio file as a 1-D waveform sampled at ``target_sr`` Hz.

    Decoding, down-mixing to mono and resampling are all delegated to a
    single ``librosa.load`` call. This avoids two problems with loading at
    librosa's default rate and resampling afterwards:

    * the audio would be resampled twice (native rate -> librosa default ->
      target), which costs time and quality;
    * ``librosa.resample`` takes ``orig_sr``/``target_sr`` as keyword-only
      arguments in recent librosa releases, so a positional call raises
      ``TypeError`` there.

    Args:
        input_file: Path (or file-like object) accepted by ``librosa.load``.
        target_sr: Sampling rate of the returned waveform, in Hz.

    Returns:
        The mono waveform as returned by ``librosa.load``.
    """
    # mono=True makes librosa average the channels, so the result is 1-D.
    speech, _ = librosa.load(input_file, sr=target_sr, mono=True)
    return speech
33
+
34
+
35
+
36
def transcribe(input_file):
    """Transcribe the audio file at ``input_file`` and return the text.

    The waveform from ``load_data`` is run through the module-level
    ``processor`` and ``model``; the resulting logits are decoded by the
    module-level ``decoder`` using a beam width of 30.
    """
    waveform = load_data(input_file)

    # Turn the raw waveform into model inputs (PyTorch tensors).
    features = processor(waveform, return_tensors="pt", sampling_rate=16_000)

    # Inference only, so gradient tracking is disabled.
    with torch.no_grad():
        output = model(features.input_values)

    # Index 0 selects the first entry along the leading axis
    # (presumably the single batch item; verify if batching is added).
    frame_logits = output.logits.cpu().numpy()[0]

    return decoder.decode(frame_logits, beam_width=30)
52
+
53
# Build the web UI: one uploaded audio file in (passed to the handler as a
# file path), plain text out, then start serving it.
audio_input = gr.inputs.Audio(source="upload", type="filepath")
demo = gr.Interface(fn=transcribe, inputs=audio_input, outputs="text")
demo.launch()
57
+
packages.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ ffmpeg
2
+ libsndfile1
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ torch
2
+ transformers
3
+ librosa
4
+ pyctcdecode
5
+ https://github.com/kpu/kenlm/archive/master.zip