Siddarth committed on
Commit
57303f6
·
1 Parent(s): a56e64f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -68
app.py CHANGED
@@ -1,22 +1,12 @@
1
 
2
  from Main import wav2art
3
  import numpy as np
4
- import pandas as pd
5
- import random
6
  import librosa
7
- from pathlib import Path
8
- import os
9
  import base64
10
- import urllib.request
11
  import gc
12
  gc.enable()
13
 
14
- import json
15
  import matplotlib.pyplot as plt
16
- import matplotlib
17
- import IPython.display as ipd
18
- from scipy.io import wavfile
19
- import scipy.io
20
  import soundfile as sf
21
  from cv2 import resize, INTER_LINEAR
22
  from PIL import Image
@@ -24,10 +14,6 @@ from PIL import Image
24
  import scipy.signal as signal
25
  from matplotlib.animation import FuncAnimation
26
 
27
- from glob import glob
28
-
29
- from transformers import AutoTokenizer, AutoFeatureExtractor, AutoModelForCTC, Wav2Vec2PhonemeCTCTokenizer, Wav2Vec2ForCTC
30
-
31
  import torch
32
  import librosa
33
 
@@ -145,63 +131,15 @@ if what == 'Upload audio file':
145
 
146
 
147
 
148
- text = text_area.text_area("", "Loading wav2vec 2.0 ... \n It may take a while!")
149
-
150
-
151
- # import model, feature extractor, tokenizer
152
- # model = torch.load('model.pt')
153
-
154
- # @st.cache(allow_output_mutation=True)
155
- # def load_model():
156
-
157
- # if not os.path.isfile('model.pt'):
158
- # with st.spinner("Downloading model... this may take awhile! \n Don't stop it!"):
159
- # import gdown
160
- # url = 'https://drive.google.com/uc?id=1-1sjyooNoDiis6LhSHGfB8iU_CGLVRlS'
161
- # gdown.download(url, 'model.pt', quiet=False)
162
-
163
- # model = torch.load('model.pt')
164
- # model.eval()
165
- # return model
166
-
167
- # model = load_model()
168
-
169
-
170
- # @st.cache(allow_output_mutation=True)
171
- # def load_model():
172
- # model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-lv-60-espeak-cv-ft")
173
- # return model
174
-
175
- # model = load_model()
176
-
177
 
 
178
 
179
- # url = 'https://github.com/siddarth-c/WatchMeSpeak/releases/download/wav2vec2/model.pt'
180
- # filename = url.split('/')[-1]
181
- # urllib.request.urlretrieve(url, filename)
182
- # model = torch.load('model.pt')
183
-
184
- # import requests
185
-
186
- # API_URL = "https://api-inference.huggingface.co/models/facebook/wav2vec2-lv-60-espeak-cv-ft"
187
- # headers = {"Authorization": "Bearer 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"}
188
-
189
- # def query():
190
- # with open('audio.wav', "rb") as f:
191
- # data = f.read()
192
- # response = requests.request("POST", API_URL, headers=headers, data=data)
193
- # return json.loads(response.content.decode("utf-8"))
194
-
195
- # logits = query()['text']
196
-
197
- # tokenizer = torch.load('tokenizer.pt')
198
- # feature_extractor = torch.load('feature_extractor.pt')
199
-
200
-
201
- model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-lv-60-espeak-cv-ft")
202
  tokenizer = torch.load('tokenizer.pt')
203
  feature_extractor = torch.load('feature_extractor.pt')
204
 
 
 
205
  text = text_area.text_area("", "Estimating phonemes ...")
206
 
207
  input_values = feature_extractor(wav, return_tensors="pt", sampling_rate = sr).input_values
@@ -266,6 +204,7 @@ if what == 'Upload audio file':
266
  length = int(len(emaR_sub) * 0.3)
267
  ema = signal.resample(emaR_sub, length)
268
 
 
269
 
270
  processed = []
271
  to_print = []
@@ -372,7 +311,7 @@ if what == 'Upload audio file':
372
  brain0 = Image.open('BrainAndSpinal.png')
373
 
374
 
375
- text = text_area.text_area("", "Rendering the initial frame ...")
376
 
377
 
378
 
@@ -635,7 +574,6 @@ if what == 'Upload audio file':
635
  newax.axis('off')
636
  ax.axis('off')
637
 
638
- text = text_area.text_area("", "Rendering all frames ...")
639
 
640
  my_bar.progress(30)
641
 
@@ -652,6 +590,9 @@ if what == 'Upload audio file':
652
 
653
  loaded = int(30 + (54 * frame_number) / len(ema))
654
 
 
 
 
655
  my_bar.progress(loaded)
656
 
657
  particles["position"] = ema[frame_number]
 
1
 
2
  from Main import wav2art
3
  import numpy as np
 
 
4
  import librosa
 
 
5
  import base64
 
6
  import gc
7
  gc.enable()
8
 
 
9
  import matplotlib.pyplot as plt
 
 
 
 
10
  import soundfile as sf
11
  from cv2 import resize, INTER_LINEAR
12
  from PIL import Image
 
14
  import scipy.signal as signal
15
  from matplotlib.animation import FuncAnimation
16
 
 
 
 
 
17
  import torch
18
  import librosa
19
 
 
131
 
132
 
133
 
134
+ text = text_area.text_area("", "Loading wav2vec 2.0 ...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
 
136
+ model = torch.load("model.pt")
137
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  tokenizer = torch.load('tokenizer.pt')
139
  feature_extractor = torch.load('feature_extractor.pt')
140
 
141
+ my_bar.progress(15)
142
+
143
  text = text_area.text_area("", "Estimating phonemes ...")
144
 
145
  input_values = feature_extractor(wav, return_tensors="pt", sampling_rate = sr).input_values
 
204
  length = int(len(emaR_sub) * 0.3)
205
  ema = signal.resample(emaR_sub, length)
206
 
207
+ my_bar.progress(25)
208
 
209
  processed = []
210
  to_print = []
 
311
  brain0 = Image.open('BrainAndSpinal.png')
312
 
313
 
314
+ text = text_area.text_area("", "Rendering frame: 0 / " + str(len(ema)))
315
 
316
 
317
 
 
574
  newax.axis('off')
575
  ax.axis('off')
576
 
 
577
 
578
  my_bar.progress(30)
579
 
 
590
 
591
  loaded = int(30 + (54 * frame_number) / len(ema))
592
 
593
+ text = text_area.text_area("", "Rendering frame: " + str(frame_number) + " / " + str(len(ema)))
594
+
595
+
596
  my_bar.progress(loaded)
597
 
598
  particles["position"] = ema[frame_number]