Upload distilled conformer recognizer object and loader script
Browse files
- modeling.py +4 -4
- recognizer.dill +2 -2
modeling.py
CHANGED
|
@@ -46,17 +46,17 @@ class UpgradedRecognizer:
|
|
| 46 |
waveform = resampler(waveform)
|
| 47 |
if waveform.shape[0] > 1:
|
| 48 |
waveform = torch.mean(waveform, dim=0, keepdim=True)
|
| 49 |
-
|
| 50 |
audio_object = Audio(waveform.squeeze().cpu().numpy(), 16000)
|
| 51 |
-
|
| 52 |
features = self.pm.compute(audio_object)
|
| 53 |
features_tensor = torch.tensor(features).unsqueeze(0).to(self.device)
|
| 54 |
lengths_tensor = torch.tensor([features_tensor.shape[1]], device=self.device)
|
| 55 |
-
|
| 56 |
with torch.no_grad():
|
| 57 |
logits = self.am(features_tensor, lengths_tensor)
|
| 58 |
|
| 59 |
logits_numpy = logits.squeeze(0).cpu().numpy()
|
| 60 |
phoneme_list = self.lm.compute(logits_numpy, lang_id='ipa', topk=1)
|
| 61 |
-
|
| 62 |
return " ".join(phoneme_list)
|
|
|
|
| 46 |
waveform = resampler(waveform)
|
| 47 |
if waveform.shape[0] > 1:
|
| 48 |
waveform = torch.mean(waveform, dim=0, keepdim=True)
|
| 49 |
+
|
| 50 |
audio_object = Audio(waveform.squeeze().cpu().numpy(), 16000)
|
| 51 |
+
|
| 52 |
features = self.pm.compute(audio_object)
|
| 53 |
features_tensor = torch.tensor(features).unsqueeze(0).to(self.device)
|
| 54 |
lengths_tensor = torch.tensor([features_tensor.shape[1]], device=self.device)
|
| 55 |
+
|
| 56 |
with torch.no_grad():
|
| 57 |
logits = self.am(features_tensor, lengths_tensor)
|
| 58 |
|
| 59 |
logits_numpy = logits.squeeze(0).cpu().numpy()
|
| 60 |
phoneme_list = self.lm.compute(logits_numpy, lang_id='ipa', topk=1)
|
| 61 |
+
|
| 62 |
return " ".join(phoneme_list)
|
recognizer.dill
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:27490f914a4e5a4484f59e8a78c5e3291fb3a170a76cfb68c1ffaff05c12b0fc
|
| 3 |
+
size 190360330
|