small fix
Browse files
README.md
CHANGED
|
@@ -46,11 +46,11 @@ To distinguish phone entries from BPE tokens that share the same Unicode, we enc
|
|
| 46 |
from espnet2.bin.s2t_inference import Speech2Text
|
| 47 |
import soundfile as sf # or librosa
|
| 48 |
|
| 49 |
-
task =
|
| 50 |
s2t = Speech2Text.from_pretrained(
|
| 51 |
"espnet/powsm",
|
| 52 |
device="cuda",
|
| 53 |
-
lang_sym=
|
| 54 |
task_sym=task, # <pr>, <asr>, <g2p>, <p2g>
|
| 55 |
)
|
| 56 |
|
|
@@ -60,7 +60,7 @@ pred = s2t(speech, text_prev=prompt)[0][0]
|
|
| 60 |
|
| 61 |
# post-processing for better format
|
| 62 |
pred = pred.split("<notimestamps>")[1].strip()
|
| 63 |
-
if task ==
|
| 64 |
pred = pred.replace("/", "")
|
| 65 |
print(pred)
|
| 66 |
```
|
|
@@ -83,7 +83,7 @@ s2t = Speech2Language.from_pretrained(
|
|
| 83 |
last_lang_sym="<zul>" # fixed; defined in vocab list
|
| 84 |
)
|
| 85 |
|
| 86 |
-
speech, rate = sf.read("sample.wav"
|
| 87 |
pred = s2t(speech)[0] # a list of lang-prob pairs
|
| 88 |
print(pred)
|
| 89 |
```
|
|
|
|
| 46 |
from espnet2.bin.s2t_inference import Speech2Text
|
| 47 |
import soundfile as sf # or librosa
|
| 48 |
|
| 49 |
+
task = "<pr>"
|
| 50 |
s2t = Speech2Text.from_pretrained(
|
| 51 |
"espnet/powsm",
|
| 52 |
device="cuda",
|
| 53 |
+
lang_sym="<eng>", # ISO 639-3; set to <unk> for unseen languages
|
| 54 |
task_sym=task, # <pr>, <asr>, <g2p>, <p2g>
|
| 55 |
)
|
| 56 |
|
|
|
|
| 60 |
|
| 61 |
# post-processing for better format
|
| 62 |
pred = pred.split("<notimestamps>")[1].strip()
|
| 63 |
+
if task == "<pr>" or task == "<g2p>":
|
| 64 |
pred = pred.replace("/", "")
|
| 65 |
print(pred)
|
| 66 |
```
|
|
|
|
| 83 |
last_lang_sym="<zul>" # fixed; defined in vocab list
|
| 84 |
)
|
| 85 |
|
| 86 |
+
speech, rate = sf.read("sample.wav")
|
| 87 |
pred = s2t(speech)[0] # a list of lang-prob pairs
|
| 88 |
print(pred)
|
| 89 |
```
|