Update README.md
Browse files
README.md
CHANGED
|
@@ -20,11 +20,53 @@ library_name: transformers
|
|
| 20 |
Supported tags: `(neutral)`, `(happy)`, `(sad)`, `(angry)`, `(surprised)`, `(disgusted)`, `(fearful)`, `(giggle)`, `(gasp)`, `(exhale)`, `(laugh)`, `(chuckle)`, `(sign)`
|
| 21 |
|
| 22 |
```python
|
| 23 |
-
|
|
|
|
|
|
|
| 24 |
import soundfile as sf
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
-
tts = NeuTTSAir(backbone_repo="BarryFutureman/NeuTTS-Express", backbone_device="cuda",
|
| 27 |
-
codec_repo="neuphonic/distill-neucodec", codec_device="cuda")
|
| 28 |
input_text = "I just got the best news ever (giggle), and I couldn't be happier!"
|
| 29 |
start_time = time.time()
|
| 30 |
wav = tts.infer(input_text)
|
|
|
|
| 20 |
Supported tags: `(neutral)`, `(happy)`, `(sad)`, `(angry)`, `(surprised)`, `(disgusted)`, `(fearful)`, `(giggle)`, `(gasp)`, `(exhale)`, `(laugh)`, `(chuckle)`, `(sign)`
|
| 21 |
|
| 22 |
```python
|
| 23 |
+
import time
|
| 24 |
+
import re
|
| 25 |
+
import types
|
| 26 |
import soundfile as sf
|
| 27 |
+
from neuttsair.neutts import NeuTTSAir
|
| 28 |
+
|
| 29 |
+
# patch function
|
| 30 |
+
# Matches one parenthesised tag such as "(giggle)". The capturing group makes
# re.split() keep the tags in its output, interleaved with the plain text:
# even indices hold text, odd indices hold the tags.
_TAG_PATTERN = re.compile(r"(\([^)]*\))")


def patched_to_phones(self, text: str) -> str:
    """Phonemize ``text`` while passing parenthesised tags through verbatim.

    Any ``(...)`` span is copied to the output unchanged; only the text
    between such spans is sent to ``self.phonemizer``. Text segments and
    tags are emitted in their original order, separated by single spaces.

    Args:
        self: Object exposing ``phonemizer.phonemize(list[str])``, which is
            expected to return one string per input item.
        text: Input sentence, optionally containing ``(tag)`` markers.

    Returns:
        The phonemized text with whitespace collapsed to single spaces and
        every ``(tag)`` preserved exactly as written.
    """

    def phonemize_segment(segment: str) -> str:
        # Collapse any run of whitespace in the phonemizer output.
        phones = self.phonemizer.phonemize([segment])[0]
        return " ".join(phones.split())

    pieces = _TAG_PATTERN.split(text)
    if len(pieces) == 1:
        # No tag present: phonemize the whole input as-is (even if blank),
        # so untagged text goes through the same single phonemizer call.
        return phonemize_segment(text)

    result = []
    for index, piece in enumerate(pieces):
        if index % 2:
            # Odd positions are the captured tags; keep them untouched.
            result.append(piece)
        elif piece.strip():
            # Blank gaps between tags are skipped without a phonemizer call.
            phones = phonemize_segment(piece)
            if phones:
                result.append(phones)

    return " ".join(result)
|
| 58 |
+
|
| 59 |
+
# apply monkey patch
|
| 60 |
+
NeuTTSAir._to_phones = patched_to_phones
|
| 61 |
+
|
| 62 |
+
# inference
|
| 63 |
+
tts = NeuTTSAir(
|
| 64 |
+
backbone_repo="BarryFutureman/NeuTTS-Express",
|
| 65 |
+
backbone_device="cuda",
|
| 66 |
+
codec_repo="neuphonic/distill-neucodec",
|
| 67 |
+
codec_device="cuda"
|
| 68 |
+
)
|
| 69 |
|
|
|
|
|
|
|
| 70 |
input_text = "I just got the best news ever (giggle), and I couldn't be happier!"
|
| 71 |
start_time = time.time()
|
| 72 |
wav = tts.infer(input_text)
|