BarryFutureman
/

NeuTTS-Express

text-generation

text-generation-inference

Model card Files Files and versions

BarryFutureman committed on Dec 11, 2025

Commit

3b711a6

·

verified ·

1 Parent(s): 0641e9f

Update README.md

Files changed (1) hide show

README.md +45 -3

README.md CHANGED Viewed

@@ -20,11 +20,53 @@ library_name: transformers
 Supported tags: `(neutral)`, `(happy)`, `(sad)`, `(angry)`, `(surprised)`, `(disgusted)`, `(fearful)`, `(giggle)`, `(gasp)`, `(exhale)`, `(laugh)`, `(chuckle)`, `(sign)`
 ```python
-from neuttsair.neutts import NeuTTSAir
 import soundfile as sf
-tts = NeuTTSAir(backbone_repo="BarryFutureman/NeuTTS-Express", backbone_device="cuda",
-                codec_repo="neuphonic/distill-neucodec", codec_device="cuda")
 input_text = "I just got the best news ever (giggle), and I couldn't be happier!"
 start_time = time.time()
 wav = tts.infer(input_text)

 Supported tags: `(neutral)`, `(happy)`, `(sad)`, `(angry)`, `(surprised)`, `(disgusted)`, `(fearful)`, `(giggle)`, `(gasp)`, `(exhale)`, `(laugh)`, `(chuckle)`, `(sign)`
 ```python
+import time
+import re
+import types
 import soundfile as sf
+from neuttsair.neutts import NeuTTSAir
+# patch function
+def patched_to_phones(self, text: str) -> str:
+    pattern = r'\([^)]*\)'
+    preserved_matches = list(re.finditer(pattern, text))
+    if not preserved_matches:
+        phones = self.phonemizer.phonemize([text])[0]
+        return " ".join(phones.split())
+    parts = re.split(pattern, text)
+    preserved_texts = [m.group() for m in preserved_matches]
+    phonemized_parts = []
+    for part in parts:
+        if part.strip():
+            phonemized = self.phonemizer.phonemize([part])[0]
+            phonemized = " ".join(phonemized.split())
+            phonemized_parts.append(phonemized)
+        else:
+            phonemized_parts.append("")
+    result = []
+    for i, phonemized_part in enumerate(phonemized_parts):
+        if phonemized_part:
+            result.append(phonemized_part)
+        if i < len(preserved_texts):
+            result.append(preserved_texts[i])
+    return " ".join(result)
+# apply monkey patch
+NeuTTSAir._to_phones = patched_to_phones
+# inference
+tts = NeuTTSAir(
+    backbone_repo="BarryFutureman/NeuTTS-Express",
+    backbone_device="cuda",
+    codec_repo="neuphonic/distill-neucodec",
+    codec_device="cuda"
+)
 input_text = "I just got the best news ever (giggle), and I couldn't be happier!"
 start_time = time.time()
 wav = tts.infer(input_text)