Spaces:
Running
Running
Upload tiny_tts/__init__.py with huggingface_hub
Browse files
- tiny_tts/__init__.py +5 -2
tiny_tts/__init__.py
CHANGED
|
@@ -38,7 +38,7 @@ class TinyTTS:
|
|
| 38 |
|
| 39 |
self.model = load_engine(checkpoint_path, self.device)
|
| 40 |
|
| 41 |
-
def speak(self, text, output_path="output.wav", speaker="
|
| 42 |
"""Synthesize text to speech and save to output_path."""
|
| 43 |
print(f"Synthesizing: {text}")
|
| 44 |
|
|
@@ -73,12 +73,15 @@ class TinyTTS:
|
|
| 73 |
bert = torch.zeros(1024, len(phone_ids)).to(self.device).unsqueeze(0)
|
| 74 |
ja_bert = torch.zeros(768, len(phone_ids)).to(self.device).unsqueeze(0)
|
| 75 |
|
|
|
|
|
|
|
|
|
|
| 76 |
with torch.no_grad():
|
| 77 |
audio, *_ = self.model.infer(
|
| 78 |
x, x_lengths, sid, tone, language, bert, ja_bert,
|
| 79 |
noise_scale=0.667,
|
| 80 |
noise_scale_w=0.8,
|
| 81 |
-
length_scale=
|
| 82 |
)
|
| 83 |
|
| 84 |
audio_np = audio[0, 0].cpu().numpy()
|
|
|
|
| 38 |
|
| 39 |
self.model = load_engine(checkpoint_path, self.device)
|
| 40 |
|
| 41 |
+
def speak(self, text, output_path="output.wav", speaker="MALE", speed=1.0):
|
| 42 |
"""Synthesize text to speech and save to output_path."""
|
| 43 |
print(f"Synthesizing: {text}")
|
| 44 |
|
|
|
|
| 73 |
bert = torch.zeros(1024, len(phone_ids)).to(self.device).unsqueeze(0)
|
| 74 |
ja_bert = torch.zeros(768, len(phone_ids)).to(self.device).unsqueeze(0)
|
| 75 |
|
| 76 |
+
# speed > 1.0 = faster speech, < 1.0 = slower speech
|
| 77 |
+
length_scale = 1.0 / speed
|
| 78 |
+
|
| 79 |
with torch.no_grad():
|
| 80 |
audio, *_ = self.model.infer(
|
| 81 |
x, x_lengths, sid, tone, language, bert, ja_bert,
|
| 82 |
noise_scale=0.667,
|
| 83 |
noise_scale_w=0.8,
|
| 84 |
+
length_scale=length_scale
|
| 85 |
)
|
| 86 |
|
| 87 |
audio_np = audio[0, 0].cpu().numpy()
|