Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -14,22 +14,13 @@ from datasets import load_dataset
|
|
| 14 |
|
| 15 |
warnings.filterwarnings("ignore")
|
| 16 |
|
| 17 |
-
#
|
| 18 |
-
import nltk
|
| 19 |
-
|
| 20 |
-
# Download necessary NLTK data
|
| 21 |
try:
|
| 22 |
nltk.data.find('tokenizers/punkt')
|
| 23 |
-
nltk.data.find('tokenizers/punkt_tab')
|
| 24 |
except LookupError:
|
| 25 |
nltk.download(['punkt', 'punkt_tab'], quiet=True)
|
| 26 |
|
| 27 |
-
# Download required NLTK data
|
| 28 |
-
try:
|
| 29 |
-
nltk.data.find('tokenizers/punkt')
|
| 30 |
-
except LookupError:
|
| 31 |
-
nltk.download('punkt')
|
| 32 |
-
|
| 33 |
|
| 34 |
class LongFormTTS:
|
| 35 |
def __init__(self):
|
|
@@ -61,7 +52,7 @@ class LongFormTTS:
|
|
| 61 |
# Handle common abbreviations
|
| 62 |
abbreviations = {
|
| 63 |
'Dr.': 'Doctor',
|
| 64 |
-
'Mr.': 'Mister',
|
| 65 |
'Mrs.': 'Missus',
|
| 66 |
'Ms.': 'Miss',
|
| 67 |
'Prof.': 'Professor',
|
|
@@ -99,8 +90,8 @@ class LongFormTTS:
|
|
| 99 |
if num > 9999:
|
| 100 |
return str(num)
|
| 101 |
ones = ["", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"]
|
| 102 |
-
teens = ["ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen",
|
| 103 |
-
|
| 104 |
tens = ["", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety"]
|
| 105 |
if num < 10:
|
| 106 |
return ones[num]
|
|
@@ -163,8 +154,8 @@ class LongFormTTS:
|
|
| 163 |
inputs = self.processor(text=text_chunk, return_tensors="pt").to(self.device)
|
| 164 |
with torch.no_grad():
|
| 165 |
speech = self.model.generate_speech(
|
| 166 |
-
inputs["input_ids"],
|
| 167 |
-
self.speaker_embeddings,
|
| 168 |
vocoder=self.vocoder
|
| 169 |
)
|
| 170 |
# Convert to numpy and move to CPU
|
|
@@ -247,7 +238,7 @@ def text_to_speech_interface(text, progress=gr.Progress()):
|
|
| 247 |
return None, "❌ Failed to generate audio. Please try with different text."
|
| 248 |
progress(0.9, desc="💾 Saving audio file...")
|
| 249 |
# Save to temporary file
|
| 250 |
-
with tempfile.NamedTemporaryFile(delete=False, suffix=".
|
| 251 |
sf.write(tmp_file.name, audio, sample_rate)
|
| 252 |
audio_path = tmp_file.name
|
| 253 |
progress(1.0, desc="✅ Complete!")
|
|
@@ -336,7 +327,7 @@ def create_interface():
|
|
| 336 |
<li>π Natural human voice</li>
|
| 337 |
<li>⚡ Smart text processing</li>
|
| 338 |
<li>π§ Auto chunking</li>
|
| 339 |
-
<li
|
| 340 |
<li>📱 Mobile friendly</li>
|
| 341 |
<li>🎵 High quality audio</li>
|
| 342 |
</ul>
|
|
@@ -410,5 +401,6 @@ if __name__ == "__main__":
|
|
| 410 |
demo.launch(
|
| 411 |
server_name="0.0.0.0",
|
| 412 |
server_port=7860,
|
| 413 |
-
share=True
|
|
|
|
| 414 |
)
|
|
|
|
| 14 |
|
| 15 |
warnings.filterwarnings("ignore")
|
| 16 |
|
| 17 |
+
# Download required NLTK data including punkt_tab
|
|
|
|
|
|
|
|
|
|
| 18 |
try:
|
| 19 |
nltk.data.find('tokenizers/punkt')
|
| 20 |
+
nltk.data.find('tokenizers/punkt_tab')
|
| 21 |
except LookupError:
|
| 22 |
nltk.download(['punkt', 'punkt_tab'], quiet=True)
|
| 23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
|
| 25 |
class LongFormTTS:
|
| 26 |
def __init__(self):
|
|
|
|
| 52 |
# Handle common abbreviations
|
| 53 |
abbreviations = {
|
| 54 |
'Dr.': 'Doctor',
|
| 55 |
+
'Mr.': 'Mister',
|
| 56 |
'Mrs.': 'Missus',
|
| 57 |
'Ms.': 'Miss',
|
| 58 |
'Prof.': 'Professor',
|
|
|
|
| 90 |
if num > 9999:
|
| 91 |
return str(num)
|
| 92 |
ones = ["", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"]
|
| 93 |
+
teens = ["ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen",
|
| 94 |
+
"sixteen", "seventeen", "eighteen", "nineteen"]
|
| 95 |
tens = ["", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety"]
|
| 96 |
if num < 10:
|
| 97 |
return ones[num]
|
|
|
|
| 154 |
inputs = self.processor(text=text_chunk, return_tensors="pt").to(self.device)
|
| 155 |
with torch.no_grad():
|
| 156 |
speech = self.model.generate_speech(
|
| 157 |
+
inputs["input_ids"],
|
| 158 |
+
self.speaker_embeddings,
|
| 159 |
vocoder=self.vocoder
|
| 160 |
)
|
| 161 |
# Convert to numpy and move to CPU
|
|
|
|
| 238 |
return None, "❌ Failed to generate audio. Please try with different text."
|
| 239 |
progress(0.9, desc="💾 Saving audio file...")
|
| 240 |
# Save to temporary file
|
| 241 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
|
| 242 |
sf.write(tmp_file.name, audio, sample_rate)
|
| 243 |
audio_path = tmp_file.name
|
| 244 |
progress(1.0, desc="✅ Complete!")
|
|
|
|
| 327 |
<li>π Natural human voice</li>
|
| 328 |
<li>⚡ Smart text processing</li>
|
| 329 |
<li>π§ Auto chunking</li>
|
| 330 |
+
<li>π Completely free</li>
|
| 331 |
<li>📱 Mobile friendly</li>
|
| 332 |
<li>🎵 High quality audio</li>
|
| 333 |
</ul>
|
|
|
|
| 401 |
demo.launch(
|
| 402 |
server_name="0.0.0.0",
|
| 403 |
server_port=7860,
|
| 404 |
+
share=True,
|
| 405 |
+
ssr_mode=False
|
| 406 |
)
|