Spaces: Sleeping
Update text_to_speech.py
Browse files
- text_to_speech.py +11 -12
text_to_speech.py
CHANGED
|
@@ -15,7 +15,6 @@ from .utils import (
|
|
| 15 |
remove_directory_contents,
|
| 16 |
remove_files,
|
| 17 |
run_command,
|
| 18 |
-
write_chunked,
|
| 19 |
)
|
| 20 |
import numpy as np
|
| 21 |
from typing import Any, Dict
|
|
@@ -60,7 +59,7 @@ def error_handling_in_tts(error, segment, TRANSLATE_AUDIO_TO, filename):
|
|
| 60 |
# Read audio data from the TemporaryFile using soundfile
|
| 61 |
audio_data, samplerate = sf.read(f)
|
| 62 |
f.close() # Close the TemporaryFile
|
| 63 |
-
|
| 64 |
filename, audio_data, samplerate, format="ogg", subtype="vorbis"
|
| 65 |
)
|
| 66 |
|
|
@@ -74,7 +73,7 @@ def error_handling_in_tts(error, segment, TRANSLATE_AUDIO_TO, filename):
|
|
| 74 |
sample_rate_aux = 22050
|
| 75 |
duration = float(segment["end"]) - float(segment["start"])
|
| 76 |
data = np.zeros(int(sample_rate_aux * duration)).astype(np.float32)
|
| 77 |
-
|
| 78 |
filename, data, sample_rate_aux, format="ogg", subtype="vorbis"
|
| 79 |
)
|
| 80 |
logger.error("Audio will be replaced -> [silent audio].")
|
|
@@ -182,7 +181,7 @@ def segments_egde_tts(filtered_edge_segments, TRANSLATE_AUDIO_TO, is_gui):
|
|
| 182 |
# os.remove(temp_file)
|
| 183 |
|
| 184 |
# Save file
|
| 185 |
-
|
| 186 |
file=filename,
|
| 187 |
samplerate=sample_rate,
|
| 188 |
data=data,
|
|
@@ -257,7 +256,7 @@ def segments_bark_tts(
|
|
| 257 |
speech_output.cpu().numpy().squeeze().astype(np.float32),
|
| 258 |
sampling_rate,
|
| 259 |
)
|
| 260 |
-
|
| 261 |
file=filename,
|
| 262 |
samplerate=sampling_rate,
|
| 263 |
data=data_tts,
|
|
@@ -363,7 +362,7 @@ def segments_vits_tts(filtered_vits_segments, TRANSLATE_AUDIO_TO):
|
|
| 363 |
sampling_rate,
|
| 364 |
)
|
| 365 |
# Save file
|
| 366 |
-
|
| 367 |
file=filename,
|
| 368 |
samplerate=sampling_rate,
|
| 369 |
data=data_tts,
|
|
@@ -668,7 +667,7 @@ def segments_coqui_tts(
|
|
| 668 |
sampling_rate,
|
| 669 |
)
|
| 670 |
# Save file
|
| 671 |
-
|
| 672 |
file=filename,
|
| 673 |
samplerate=sampling_rate,
|
| 674 |
data=data_tts,
|
|
@@ -702,7 +701,7 @@ def piper_tts_voices_list():
|
|
| 702 |
|
| 703 |
# 1. Tải voices.json gốc từ hệ thống để giữ các giọng mặc định
|
| 704 |
file_path = download_manager(
|
| 705 |
-
url="https://huggingface.co/
|
| 706 |
path="./PIPER_MODELS",
|
| 707 |
)
|
| 708 |
|
|
@@ -932,7 +931,7 @@ def segments_vits_onnx_tts(filtered_onnx_vits_segments, TRANSLATE_AUDIO_TO):
|
|
| 932 |
sampling_rate,
|
| 933 |
)
|
| 934 |
# Save file
|
| 935 |
-
|
| 936 |
file=filename,
|
| 937 |
samplerate=sampling_rate,
|
| 938 |
data=data_tts,
|
|
@@ -1002,7 +1001,7 @@ def segments_openai_tts(
|
|
| 1002 |
sampling_rate,
|
| 1003 |
)
|
| 1004 |
|
| 1005 |
-
|
| 1006 |
file=filename,
|
| 1007 |
samplerate=sampling_rate,
|
| 1008 |
data=data_tts,
|
|
@@ -1586,7 +1585,7 @@ def toneconverter_freevc(
|
|
| 1586 |
target_wav=original_wav_audio_segment,
|
| 1587 |
)
|
| 1588 |
|
| 1589 |
-
|
| 1590 |
file=save_path,
|
| 1591 |
samplerate=tts.voice_converter.vc_config.audio.output_sample_rate,
|
| 1592 |
data=wav,
|
|
@@ -1648,4 +1647,4 @@ if __name__ == "__main__":
|
|
| 1648 |
tts_voice03="en-GB-SoniaNeural-Female",
|
| 1649 |
tts_voice04="en-NZ-MitchellNeural-Male",
|
| 1650 |
tts_voice05="en-GB-MaisieNeural-Female",
|
| 1651 |
-
)
|
|
|
|
| 15 |
remove_directory_contents,
|
| 16 |
remove_files,
|
| 17 |
run_command,
|
|
|
|
| 18 |
)
|
| 19 |
import numpy as np
|
| 20 |
from typing import Any, Dict
|
|
|
|
| 59 |
# Read audio data from the TemporaryFile using soundfile
|
| 60 |
audio_data, samplerate = sf.read(f)
|
| 61 |
f.close() # Close the TemporaryFile
|
| 62 |
+
sf.write(
|
| 63 |
filename, audio_data, samplerate, format="ogg", subtype="vorbis"
|
| 64 |
)
|
| 65 |
|
|
|
|
| 73 |
sample_rate_aux = 22050
|
| 74 |
duration = float(segment["end"]) - float(segment["start"])
|
| 75 |
data = np.zeros(int(sample_rate_aux * duration)).astype(np.float32)
|
| 76 |
+
sf.write(
|
| 77 |
filename, data, sample_rate_aux, format="ogg", subtype="vorbis"
|
| 78 |
)
|
| 79 |
logger.error("Audio will be replaced -> [silent audio].")
|
|
|
|
| 181 |
# os.remove(temp_file)
|
| 182 |
|
| 183 |
# Save file
|
| 184 |
+
sf.write(
|
| 185 |
file=filename,
|
| 186 |
samplerate=sample_rate,
|
| 187 |
data=data,
|
|
|
|
| 256 |
speech_output.cpu().numpy().squeeze().astype(np.float32),
|
| 257 |
sampling_rate,
|
| 258 |
)
|
| 259 |
+
sf.write(
|
| 260 |
file=filename,
|
| 261 |
samplerate=sampling_rate,
|
| 262 |
data=data_tts,
|
|
|
|
| 362 |
sampling_rate,
|
| 363 |
)
|
| 364 |
# Save file
|
| 365 |
+
sf.write(
|
| 366 |
file=filename,
|
| 367 |
samplerate=sampling_rate,
|
| 368 |
data=data_tts,
|
|
|
|
| 667 |
sampling_rate,
|
| 668 |
)
|
| 669 |
# Save file
|
| 670 |
+
sf.write(
|
| 671 |
file=filename,
|
| 672 |
samplerate=sampling_rate,
|
| 673 |
data=data_tts,
|
|
|
|
| 701 |
|
| 702 |
# 1. Tải voices.json gốc từ hệ thống để giữ các giọng mặc định
|
| 703 |
file_path = download_manager(
|
| 704 |
+
url="https://huggingface.co/hoangquocviet/piper-voices/resolve/main/voices.json",
|
| 705 |
path="./PIPER_MODELS",
|
| 706 |
)
|
| 707 |
|
|
|
|
| 931 |
sampling_rate,
|
| 932 |
)
|
| 933 |
# Save file
|
| 934 |
+
sf.write(
|
| 935 |
file=filename,
|
| 936 |
samplerate=sampling_rate,
|
| 937 |
data=data_tts,
|
|
|
|
| 1001 |
sampling_rate,
|
| 1002 |
)
|
| 1003 |
|
| 1004 |
+
sf.write(
|
| 1005 |
file=filename,
|
| 1006 |
samplerate=sampling_rate,
|
| 1007 |
data=data_tts,
|
|
|
|
| 1585 |
target_wav=original_wav_audio_segment,
|
| 1586 |
)
|
| 1587 |
|
| 1588 |
+
sf.write(
|
| 1589 |
file=save_path,
|
| 1590 |
samplerate=tts.voice_converter.vc_config.audio.output_sample_rate,
|
| 1591 |
data=wav,
|
|
|
|
| 1647 |
tts_voice03="en-GB-SoniaNeural-Female",
|
| 1648 |
tts_voice04="en-NZ-MitchellNeural-Male",
|
| 1649 |
tts_voice05="en-GB-MaisieNeural-Female",
|
| 1650 |
+
)
|