Update Modules/Generate_Speech.py
Browse files
- Modules/Generate_Speech.py +17 -3
Modules/Generate_Speech.py
CHANGED
|
@@ -2,6 +2,10 @@ from __future__ import annotations
|
|
| 2 |
|
| 3 |
import numpy as np
|
| 4 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
from typing import Annotated
|
| 7 |
|
|
@@ -108,7 +112,7 @@ def Generate_Speech(
|
|
| 108 |
"zf_xiaoni, zf_xiaoxiao, zf_xiaoyi, zm_yunjian, zm_yunxi, zm_yunxia, zm_yunyang."
|
| 109 |
),
|
| 110 |
] = "af_heart",
|
| 111 |
-
) ->
|
| 112 |
_log_call_start("Generate_Speech", text=_truncate_for_log(text, 200), speed=speed, voice=voice)
|
| 113 |
if not text or not text.strip():
|
| 114 |
try:
|
|
@@ -145,8 +149,18 @@ def Generate_Speech(
|
|
| 145 |
if total_segments > 1:
|
| 146 |
duration = len(final_audio) / 24_000
|
| 147 |
print(f"Completed: {total_segments} segments concatenated into {duration:.1f} seconds of audio")
|
| 148 |
-
|
| 149 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
except gr.Error as exc:
|
| 151 |
_log_call_end("Generate_Speech", f"gr_error={str(exc)}")
|
| 152 |
raise
|
|
|
|
| 2 |
|
| 3 |
import numpy as np
|
| 4 |
import gradio as gr
|
| 5 |
+
import os
|
| 6 |
+
import uuid
|
| 7 |
+
import scipy.io.wavfile
|
| 8 |
+
from .File_System import ROOT_DIR
|
| 9 |
|
| 10 |
from typing import Annotated
|
| 11 |
|
|
|
|
| 112 |
"zf_xiaoni, zf_xiaoxiao, zf_xiaoyi, zm_yunjian, zm_yunxi, zm_yunxia, zm_yunyang."
|
| 113 |
),
|
| 114 |
] = "af_heart",
|
| 115 |
+
) -> str:
|
| 116 |
_log_call_start("Generate_Speech", text=_truncate_for_log(text, 200), speed=speed, voice=voice)
|
| 117 |
if not text or not text.strip():
|
| 118 |
try:
|
|
|
|
| 149 |
if total_segments > 1:
|
| 150 |
duration = len(final_audio) / 24_000
|
| 151 |
print(f"Completed: {total_segments} segments concatenated into {duration:.1f} seconds of audio")
|
| 152 |
+
|
| 153 |
+
# Save to file
|
| 154 |
+
filename = f"speech_{uuid.uuid4().hex[:8]}.wav"
|
| 155 |
+
output_path = os.path.join(ROOT_DIR, filename)
|
| 156 |
+
|
| 157 |
+
# Convert to 16-bit PCM
|
| 158 |
+
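# final_audio is presumably float32 in [-1, 1]; scale it to the int16 range. NOTE(review): no clipping is applied before the cast, so any sample outside [-1, 1] would overflow int16 — confirm the range upstream.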
|
| 159 |
+
audio_int16 = (final_audio * 32767).astype(np.int16)
|
| 160 |
+
scipy.io.wavfile.write(output_path, 24000, audio_int16)
|
| 161 |
+
|
| 162 |
+
_log_call_end("Generate_Speech", f"saved_to={os.path.basename(output_path)} duration_sec={len(final_audio)/24_000:.2f}")
|
| 163 |
+
return output_path
|
| 164 |
except gr.Error as exc:
|
| 165 |
_log_call_end("Generate_Speech", f"gr_error={str(exc)}")
|
| 166 |
raise
|