Nymbo committed on
Commit
74c7cce
·
verified ·
1 Parent(s): 98102a9

Update Modules/Generate_Speech.py

Browse files
Files changed (1) hide show
  1. Modules/Generate_Speech.py +17 -3
Modules/Generate_Speech.py CHANGED
@@ -2,6 +2,10 @@ from __future__ import annotations
2
 
3
  import numpy as np
4
  import gradio as gr
 
 
 
 
5
 
6
  from typing import Annotated
7
 
@@ -108,7 +112,7 @@ def Generate_Speech(
108
  "zf_xiaoni, zf_xiaoxiao, zf_xiaoyi, zm_yunjian, zm_yunxi, zm_yunxia, zm_yunyang."
109
  ),
110
  ] = "af_heart",
111
- ) -> tuple[int, np.ndarray]:
112
  _log_call_start("Generate_Speech", text=_truncate_for_log(text, 200), speed=speed, voice=voice)
113
  if not text or not text.strip():
114
  try:
@@ -145,8 +149,18 @@ def Generate_Speech(
145
  if total_segments > 1:
146
  duration = len(final_audio) / 24_000
147
  print(f"Completed: {total_segments} segments concatenated into {duration:.1f} seconds of audio")
148
- _log_call_end("Generate_Speech", f"samples={final_audio.shape[0]} duration_sec={len(final_audio)/24_000:.2f}")
149
- return 24_000, final_audio
 
 
 
 
 
 
 
 
 
 
150
  except gr.Error as exc:
151
  _log_call_end("Generate_Speech", f"gr_error={str(exc)}")
152
  raise
 
2
 
3
  import numpy as np
4
  import gradio as gr
5
+ import os
6
+ import uuid
7
+ import scipy.io.wavfile
8
+ from .File_System import ROOT_DIR
9
 
10
  from typing import Annotated
11
 
 
112
  "zf_xiaoni, zf_xiaoxiao, zf_xiaoyi, zm_yunjian, zm_yunxi, zm_yunxia, zm_yunyang."
113
  ),
114
  ] = "af_heart",
115
+ ) -> str:
116
  _log_call_start("Generate_Speech", text=_truncate_for_log(text, 200), speed=speed, voice=voice)
117
  if not text or not text.strip():
118
  try:
 
149
  if total_segments > 1:
150
  duration = len(final_audio) / 24_000
151
  print(f"Completed: {total_segments} segments concatenated into {duration:.1f} seconds of audio")
152
+
153
+ # Save to file
154
+ filename = f"speech_{uuid.uuid4().hex[:8]}.wav"
155
+ output_path = os.path.join(ROOT_DIR, filename)
156
+
157
+ # Normalize to 16-bit PCM
158
+ # final_audio is float32, likely in [-1, 1]. Scale to int16 range.
159
+ audio_int16 = (final_audio * 32767).astype(np.int16)
160
+ scipy.io.wavfile.write(output_path, 24000, audio_int16)
161
+
162
+ _log_call_end("Generate_Speech", f"saved_to={os.path.basename(output_path)} duration_sec={len(final_audio)/24_000:.2f}")
163
+ return output_path
164
  except gr.Error as exc:
165
  _log_call_end("Generate_Speech", f"gr_error={str(exc)}")
166
  raise