Spaces:

Nymbo
/

Tools

Running

Nymbo committed 18 days ago

Commit

74c7cce

verified ·

1 Parent(s): 98102a9

Update Modules/Generate_Speech.py

Files changed (1) hide show

Modules/Generate_Speech.py CHANGED Viewed

@@ -2,6 +2,10 @@ from __future__ import annotations
 import numpy as np
 import gradio as gr
 from typing import Annotated
@@ -108,7 +112,7 @@ def Generate_Speech(
             "zf_xiaoni, zf_xiaoxiao, zf_xiaoyi, zm_yunjian, zm_yunxi, zm_yunxia, zm_yunyang."
         ),
     ] = "af_heart",
-) -> tuple[int, np.ndarray]:
     _log_call_start("Generate_Speech", text=_truncate_for_log(text, 200), speed=speed, voice=voice)
     if not text or not text.strip():
         try:
@@ -145,8 +149,18 @@ def Generate_Speech(
             if total_segments > 1:
                 duration = len(final_audio) / 24_000
                 print(f"Completed: {total_segments} segments concatenated into {duration:.1f} seconds of audio")
-        _log_call_end("Generate_Speech", f"samples={final_audio.shape[0]} duration_sec={len(final_audio)/24_000:.2f}")
-        return 24_000, final_audio
     except gr.Error as exc:
         _log_call_end("Generate_Speech", f"gr_error={str(exc)}")
         raise

 import numpy as np
 import gradio as gr
+import os
+import uuid
+import scipy.io.wavfile
+from .File_System import ROOT_DIR
 from typing import Annotated
             "zf_xiaoni, zf_xiaoxiao, zf_xiaoyi, zm_yunjian, zm_yunxi, zm_yunxia, zm_yunyang."
         ),
     ] = "af_heart",
+) -> str:
     _log_call_start("Generate_Speech", text=_truncate_for_log(text, 200), speed=speed, voice=voice)
     if not text or not text.strip():
         try:
             if total_segments > 1:
                 duration = len(final_audio) / 24_000
                 print(f"Completed: {total_segments} segments concatenated into {duration:.1f} seconds of audio")
+        # Save to file
+        filename = f"speech_{uuid.uuid4().hex[:8]}.wav"
+        output_path = os.path.join(ROOT_DIR, filename)
+        # Normalize to 16-bit PCM
+        # final_audio is float32, likely in [-1, 1]. Scale to int16 range.
+        audio_int16 = (final_audio * 32767).astype(np.int16)
+        scipy.io.wavfile.write(output_path, 24000, audio_int16)
+        _log_call_end("Generate_Speech", f"saved_to={os.path.basename(output_path)} duration_sec={len(final_audio)/24_000:.2f}")
+        return output_path
     except gr.Error as exc:
         _log_call_end("Generate_Speech", f"gr_error={str(exc)}")
         raise