Spaces:

ruslanmv
/

ai-story-server-cpu

Running on Zero

ruslanmv committed on Sep 28

Commit

69e6077

1 Parent(s): fa37078

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -8,7 +8,7 @@ import struct
 import textwrap
 import requests
 import atexit
-import tempfile # <-- FIX: Import tempfile to manage temporary audio files
 from typing import List, Dict, Tuple, Generator
 # --- Fast, safe defaults ---
@@ -206,7 +206,6 @@ def init_models_and_latents() -> None:
     if not voice_latents:
         print("Computing voice conditioning latents...")
-        # --- FIX: Use a temporary directory to store resampled audio files ---
         with tempfile.TemporaryDirectory() as temp_dir:
             voice_files = {
                 "Cloée": "cloee-1.wav", "Julian": "julian-bedtime-style-1.wav",
@@ -214,15 +213,14 @@ def init_models_and_latents() -> None:
             }
             for role, filename in voice_files.items():
                 original_path = os.path.join("voices", filename)
-                # 1. Load and resample audio into a tensor
                 resampled_waveform = load_and_resample_audio(original_path)
-                # 2. Save the corrected tensor to a temporary file
                 temp_path = os.path.join(temp_dir, f"resampled_{filename}")
-                torchaudio.save(temp_path, resampled_waveform.squeeze(0), 24000)
-                # 3. Pass the path of the clean, temporary file to the model
                 voice_latents[role] = tts_model.get_conditioning_latents(
                     audio_path=temp_path,
                     gpt_cond_len=30,

 import textwrap
 import requests
 import atexit
+import tempfile
 from typing import List, Dict, Tuple, Generator
 # --- Fast, safe defaults ---
     if not voice_latents:
         print("Computing voice conditioning latents...")
         with tempfile.TemporaryDirectory() as temp_dir:
             voice_files = {
                 "Cloée": "cloee-1.wav", "Julian": "julian-bedtime-style-1.wav",
             }
             for role, filename in voice_files.items():
                 original_path = os.path.join("voices", filename)
                 resampled_waveform = load_and_resample_audio(original_path)
                 temp_path = os.path.join(temp_dir, f"resampled_{filename}")
+                # --- FIX: Replace torchaudio.save with the more stable soundfile.write ---
+                numpy_waveform = resampled_waveform.squeeze(0).cpu().numpy()
+                sf.write(temp_path, numpy_waveform, 24000)
                 voice_latents[role] = tts_model.get_conditioning_latents(
                     audio_path=temp_path,
                     gpt_cond_len=30,