Spaces:

sagaxlearn
/

TTS_API

Sleeping

khurrameycon committed on Feb 1, 2025

Commit

d0ae17f

verified ·

1 Parent(s): 5c1c2d4

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ from fastapi.responses import FileResponse
 from kokoro import KPipeline
 import soundfile as sf
 import os
 app = FastAPI()
@@ -34,25 +35,24 @@ async def generate_audio(text: str, voice: str = "af_heart", speed: float = 1.0)
     # Process only the first segment for demo
     for i, (gs, ps, audio) in enumerate(generator):
         # Convert to 16-bit PCM
-        # Ensure the audio is in the range [-1, 1] first
         audio = np.clip(audio, -1, 1)
         # Convert to 16-bit signed integers
         pcm_data = (audio * 32767).astype(np.int16)
-        # Convert to bytes, ensuring little-endian
-        raw_audio = pcm_data.tobytes(order='C')
-        # Return PCM data with appropriate headers
         return Response(
             content=raw_audio,
-            media_type="audio/l16",  # Linear PCM
             headers={
                 "Content-Disposition": f'attachment; filename="output.pcm"',
                 "X-Sample-Rate": "24000",
                 "X-Bits-Per-Sample": "16",
-                "X-Channels": "1",
-                "X-Encoding": "signed-integer",
-                "X-Endian": "little"
             }
         )
     return Response("No audio generated", status_code=400)

 from kokoro import KPipeline
 import soundfile as sf
 import os
+import numpy as np
 app = FastAPI()
     # Process only the first segment for demo
     for i, (gs, ps, audio) in enumerate(generator):
         # Convert to 16-bit PCM
+        # Ensure the audio is in the range [-1, 1]
         audio = np.clip(audio, -1, 1)
         # Convert to 16-bit signed integers
         pcm_data = (audio * 32767).astype(np.int16)
+        # Convert to bytes (automatically uses row-major order)
+        raw_audio = pcm_data.tobytes()
+        # Return PCM data with minimal necessary headers
         return Response(
             content=raw_audio,
+            media_type="application/octet-stream",
             headers={
                 "Content-Disposition": f'attachment; filename="output.pcm"',
                 "X-Sample-Rate": "24000",
                 "X-Bits-Per-Sample": "16",
+                "X-Endianness": "little"
             }
         )
     return Response("No audio generated", status_code=400)