Spaces:

ttsEmo
/

TTS_API

Sleeping

MariaKaiser committed on Mar 10

Commit

1f7f064

verified ·

1 Parent(s): 43378d2

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -135,6 +135,31 @@ if not os.path.exists(dummy_path):
     silent = torch.zeros(1, 24000)  # 1 channel, 24000 samples
     torchaudio.save(dummy_path, silent, 24000)
 @app.post("/tts/")
 async def process_story(story: StoryCreationDTO):
     print(story.storyId)
@@ -147,12 +172,20 @@ async def process_story(story: StoryCreationDTO):
             for sentence in scene.sentences:
                 print(sentence.speaker, sentence.sentence)
-    # return dummy audio
-    return FileResponse(
-        dummy_path,
-        media_type="audio/wav",
-        filename="output.wav"
     )
     #return {"status": "Story received"}

     silent = torch.zeros(1, 24000)  # 1 channel, 24000 samples
     torchaudio.save(dummy_path, silent, 24000)
+from pydantic import BaseModel
+class TTSResponse(BaseModel):  # response schema: an audio file's name, its duration, and its Base64 payload
+    file_name: str  # name reported for the audio file
+    duration: float  # clip length in seconds
+    audio_base64: str  # Base64 text of the audio file's bytes
+######## Convert your audio to Base64
+import base64
+import torchaudio
+import io
+def audio_to_base64(audio_path: str) -> (str, float):
+    # load audio to get duration
+    waveform, sr = torchaudio.load(audio_path)  # waveform shape: [channels, samples]
+    duration = waveform.shape[1] / sr  # seconds
+    # read file bytes
+    with open(audio_path, "rb") as f:
+        audio_bytes = f.read()
+    audio_b64 = base64.b64encode(audio_bytes).decode("utf-8")
+    return audio_b64, duration
 @app.post("/tts/")
 async def process_story(story: StoryCreationDTO):
     print(story.storyId)
             for sentence in scene.sentences:
                 print(sentence.speaker, sentence.sentence)
+    # For testing, use your dummy WAV
+    dummy_path = os.path.join(OUTPUT_DIR, "dummy.wav")
+    # Convert to base64 and get duration
+    audio_b64, duration = audio_to_base64(dummy_path)
+    response = TTSResponse(
+        file_name="chapter1_scene2.wav",
+        duration=duration,
+        audio_base64=audio_b64
     )
+    return response
     #return {"status": "Story received"}