Spaces:

ttsEmo
/

TTS_API

Sleeping

App Files Files Community

MariaKaiser committed on Mar 19

Commit

cf9540e

verified ·

1 Parent(s): fe0e5ad

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -29

app.py CHANGED Viewed

@@ -5,6 +5,7 @@ import torchaudio
 import os
 from pydantic import BaseModel
 from typing import List, Optional
 # MODEL_DIR = "my_model"
@@ -140,7 +141,7 @@ def inference_by_model(text: str, audio_file: str, save_path: str) -> str:
 #_______________generate audios and folder structure_______________________
-async def generate_story_audios(story: StoryCreationDTO, base_output: str = "stories"):
     """
     Generates audio files and folders for the entire story
     """
@@ -182,7 +183,7 @@ async def generate_story_audios(story: StoryCreationDTO, base_output: str = "sto
 from pydub import AudioSegment
 import asyncio
-async def concat_story_audio(story: StoryCreationDTO, base_output="stories", final_path: str = None,): # full path including filename
     story_dir = Path(base_output) / story.storyId
     story_dir.mkdir(parents=True, exist_ok=True)
@@ -275,28 +276,10 @@ def root():
 #-----------------------------------------------------------
-########## creating a dummy audio file
-import torchaudio
-import torch
-import os
-OUTPUT_DIR = "outputs"
-os.makedirs(OUTPUT_DIR, exist_ok=True)
-dummy_path = os.path.join(OUTPUT_DIR, "dummy.wav")
-# Generate 1 second of silence at 24kHz
-if not os.path.exists(dummy_path):
-    silent = torch.zeros(1, 24000)  # 1 channel, 24000 samples
-    torchaudio.save(dummy_path, silent, 24000)
-from pydantic import BaseModel
 class TTSResponse(BaseModel):
     file_name: str
     duration: float  # seconds
-    audio_base64: str
 ######## Convert your audio to Base64
 import base64
@@ -318,32 +301,34 @@ def audio_to_base64(audio_path: str) -> (str, float):
 @app.post("/tts/")
 async def process_story(story: StoryCreationDTO):
     print(story.storyId)
     for cast in story.cast:
         print(cast.name, cast.voiceReference)
     for chapter in story.chapters:
         for scene in chapter.scenes:
             for sentence in scene.sentences:
                 print(sentence.speaker, sentence.sentence)
-    # For testing, use your dummy WAV
-    dummy_path = os.path.join(OUTPUT_DIR, "dummy.wav")
     # Convert to base64 and get duration
-    audio_b64, duration = audio_to_base64(dummy_path)
     response = TTSResponse(
-        file_name="chapter1_scene2.wav",
         duration=duration,
         audio_base64=audio_b64
     )
     return response
-    #return {"status": "Story received"}
 # async def tts_endpoint(

 import os
 from pydantic import BaseModel
 from typing import List, Optional
+from pathlib import Path
 # MODEL_DIR = "my_model"
 #_______________generate audios and folder structure_______________________
+async def generate_story_audios(story: StoryCreationDTO, base_output: str):
     """
     Generates audio files and folders for the entire story
     """
 from pydub import AudioSegment
 import asyncio
+async def concat_story_audio(story: StoryCreationDTO, base_output: str, final_path: str = None): # full path including filename
     story_dir = Path(base_output) / story.storyId
     story_dir.mkdir(parents=True, exist_ok=True)
 #-----------------------------------------------------------
 class TTSResponse(BaseModel):
     file_name: str
     duration: float  # seconds
+    audio_base64: str
 ######## Convert your audio to Base64
 import base64
 @app.post("/tts/")
 async def process_story(story: StoryCreationDTO):
+  # Optional: print info for debugging
     print(story.storyId)
     for cast in story.cast:
         print(cast.name, cast.voiceReference)
     for chapter in story.chapters:
         for scene in chapter.scenes:
             for sentence in scene.sentences:
                 print(sentence.speaker, sentence.sentence)
+    # 1️⃣ Generate all sentence audios and folder structure
+    await generate_story_audios(story, base_output=OUTPUT_DIR)
+     # 2️⃣ Concatenate all into final story audio
+    final_story_path = os.path.join(OUTPUT_DIR, story.storyId, f"{story.storyId}_full.wav")
+    final_generated_story_path = await concat_story_audio(story, base_output=OUTPUT_DIR, final_path=final_story_path)
     # Convert to base64 and get duration
+    audio_b64, duration = audio_to_base64(final_generated_story_path)
     response = TTSResponse(
+        file_name= os.path.basename(final_generated_story_path),
         duration=duration,
         audio_base64=audio_b64
     )
     return response
 # async def tts_endpoint(