Spaces:

ttsEmo
/

TTS_API

Sleeping

App Files Files Community

MariaKaiser committed on Mar 19

Commit

fe0e5ad

verified ·

1 Parent(s): 79bfece

Update app.py

Browse files

Files changed (1) hide show

app.py +71 -0

app.py CHANGED Viewed

@@ -177,6 +177,77 @@ async def generate_story_audios(story: StoryCreationDTO, base_output: str = "sto
                 )
                 os.remove(prosody_file)
 app = FastAPI(title="EGTTS Arabic TTS API")
 #___________________Test end point to test supabase fetch

                 )
                 os.remove(prosody_file)
+#_______________ Concatenate the generated audio clips into the final story (post-processing)_______________________
+from pydub import AudioSegment
+import asyncio
+async def concat_story_audio(story: StoryCreationDTO, base_output="stories", final_path: str = None,): # full path including filename
+    story_dir = Path(base_output) / story.storyId
+    story_dir.mkdir(parents=True, exist_ok=True)
+    if final_path is None:
+        final_path = story_dir / f"{story.storyId}_full.wav"
+    else:
+        final_path = Path(final_path)
+        final_path.parent.mkdir(parents=True, exist_ok=True)  # ensure folder exists
+    chapters_audio = AudioSegment.silent(duration=0)  # start empty
+    for chapter in story.chapters:
+        chapter_dir = story_dir / chapter.chapterId
+        # --- Chapter title ---
+        title_path = chapter_dir / "title.wav"
+        chapter_audio = AudioSegment.from_wav(title_path)
+        for scene in chapter.scenes:
+            scene_dir = chapter_dir / scene.sceneId
+            scene_audio = AudioSegment.silent(duration=0)
+            # --- Concatenate sentence audios ---
+            for sentence in scene.sentences:
+                sentence_path = scene_dir / f"{sentence.sentenceId}.wav"
+                sentence_audio = AudioSegment.from_wav(sentence_path)
+                scene_audio += sentence_audio
+            # --- Add SFX for location if available ---
+            if scene.locationName:
+                sfx_file = await download_file_from_url(scene.locationName)
+                sfx_audio = AudioSegment.from_wav(sfx_file)
+                scene_audio = scene_audio.overlay(sfx_audio)
+                os.remove(sfx_file)
+            # --- Add background music if available ---
+            if scene.bgMusic and scene.bgMusic.musicPath:
+                bg_url = scene.bgMusic.musicPath
+                bg_file = await download_file_from_url(bg_url)
+                bg_audio = AudioSegment.from_wav(bg_file)
+                # Adjust volume
+                bg_audio = bg_audio - (1 - scene.bgMusic.volume) * 30  # approximate
+                # Loop if shorter than scene
+                if len(bg_audio) < len(scene_audio):
+                    loops = (len(scene_audio) // len(bg_audio)) + 1
+                    bg_audio = bg_audio * loops
+                bg_audio = bg_audio[:len(scene_audio)]  # trim to match scene
+                scene_audio = scene_audio.overlay(bg_audio)
+                os.remove(bg_file)
+            # Add 2 seconds of silence between scenes
+            scene_audio += AudioSegment.silent(duration=2000)
+            chapter_audio += scene_audio
+        # Add 3 seconds of silence between chapters
+        chapter_audio += AudioSegment.silent(duration=3000)
+        chapters_audio += chapter_audio
+    # Export final story
+    chapters_audio.export(final_path, format="wav")
+    return final_path
+#-------------------------------------------------------------
 app = FastAPI(title="EGTTS Arabic TTS API")
 #___________________Test end point to test supabase fetch