Spaces:

ttsEmo
/

TTS_API

Sleeping

App Files Files Community

MariaKaiser committed on Mar 23

Commit

b6ac661

verified ·

1 Parent(s): 1806c12

Update app.py

Browse files

Files changed (1) hide show

app.py +180 -34

app.py CHANGED Viewed

@@ -133,8 +133,6 @@ import asyncio
 #     return None
-download_cache = {}
 async def download_scene_files(scene: SceneDto):
     tasks = []
@@ -154,6 +152,8 @@ async def download_scene_files(scene: SceneDto):
     downloaded_files = await asyncio.gather(*tasks)
     return downloaded_files
 async def download_file_from_url(url: str, retries: int = 3, delay: float = 2.0) -> str | None:
     """
     Downloads a file from a URL and returns the path to a temporary file.
@@ -187,6 +187,55 @@ async def download_file_from_url(url: str, retries: int = 3, delay: float = 2.0)
     #print(f"All {retries} attempts failed for {url}, skipping...")
     return None
 #-----------------------------------------------------------
 #takes the text to be said and path to the prosody audio and path to save the generated audio and returns path to the generated audio
@@ -211,55 +260,152 @@ def inference_by_model(text: str, audio_file: str, save_path: str) -> str:
 #_______________generate audios and folder structure_______________________
-async def generate_story_audios(story: StoryCreationDTO, base_output: str):
     """
-    Generates audio files and folders for the entire story
     """
     story_dir = Path(base_output) / story.storyId
     story_dir.mkdir(parents=True, exist_ok=True)
     for chapter in story.chapters:
         chapter_dir = story_dir / chapter.chapterId
         chapter_dir.mkdir(exist_ok=True)
-        # --- Chapter title audio ---
-        prosody_file_title = await download_file_from_url(chapter.title.prosodyReference)
         title_save_path = chapter_dir / "title.wav"
-        tagged_text_title = generate_tagged_text(
-            chapter.title.sentence,
-            chapter.title.emotion,
-            chapter.title.intensity
         )
-        title_generated_audio_path = inference_by_model(
-            text=tagged_text_title,
-            audio_file=prosody_file_title,
-            save_path=title_save_path
         )
-        # os.remove(prosody_file_title)
         for scene in chapter.scenes:
             await download_scene_files(scene)
-            scene_dir = chapter_dir / scene.sceneId
-            scene_dir.mkdir(exist_ok=True)
-            # --- Sentences audio ---
             for sentence in scene.sentences:
-                # Download the prosody reference audio from Supabase
-                prosody_file = download_cache[sentence.prosodyReference]
-                sentence_save_path = scene_dir / f"{sentence.sentenceId}.wav"
-                tagged_text = generate_tagged_text(
-                    sentence.sentence,
-                    sentence.emotion,
-                    sentence.intensity
-                )
-                sentence_generated_audio_path = inference_by_model(
-                    text=tagged_text,
-                    audio_file=prosody_file,
-                    save_path=sentence_save_path
-                )
-                # os.remove(prosody_file)
 #_______________ Concatenating the generated audios to make the final story (post-processing)_______________________

 #     return None
 async def download_scene_files(scene: SceneDto):
     tasks = []
     downloaded_files = await asyncio.gather(*tasks)
     return downloaded_files
+download_cache = {}  # in-memory map: url -> local file
 async def download_file_from_url(url: str, retries: int = 3, delay: float = 2.0) -> str | None:
     """
     Downloads a file from a URL and returns the path to a temporary file.
     #print(f"All {retries} attempts failed for {url}, skipping...")
     return None
+# import os
+# import httpx
+# import asyncio
+# CACHE_DIR = "audio_cache"
+# os.makedirs(CACHE_DIR, exist_ok=True)  # create if not exists folder stores permanently on disk
+# async def download_file_from_url(url: str, retries: int = 3, delay: float = 2.0) -> str | None:
+#     """
+#     Downloads a file from a URL and stores it in a permanent cache folder.
+#     Returns the local file path. Reuses already downloaded files.
+#     """
+#     if url in download_cache:
+#         #print(f"{url} is in download cache")
+#         return download_cache[url]
+#     # determine local file path in cache folder
+#     filename = url.split("/")[-1]  # simple filename from URL
+#     local_path = os.path.join(CACHE_DIR, filename)
+#     # check if file already exists on disk
+#     if os.path.exists(local_path):
+#         #print(f"{url} is in disk and put to download cache now")
+#         download_cache[url] = local_path
+#         return local_path
+#     # download if not cached
+#     for attempt in range(1, retries + 1):
+#         try:
+#             async with httpx.AsyncClient(timeout=60.0) as client:
+#                 response = await client.get(url)
+#                 response.raise_for_status()
+#             # save to permanent cache folder
+#             with open(local_path, "wb") as f:
+#                 f.write(response.content)
+#             download_cache[url] = local_path
+#             #print(f"{url} is downloaded from supabase and stored in disk and download cache now")
+#             return local_path
+#         except Exception as e:
+#             if attempt < retries:
+#                 await asyncio.sleep(delay)
+#     return None
 #-----------------------------------------------------------
 #takes the text to be said and path to the prosody audio and path to save the generated audio and returns path to the generated audio
 #_______________generate audios and folder structure_______________________
+# async def generate_story_audios(story: StoryCreationDTO, base_output: str):
+#     """
+#     Generates audio files and folders for the entire story
+#     """
+#     story_dir = Path(base_output) / story.storyId
+#     story_dir.mkdir(parents=True, exist_ok=True)
+#     for chapter in story.chapters:
+#         chapter_dir = story_dir / chapter.chapterId
+#         chapter_dir.mkdir(exist_ok=True)
+#         # --- Chapter title audio ---
+#         prosody_file_title = await download_file_from_url(chapter.title.prosodyReference)
+#         title_save_path = chapter_dir / "title.wav"
+#         tagged_text_title = generate_tagged_text(
+#             chapter.title.sentence,
+#             chapter.title.emotion,
+#             chapter.title.intensity
+#         )
+        # title_generated_audio_path = inference_by_model(
+        #     text=tagged_text_title,
+        #     audio_file=prosody_file_title,
+        #     save_path=title_save_path
+        # )
+        # # os.remove(prosody_file_title)
+        # for scene in chapter.scenes:
+        #     await download_scene_files(scene)
+        #     scene_dir = chapter_dir / scene.sceneId
+        #     scene_dir.mkdir(exist_ok=True)
+        #     # --- Sentences audio ---
+        #     for sentence in scene.sentences:
+        #         # Download the prosody reference audio from Supabase
+        #         prosody_file = download_cache[sentence.prosodyReference]
+        #         sentence_save_path = scene_dir / f"{sentence.sentenceId}.wav"
+        #         tagged_text = generate_tagged_text(
+        #             sentence.sentence,
+        #             sentence.emotion,
+        #             sentence.intensity
+        #         )
+        #         sentence_generated_audio_path = inference_by_model(
+        #             text=tagged_text,
+        #             audio_file=prosody_file,
+        #             save_path=sentence_save_path
+        #         )
+        #         # os.remove(prosody_file)
+import asyncio
+from pathlib import Path
async def generate_story_audios_async(story: StoryCreationDTO, base_output: str, max_concurrent_gpu: int = 1):
    """
    Generates the title and sentence audio files for every chapter of a story.

    Output layout (directories are created as needed):
        <base_output>/<storyId>/<chapterId>/title.wav
        <base_output>/<storyId>/<chapterId>/<sceneId>/<sentenceId>.wav

    Every call to inference_by_model runs in the event loop's default executor
    so the loop is not blocked, and every call (titles as well as sentences)
    is guarded by one shared semaphore.

    Args:
        story: story to render; read for storyId and chapters, and per chapter
            for chapterId, title and scenes.
        base_output: root directory under which the story folder is created.
        max_concurrent_gpu: maximum number of inference calls allowed to run
            at the same time (1 if the GPU is the bottleneck).

    Raises:
        ValueError: if max_concurrent_gpu is smaller than 1 (a zero-sized
            semaphore would make every inference wait forever).
    """
    if max_concurrent_gpu < 1:
        raise ValueError("max_concurrent_gpu must be at least 1")

    story_dir = Path(base_output) / story.storyId
    story_dir.mkdir(parents=True, exist_ok=True)
    print(f"[INFO] Generating story '{story.storyId}' in {story_dir}")

    # Single semaphore shared by title and sentence inference so the limit
    # really bounds the number of simultaneous inference calls.
    gpu_semaphore = asyncio.Semaphore(max_concurrent_gpu)
    # We are inside a coroutine, so a running loop is guaranteed to exist.
    loop = asyncio.get_running_loop()

    async def process_sentence(chapter_dir: Path, scene: SceneDto, sentence: SentenceDto):
        """Generates the audio for one sentence; returns its path, or None if skipped."""
        print(f"[INFO] Starting sentence '{sentence.sentenceId}' in scene '{scene.sceneId}'")

        # Cheap, non-GPU preparation happens before the semaphore is taken so
        # a sentence that will be skipped never waits for a GPU slot.
        prosody_file = download_cache.get(sentence.prosodyReference)
        if not prosody_file:
            print(f"[WARN] Prosody file for '{sentence.sentenceId}' not found in cache")
            return None

        sentence_save_path = chapter_dir / scene.sceneId / f"{sentence.sentenceId}.wav"
        sentence_save_path.parent.mkdir(parents=True, exist_ok=True)
        tagged_text = generate_tagged_text(
            sentence.sentence,
            sentence.emotion,
            sentence.intensity
        )

        async with gpu_semaphore:
            print(f"[GPU] Acquired GPU for sentence '{sentence.sentenceId}'")
            # Run inference in a thread pool to avoid blocking the event loop
            generated_path = await loop.run_in_executor(
                None,
                inference_by_model,
                tagged_text,
                prosody_file,
                str(sentence_save_path)
            )
        print(f"[DONE] Generated audio for sentence '{sentence.sentenceId}' -> {generated_path}")
        return generated_path

    # One gather future per chapter; asyncio.gather schedules its coroutines
    # right away, so earlier chapters keep running while later ones are prepared.
    chapter_tasks = []
    for chapter in story.chapters:
        chapter_dir = story_dir / chapter.chapterId
        chapter_dir.mkdir(exist_ok=True)
        print(f"[INFO] Processing chapter '{chapter.chapterId}'")

        # --- Chapter title ---
        title_prosody = await download_file_from_url(chapter.title.prosodyReference)
        if not title_prosody:
            # download_file_from_url is declared to return None on failure;
            # skip the title the same way sentences without prosody are skipped.
            print(f"[WARN] Prosody file for title of chapter '{chapter.chapterId}' could not be downloaded")
        else:
            title_save_path = chapter_dir / "title.wav"
            tagged_title = generate_tagged_text(
                chapter.title.sentence,
                chapter.title.emotion,
                chapter.title.intensity
            )
            # Sentence tasks of earlier chapters may already be running, so the
            # title has to take a semaphore slot as well.
            async with gpu_semaphore:
                print(f"[GPU] Generating title audio for chapter '{chapter.chapterId}'")
                await loop.run_in_executor(
                    None,
                    inference_by_model,
                    tagged_title,
                    title_prosody,
                    str(title_save_path)
                )
            print(f"[DONE] Generated title audio for chapter '{chapter.chapterId}' -> {title_save_path}")

        # --- Scenes ---
        scene_tasks = []
        for scene in chapter.scenes:
            print(f"[INFO] Downloading files for scene '{scene.sceneId}'")
            # Awaited before the sentence coroutines are created, so the
            # download_cache lookups in process_sentence run after it finishes.
            await download_scene_files(scene)
            for sentence in scene.sentences:
                scene_tasks.append(process_sentence(chapter_dir, scene, sentence))

        if scene_tasks:
            print(f"[INFO] Running {len(scene_tasks)} sentences for chapter '{chapter.chapterId}' concurrently")
            chapter_tasks.append(asyncio.gather(*scene_tasks))
        else:
            print(f"[WARN] No sentences found in chapter '{chapter.chapterId}'")

    # Wait for all chapters to complete
    if chapter_tasks:
        await asyncio.gather(*chapter_tasks)
        print(f"[INFO] Completed generating all chapters for story '{story.storyId}'")
    else:
        print(f"[WARN] No chapters/tasks to process for story '{story.storyId}'")
 #_______________ Concatenating the generated audios to make the final story (post-processing)_______________________