Spaces:

ttsEmo
/

TTS_API

Sleeping

App Files Files Community

MariaKaiser committed on Mar 23

Commit

343b576

verified ·

1 Parent(s): 5554813

Update app.py

Browse files

Files changed (1) hide show

app.py +34 -180

app.py CHANGED Viewed

@@ -133,6 +133,8 @@ import asyncio
 #     return None
 async def download_scene_files(scene: SceneDto):
     tasks = []
@@ -152,8 +154,6 @@ async def download_scene_files(scene: SceneDto):
     downloaded_files = await asyncio.gather(*tasks)
     return downloaded_files
-download_cache = {}  # in-memory map: url -> local file
 async def download_file_from_url(url: str, retries: int = 3, delay: float = 2.0) -> str | None:
     """
     Downloads a file from a URL and returns the path to a temporary file.
@@ -187,55 +187,6 @@ async def download_file_from_url(url: str, retries: int = 3, delay: float = 2.0)
     #print(f"All {retries} attempts failed for {url}, skipping...")
     return None
-# import os
-# import httpx
-# import asyncio
-# CACHE_DIR = "audio_cache"
-# os.makedirs(CACHE_DIR, exist_ok=True)  # create if not exists folder stores permanently on disk
-# async def download_file_from_url(url: str, retries: int = 3, delay: float = 2.0) -> str | None:
-#     """
-#     Downloads a file from a URL and stores it in a permanent cache folder.
-#     Returns the local file path. Reuses already downloaded files.
-#     """
-#     if url in download_cache:
-#         #print(f"{url} is in download cache")
-#         return download_cache[url]
-#     # determine local file path in cache folder
-#     filename = url.split("/")[-1]  # simple filename from URL
-#     local_path = os.path.join(CACHE_DIR, filename)
-#     # check if file already exists on disk
-#     if os.path.exists(local_path):
-#         #print(f"{url} is in disk and put to download cache now")
-#         download_cache[url] = local_path
-#         return local_path
-#     # download if not cached
-#     for attempt in range(1, retries + 1):
-#         try:
-#             async with httpx.AsyncClient(timeout=60.0) as client:
-#                 response = await client.get(url)
-#                 response.raise_for_status()
-#             # save to permanent cache folder
-#             with open(local_path, "wb") as f:
-#                 f.write(response.content)
-#             download_cache[url] = local_path
-#             #print(f"{url} is downloaded from supabase and stored in disk and download cache now")
-#             return local_path
-#         except Exception as e:
-#             if attempt < retries:
-#                 await asyncio.sleep(delay)
-#     return None
 #-----------------------------------------------------------
 #takes the text to be said and path to the prosody audio and path to save the generated audio and returns path to the generated audio
@@ -260,152 +211,55 @@ def inference_by_model(text: str, audio_file: str, save_path: str) -> str:
 #_______________generate audios and folder structure_______________________
-# async def generate_story_audios(story: StoryCreationDTO, base_output: str):
-#     """
-#     Generates audio files and folders for the entire story
-#     """
-#     story_dir = Path(base_output) / story.storyId
-#     story_dir.mkdir(parents=True, exist_ok=True)
-#     for chapter in story.chapters:
-#         chapter_dir = story_dir / chapter.chapterId
-#         chapter_dir.mkdir(exist_ok=True)
-#         # --- Chapter title audio ---
-#         prosody_file_title = await download_file_from_url(chapter.title.prosodyReference)
-#         title_save_path = chapter_dir / "title.wav"
-#         tagged_text_title = generate_tagged_text(
-#             chapter.title.sentence,
-#             chapter.title.emotion,
-#             chapter.title.intensity
-#         )
-        # title_generated_audio_path = inference_by_model(
-        #     text=tagged_text_title,
-        #     audio_file=prosody_file_title,
-        #     save_path=title_save_path
-        # )
-        # # os.remove(prosody_file_title)
-        # for scene in chapter.scenes:
-        #     await download_scene_files(scene)
-        #     scene_dir = chapter_dir / scene.sceneId
-        #     scene_dir.mkdir(exist_ok=True)
-        #     # --- Sentences audio ---
-        #     for sentence in scene.sentences:
-        #         # Download the prosody reference audio from Supabase
-        #         prosody_file = download_cache[sentence.prosodyReference]
-        #         sentence_save_path = scene_dir / f"{sentence.sentenceId}.wav"
-        #         tagged_text = generate_tagged_text(
-        #             sentence.sentence,
-        #             sentence.emotion,
-        #             sentence.intensity
-        #         )
-        #         sentence_generated_audio_path = inference_by_model(
-        #             text=tagged_text,
-        #             audio_file=prosody_file,
-        #             save_path=sentence_save_path
-        #         )
-        #         # os.remove(prosody_file)
-import asyncio
-from pathlib import Path
-async def generate_story_audios(story: StoryCreationDTO, base_output: str, max_concurrent_gpu: int = 1):
     """
-    Generates audio files for the story while overlapping GPU inference and disk writes.
-    max_concurrent_gpu: semaphore to limit simultaneous GPU usage (1 if GPU is the bottleneck)
     """
     story_dir = Path(base_output) / story.storyId
     story_dir.mkdir(parents=True, exist_ok=True)
-    #print(f"[INFO] Generating story '{story.storyId}' in {story_dir}")
-    # Semaphore ensures we don't overload GPU
-    gpu_semaphore = asyncio.Semaphore(max_concurrent_gpu)
-    async def process_sentence(chapter_dir: Path, scene: SceneDto, sentence: SentenceDto):
-        #print(f"[INFO] Starting sentence '{sentence.sentenceId}' in scene '{scene.sceneId}'")
-        async with gpu_semaphore:
-            #print(f"[GPU] Acquired GPU for sentence '{sentence.sentenceId}'")
-            # Get prosody file from cache
-            prosody_file = download_cache.get(sentence.prosodyReference)
-            if not prosody_file:
-                #print(f"[WARN] Prosody file for '{sentence.sentenceId}' not found in cache")
-                return None
-            sentence_save_path = chapter_dir / scene.sceneId / f"{sentence.sentenceId}.wav"
-            Path(sentence_save_path).parent.mkdir(parents=True, exist_ok=True)
-            tagged_text = generate_tagged_text(
-                sentence.sentence,
-                sentence.emotion,
-                sentence.intensity
-            )
-            # Run GPU inference in a thread pool to avoid blocking event loop
-            loop = asyncio.get_event_loop()
-            generated_path = await loop.run_in_executor(
-                None,
-                inference_by_model,
-                tagged_text,
-                prosody_file,
-                str(sentence_save_path)
-            )
-            #print(f"[DONE] Generated audio for sentence '{sentence.sentenceId}' -> {generated_path}")
-            return generated_path
-    # Prepare tasks for chapters
-    chapter_tasks = []
     for chapter in story.chapters:
         chapter_dir = story_dir / chapter.chapterId
         chapter_dir.mkdir(exist_ok=True)
-        print(f"[INFO] Processing chapter '{chapter.chapterId}'")
-        # --- Chapter title ---
-        title_prosody = await download_file_from_url(chapter.title.prosodyReference)
         title_save_path = chapter_dir / "title.wav"
-        tagged_title = generate_tagged_text(
-            chapter.title.sentence,
-            chapter.title.emotion,
-            chapter.title.intensity
         )
-        #print(f"[GPU] Generating title audio for chapter '{chapter.chapterId}'")
-        loop = asyncio.get_event_loop()
-        await loop.run_in_executor(
-            None,
-            inference_by_model,
-            tagged_title,
-            title_prosody,
-            str(title_save_path)
         )
-        #print(f"[DONE] Generated title audio for chapter '{chapter.chapterId}' -> {title_save_path}")
-        # --- Scenes ---
-        scene_tasks = []
         for scene in chapter.scenes:
-            #print(f"[INFO] Downloading files for scene '{scene.sceneId}'")
             await download_scene_files(scene)
             for sentence in scene.sentences:
-                scene_tasks.append(process_sentence(chapter_dir, scene, sentence))
-        if scene_tasks:
-            #print(f"[INFO] Running {len(scene_tasks)} sentences for chapter '{chapter.chapterId}' concurrently")
-            chapter_tasks.append(asyncio.gather(*scene_tasks))
-        #else:
-            #print(f"[WARN] No sentences found in chapter '{chapter.chapterId}'")
-    # Wait for all chapters to complete
-    if chapter_tasks:
-        await asyncio.gather(*chapter_tasks)
-        #print(f"[INFO] Completed generating all chapters for story '{story.storyId}'")
-    #else:
-        #print(f"[WARN] No chapters/tasks to process for story '{story.storyId}'")
 #_______________ Concatenating the generated audios to make the final story (post-processing)_______________________

 #     return None
+download_cache = {}
 async def download_scene_files(scene: SceneDto):
     tasks = []
     downloaded_files = await asyncio.gather(*tasks)
     return downloaded_files
 async def download_file_from_url(url: str, retries: int = 3, delay: float = 2.0) -> str | None:
     """
     Downloads a file from a URL and returns the path to a temporary file.
     #print(f"All {retries} attempts failed for {url}, skipping...")
     return None
 #-----------------------------------------------------------
 #takes the text to be said and path to the prosody audio and path to save the generated audio and returns path to the generated audio
 #_______________generate audios and folder structure_______________________
+async def generate_story_audios(story: StoryCreationDTO, base_output: str):
     """
+    Generates audio files and folders for the entire story
     """
     story_dir = Path(base_output) / story.storyId
     story_dir.mkdir(parents=True, exist_ok=True)
     for chapter in story.chapters:
         chapter_dir = story_dir / chapter.chapterId
         chapter_dir.mkdir(exist_ok=True)
+        # --- Chapter title audio ---
+        prosody_file_title = await download_file_from_url(chapter.title.prosodyReference)
         title_save_path = chapter_dir / "title.wav"
+        tagged_text_title = generate_tagged_text(
+            chapter.title.sentence,
+            chapter.title.emotion,
+            chapter.title.intensity
         )
+        title_generated_audio_path = inference_by_model(
+            text=tagged_text_title,
+            audio_file=prosody_file_title,
+            save_path=title_save_path
         )
+        # os.remove(prosody_file_title)
         for scene in chapter.scenes:
             await download_scene_files(scene)
+            scene_dir = chapter_dir / scene.sceneId
+            scene_dir.mkdir(exist_ok=True)
+            # --- Sentences audio ---
             for sentence in scene.sentences:
+                # Download the prosody reference audio from Supabase
+                prosody_file = download_cache[sentence.prosodyReference]
+                sentence_save_path = scene_dir / f"{sentence.sentenceId}.wav"
+                tagged_text = generate_tagged_text(
+                    sentence.sentence,
+                    sentence.emotion,
+                    sentence.intensity
+                )
+                sentence_generated_audio_path = inference_by_model(
+                    text=tagged_text,
+                    audio_file=prosody_file,
+                    save_path=sentence_save_path
+                )
+                # os.remove(prosody_file)
 #_______________ Concatenating the generated audios to make the final story (post-processing)_______________________