Spaces:

ttsEmo
/

TTS_API

Sleeping

App Files Community

MariaKaiser committed on Mar 20

Commit

0c3dd18

verified ·

1 Parent(s): fb77147

Update app.py

Browse files

Files changed (1) hide show

app.py +44 -31

app.py CHANGED Viewed

@@ -155,8 +155,15 @@ async def generate_story_audios(story: StoryCreationDTO, base_output: str):
         # --- Chapter title audio ---
         prosody_file_title = await download_file_from_url(chapter.title.prosodyReference)
         title_save_path = chapter_dir / "title.wav"
         title_generated_audio_path = inference_by_model(
-            text=chapter.title.sentence,
             audio_file=prosody_file_title,
             save_path=title_save_path
         )
@@ -171,8 +178,13 @@ async def generate_story_audios(story: StoryCreationDTO, base_output: str):
                 # Download the prosody reference audio from Supabase
                 prosody_file = await download_file_from_url(sentence.prosodyReference)
                 sentence_save_path = scene_dir / f"{sentence.sentenceId}.wav"
                 sentence_generated_audio_path = inference_by_model(
-                    text=sentence.sentence,
                     audio_file=prosody_file,
                     save_path=sentence_save_path
                 )
@@ -298,6 +310,35 @@ def audio_to_base64(audio_path: str) -> (str, float):
     audio_b64 = base64.b64encode(audio_bytes).decode("utf-8")
     return audio_b64, duration
 @app.post("/tts/")
 async def process_story(story: StoryCreationDTO):
@@ -329,35 +370,7 @@ async def process_story(story: StoryCreationDTO):
     return response
-#---------------------------concatenate text with tags ---------------------------
-# Map Intensity numbers to tag strings
-intensity_map = {
-    1: "low",
-    2: "mid",
-    3: "high"
-}
-# Map Emotion enum names to lowercase tag strings
-emotion_map = {
-    "HAPPINESS": "happiness",
-    "SADNESS": "sadness",
-    "FEAR": "fear",
-    "ANGER": "anger",
-    "SURPRISE": "surprise",
-    "WHISPER": "whisper",
-    "NARRATION": "narration"
-}
-def generate_tagged_text(text: str, emotion_enum: str, intensity_enum: int) -> str:
-    """
-    Convert enums to <emo_x> <int_y> format and concatenate with text
-    """
-    emo_tag = f"<emo_{emotion_map[emotion_enum]}>"
-    int_tag = f"<int_{intensity_map[intensity_enum]}>"
-    return f"{emo_tag} {int_tag} {text}"
-#-----------------------------------------------------------
 @app.post("/tts_test/")
 async def tts_endpoint(

         # --- Chapter title audio ---
         prosody_file_title = await download_file_from_url(chapter.title.prosodyReference)
         title_save_path = chapter_dir / "title.wav"
+         tagged_text_title = generate_tagged_text(
+            chapter.title.sentence,
+            chapter.title.emotion,
+            chapter.title.intensity
+        )
         title_generated_audio_path = inference_by_model(
+            text=tagged_text_title,
             audio_file=prosody_file_title,
             save_path=title_save_path
         )
                 # Download the prosody reference audio from Supabase
                 prosody_file = await download_file_from_url(sentence.prosodyReference)
                 sentence_save_path = scene_dir / f"{sentence.sentenceId}.wav"
+                tagged_text = generate_tagged_text(
+                    sentence.sentence,
+                    sentence.emotion,
+                    sentence.intensity
+                )
                 sentence_generated_audio_path = inference_by_model(
+                    text=tagged_text,
                     audio_file=prosody_file,
                     save_path=sentence_save_path
                 )
     audio_b64 = base64.b64encode(audio_bytes).decode("utf-8")
     return audio_b64, duration
+#---------------------------concatenate text with tags ---------------------------
+# Map Intensity numbers to tag strings
+intensity_map = {
+    1: "low",
+    2: "mid",
+    3: "high"
+}
+# Map Emotion enum names to lowercase tag strings
+emotion_map = {
+    "HAPPINESS": "happiness",
+    "SADNESS": "sadness",
+    "FEAR": "fear",
+    "ANGER": "anger",
+    "SURPRISE": "surprise",
+    "WHISPER": "whisper",
+    "NARRATION": "narration"
+}
+def generate_tagged_text(text: str, emotion_enum: str, intensity_enum: int) -> str:
+    """
+    Convert enums to <emo_x> <int_y> format and concatenate with text
+    """
+    emo_tag = f"<emo_{emotion_map[emotion_enum]}>"
+    int_tag = f"<int_{intensity_map[intensity_enum]}>"
+    return f"{emo_tag} {int_tag} {text}"
+#-----------------------------------------------------------
 @app.post("/tts/")
 async def process_story(story: StoryCreationDTO):
     return response
+#----------------------------Test------------------------------------
 @app.post("/tts_test/")
 async def tts_endpoint(