Spaces:
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -328,19 +328,53 @@ async def process_story(story: StoryCreationDTO):
|
|
| 328 |
)
|
| 329 |
|
| 330 |
return response
|
| 331 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 332 |
|
| 333 |
@app.post("/tts_test/")
|
| 334 |
async def tts_endpoint(
|
| 335 |
text: str = Form(...),
|
| 336 |
-
audio_file: UploadFile = File(...)
|
|
|
|
|
|
|
| 337 |
):
|
|
|
|
| 338 |
file_path = os.path.join(OUTPUT_DIR, audio_file.filename)
|
| 339 |
with open(file_path, "wb") as f:
|
| 340 |
f.write(await audio_file.read())
|
| 341 |
|
|
|
|
|
|
|
| 342 |
output_path = os.path.join(OUTPUT_DIR, "out_test.wav")
|
| 343 |
-
output_wav = inference_by_model(
|
| 344 |
return FileResponse(output_wav, media_type="audio/wav", filename="output.wav")
|
| 345 |
|
| 346 |
import uvicorn
|
|
|
|
| 328 |
)
|
| 329 |
|
| 330 |
return response
|
| 331 |
# --------------------- concatenate text with tags ---------------------

# Map intensity levels (1-3) to the tag strings expected by the TTS model.
intensity_map = {
    1: "low",
    2: "mid",
    3: "high",
}

# Map Emotion enum names to lowercase tag strings.
emotion_map = {
    "HAPPINESS": "happiness",
    "SADNESS": "sadness",
    "FEAR": "fear",
    "ANGER": "anger",
    "SURPRISE": "surprise",
    "WHISPER": "whisper",
    "NARRATION": "narration",
}


def generate_tagged_text(text: str, emotion_enum: str, intensity_enum: int) -> str:
    """Prefix *text* with ``<emo_x> <int_y>`` control tags.

    Args:
        text: The raw text to synthesize.
        emotion_enum: Emotion name, matched case-insensitively against
            ``emotion_map`` (e.g. ``"HAPPINESS"`` or ``"happiness"``).
        intensity_enum: Intensity level: 1 (low), 2 (mid) or 3 (high).

    Returns:
        The tagged string ``"<emo_...> <int_...> <text>"``.

    Raises:
        KeyError: If the emotion name or the intensity level is unknown.
    """
    # Normalize casing/whitespace so callers may pass "happiness" as well as
    # "HAPPINESS"; a genuinely unknown name still raises KeyError, preserving
    # the original error behavior.
    emo_tag = f"<emo_{emotion_map[emotion_enum.strip().upper()]}>"
    int_tag = f"<int_{intensity_map[intensity_enum]}>"
    return f"{emo_tag} {int_tag} {text}"

# -----------------------------------------------------------
|
| 361 |
|
| 362 |
@app.post("/tts_test/")
async def tts_endpoint(
    text: str = Form(...),
    audio_file: UploadFile = File(...),
    emotionName: str = Form(...),
    intensity: int = Form(...)
):
    """Synthesize *text* with the given emotion/intensity using the uploaded reference audio.

    Form fields:
        text: Text to synthesize.
        audio_file: Reference audio clip consumed by the model.
        emotionName: Emotion tag name (see ``emotion_map``).
        intensity: Intensity level 1-3 (see ``intensity_map``).

    Returns:
        FileResponse: the generated WAV, downloaded as ``output.wav``.
    """
    # basename() strips any client-supplied directory components so an
    # uploaded filename like "../../etc/x" cannot escape OUTPUT_DIR
    # (the filename is untrusted input); fall back to a fixed name when
    # the client sends no filename at all.
    safe_name = os.path.basename(audio_file.filename or "upload.wav")
    file_path = os.path.join(OUTPUT_DIR, safe_name)
    with open(file_path, "wb") as f:
        f.write(await audio_file.read())

    # Prepend the <emo_*> <int_*> control tags understood by the model.
    tagged_text = generate_tagged_text(text, emotionName, intensity)

    output_path = os.path.join(OUTPUT_DIR, "out_test.wav")
    output_wav = inference_by_model(tagged_text, file_path, output_path)
    return FileResponse(output_wav, media_type="audio/wav", filename="output.wav")
|
| 379 |
|
| 380 |
import uvicorn
|