Spaces:
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -328,19 +328,53 @@ async def process_story(story: StoryCreationDTO):
|
|
| 328 |
)
|
| 329 |
|
| 330 |
return response
|
| 331 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 332 |
|
| 333 |
@app.post("/tts_test/")
|
| 334 |
async def tts_endpoint(
|
| 335 |
text: str = Form(...),
|
| 336 |
-
audio_file: UploadFile = File(...)
|
|
|
|
|
|
|
| 337 |
):
|
|
|
|
| 338 |
file_path = os.path.join(OUTPUT_DIR, audio_file.filename)
|
| 339 |
with open(file_path, "wb") as f:
|
| 340 |
f.write(await audio_file.read())
|
| 341 |
|
|
|
|
|
|
|
| 342 |
output_path = os.path.join(OUTPUT_DIR, "out_test.wav")
|
| 343 |
-
output_wav = inference_by_model(
|
| 344 |
return FileResponse(output_wav, media_type="audio/wav", filename="output.wav")
|
| 345 |
|
| 346 |
import uvicorn
|
|
|
|
| 328 |
)
|
| 329 |
|
| 330 |
return response
|
| 331 |
# --------------------- concatenate text with tags ---------------------

# Map intensity levels (1-3) to the tag strings expected by the TTS model.
intensity_map = {
    1: "low",
    2: "mid",
    3: "high",
}

# Map Emotion enum names to lowercase tag strings.
emotion_map = {
    "HAPPINESS": "happiness",
    "SADNESS": "sadness",
    "FEAR": "fear",
    "ANGER": "anger",
    "SURPRISE": "surprise",
    "WHISPER": "whisper",
    "NARRATION": "narration",
}


def generate_tagged_text(text: str, emotion_enum: str, intensity_enum: int) -> str:
    """Prefix *text* with ``<emo_x> <int_y>`` control tags.

    Args:
        text: The raw text to synthesize.
        emotion_enum: Emotion name, matched case-insensitively against
            ``emotion_map`` (e.g. ``"HAPPINESS"`` or ``"happiness"``).
        intensity_enum: Intensity level: 1 (low), 2 (mid) or 3 (high).

    Returns:
        The tagged string ``"<emo_...> <int_...> <text>"``.

    Raises:
        KeyError: If the emotion name or the intensity level is unknown.
    """
    # Normalize casing/whitespace so callers may pass "happiness" as well as
    # "HAPPINESS"; a genuinely unknown name still raises KeyError, preserving
    # the original error behavior.
    emo_tag = f"<emo_{emotion_map[emotion_enum.strip().upper()]}>"
    int_tag = f"<int_{intensity_map[intensity_enum]}>"
    return f"{emo_tag} {int_tag} {text}"

# -----------------------------------------------------------
|
| 361 |
|
| 362 |
@app.post("/tts_test/")
async def tts_endpoint(
    text: str = Form(...),
    audio_file: UploadFile = File(...),
    emotionName: str = Form(...),
    intensity: int = Form(...)
):
    """Synthesize *text* with the given emotion/intensity using the uploaded reference audio.

    Form fields:
        text: Text to synthesize.
        audio_file: Reference audio clip consumed by the model.
        emotionName: Emotion tag name (see ``emotion_map``).
        intensity: Intensity level 1-3 (see ``intensity_map``).

    Returns:
        FileResponse: the generated WAV, downloaded as ``output.wav``.
    """
    # basename() strips any client-supplied directory components so an
    # uploaded filename like "../../etc/x" cannot escape OUTPUT_DIR
    # (the filename is untrusted input); fall back to a fixed name when
    # the client sends no filename at all.
    safe_name = os.path.basename(audio_file.filename or "upload.wav")
    file_path = os.path.join(OUTPUT_DIR, safe_name)
    with open(file_path, "wb") as f:
        f.write(await audio_file.read())

    # Prepend the <emo_*> <int_*> control tags understood by the model.
    tagged_text = generate_tagged_text(text, emotionName, intensity)

    output_path = os.path.join(OUTPUT_DIR, "out_test.wav")
    output_wav = inference_by_model(tagged_text, file_path, output_path)
    return FileResponse(output_wav, media_type="audio/wav", filename="output.wav")
|
| 379 |
|
| 380 |
import uvicorn
|