MariaKaiser committed on
Commit
0c3dd18
·
verified ·
1 Parent(s): fb77147

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -31
app.py CHANGED
@@ -155,8 +155,15 @@ async def generate_story_audios(story: StoryCreationDTO, base_output: str):
155
  # --- Chapter title audio ---
156
  prosody_file_title = await download_file_from_url(chapter.title.prosodyReference)
157
  title_save_path = chapter_dir / "title.wav"
 
 
 
 
 
 
 
158
  title_generated_audio_path = inference_by_model(
159
- text=chapter.title.sentence,
160
  audio_file=prosody_file_title,
161
  save_path=title_save_path
162
  )
@@ -171,8 +178,13 @@ async def generate_story_audios(story: StoryCreationDTO, base_output: str):
171
  # Download the prosody reference audio from Supabase
172
  prosody_file = await download_file_from_url(sentence.prosodyReference)
173
  sentence_save_path = scene_dir / f"{sentence.sentenceId}.wav"
 
 
 
 
 
174
  sentence_generated_audio_path = inference_by_model(
175
- text=sentence.sentence,
176
  audio_file=prosody_file,
177
  save_path=sentence_save_path
178
  )
@@ -298,6 +310,35 @@ def audio_to_base64(audio_path: str) -> (str, float):
298
  audio_b64 = base64.b64encode(audio_bytes).decode("utf-8")
299
  return audio_b64, duration
300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
301
 
302
  @app.post("/tts/")
303
  async def process_story(story: StoryCreationDTO):
@@ -329,35 +370,7 @@ async def process_story(story: StoryCreationDTO):
329
 
330
  return response
331
 
332
- #---------------------------concatenate text with tags ---------------------------
333
-
334
- # Map Intensity numbers to tag strings
335
- intensity_map = {
336
- 1: "low",
337
- 2: "mid",
338
- 3: "high"
339
- }
340
-
341
- # Map Emotion enum names to lowercase tag strings
342
- emotion_map = {
343
- "HAPPINESS": "happiness",
344
- "SADNESS": "sadness",
345
- "FEAR": "fear",
346
- "ANGER": "anger",
347
- "SURPRISE": "surprise",
348
- "WHISPER": "whisper",
349
- "NARRATION": "narration"
350
- }
351
-
352
- def generate_tagged_text(text: str, emotion_enum: str, intensity_enum: int) -> str:
353
- """
354
- Convert enums to <emo_x> <int_y> format and concatenate with text
355
- """
356
- emo_tag = f"<emo_{emotion_map[emotion_enum]}>"
357
- int_tag = f"<int_{intensity_map[intensity_enum]}>"
358
- return f"{emo_tag} {int_tag} {text}"
359
-
360
- #-----------------------------------------------------------
361
 
362
  @app.post("/tts_test/")
363
  async def tts_endpoint(
 
155
  # --- Chapter title audio ---
156
  prosody_file_title = await download_file_from_url(chapter.title.prosodyReference)
157
  title_save_path = chapter_dir / "title.wav"
158
+
159
+ tagged_text_title = generate_tagged_text(
160
+ chapter.title.sentence,
161
+ chapter.title.emotion,
162
+ chapter.title.intensity
163
+ )
164
+
165
  title_generated_audio_path = inference_by_model(
166
+ text=tagged_text_title,
167
  audio_file=prosody_file_title,
168
  save_path=title_save_path
169
  )
 
178
  # Download the prosody reference audio from Supabase
179
  prosody_file = await download_file_from_url(sentence.prosodyReference)
180
  sentence_save_path = scene_dir / f"{sentence.sentenceId}.wav"
181
+ tagged_text = generate_tagged_text(
182
+ sentence.sentence,
183
+ sentence.emotion,
184
+ sentence.intensity
185
+ )
186
  sentence_generated_audio_path = inference_by_model(
187
+ text=tagged_text,
188
  audio_file=prosody_file,
189
  save_path=sentence_save_path
190
  )
 
310
  audio_b64 = base64.b64encode(audio_bytes).decode("utf-8")
311
  return audio_b64, duration
312
 
313
+ #---------------------------concatenate text with tags ---------------------------
314
+
315
+ # Map Intensity numbers to tag strings
316
+ intensity_map = {
317
+ 1: "low",
318
+ 2: "mid",
319
+ 3: "high"
320
+ }
321
+
322
+ # Map Emotion enum names to lowercase tag strings
323
+ emotion_map = {
324
+ "HAPPINESS": "happiness",
325
+ "SADNESS": "sadness",
326
+ "FEAR": "fear",
327
+ "ANGER": "anger",
328
+ "SURPRISE": "surprise",
329
+ "WHISPER": "whisper",
330
+ "NARRATION": "narration"
331
+ }
332
+
333
+ def generate_tagged_text(text: str, emotion_enum: str, intensity_enum: int) -> str:
334
+ """
335
+ Convert enums to <emo_x> <int_y> format and concatenate with text
336
+ """
337
+ emo_tag = f"<emo_{emotion_map[emotion_enum]}>"
338
+ int_tag = f"<int_{intensity_map[intensity_enum]}>"
339
+ return f"{emo_tag} {int_tag} {text}"
340
+
341
+ #-----------------------------------------------------------
342
 
343
  @app.post("/tts/")
344
  async def process_story(story: StoryCreationDTO):
 
370
 
371
  return response
372
 
373
+ #----------------------------Test------------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
374
 
375
  @app.post("/tts_test/")
376
  async def tts_endpoint(