MariaKaiser committed on
Commit
45f8e2f
·
verified ·
1 Parent(s): 39d8f28

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +112 -32
app.py CHANGED
@@ -260,55 +260,135 @@ def inference_by_model(text: str, audio_file: str, save_path: str) -> str:
260
 
261
  #_______________generate audios and folder structure_______________________
262
 
263
async def generate_story_audios(story: StoryCreationDTO, base_output: str):
    """
    Generates audio files and folders for the entire story.

    Layout written to disk:
        <base_output>/<storyId>/<chapterId>/title.wav
        <base_output>/<storyId>/<chapterId>/<sceneId>/<sentenceId>.wav

    Args:
        story: story whose chapters, scenes and sentences are rendered.
        base_output: root directory under which the story folder is created.

    Returns:
        None. The generated files are the only result.

    NOTE(review): inference_by_model is called synchronously, so the event
    loop is blocked for the duration of every inference call.
    """
    story_dir = Path(base_output) / story.storyId
    story_dir.mkdir(parents=True, exist_ok=True)

    for chapter in story.chapters:
        chapter_dir = story_dir / chapter.chapterId
        chapter_dir.mkdir(exist_ok=True)

        # --- Chapter title audio ---
        prosody_file_title = await download_file_from_url(chapter.title.prosodyReference)
        title_save_path = chapter_dir / "title.wav"

        tagged_text_title = generate_tagged_text(
            chapter.title.sentence,
            chapter.title.emotion,
            chapter.title.intensity
        )

        # inference_by_model declares save_path as str, so convert the Path explicitly.
        inference_by_model(
            text=tagged_text_title,
            audio_file=prosody_file_title,
            save_path=str(title_save_path)
        )
        # The downloaded title prosody file is intentionally not removed here.

        for scene in chapter.scenes:
            # Must complete before the sentence loop: the loop reads the
            # prosody files from download_cache.
            await download_scene_files(scene)
            scene_dir = chapter_dir / scene.sceneId
            scene_dir.mkdir(exist_ok=True)

            # --- Sentences audio ---
            for sentence in scene.sentences:
                # Prosody reference audio looked up in the download cache
                prosody_file = download_cache[sentence.prosodyReference]
                sentence_save_path = scene_dir / f"{sentence.sentenceId}.wav"
                tagged_text = generate_tagged_text(
                    sentence.sentence,
                    sentence.emotion,
                    sentence.intensity
                )
                inference_by_model(
                    text=tagged_text,
                    audio_file=prosody_file,
                    save_path=str(sentence_save_path)
                )
                # The cached prosody file is intentionally not removed here.
312
 
313
  #_______________ Concatenating the generated audios to make the final story (post-processing)_______________________
314
 
 
260
 
261
  #_______________generate audios and folder structure_______________________
262
 
263
+ # async def generate_story_audios(story: StoryCreationDTO, base_output: str):
264
+ # """
265
+ # Generates audio files and folders for the entire story
266
+ # """
267
+ # story_dir = Path(base_output) / story.storyId
268
+ # story_dir.mkdir(parents=True, exist_ok=True)
269
+
270
+ # for chapter in story.chapters:
271
+ # chapter_dir = story_dir / chapter.chapterId
272
+ # chapter_dir.mkdir(exist_ok=True)
273
+
274
+ # # --- Chapter title audio ---
275
+ # prosody_file_title = await download_file_from_url(chapter.title.prosodyReference)
276
+ # title_save_path = chapter_dir / "title.wav"
277
+
278
+ # tagged_text_title = generate_tagged_text(
279
+ # chapter.title.sentence,
280
+ # chapter.title.emotion,
281
+ # chapter.title.intensity
282
+ # )
283
+
284
+ # title_generated_audio_path = inference_by_model(
285
+ # text=tagged_text_title,
286
+ # audio_file=prosody_file_title,
287
+ # save_path=title_save_path
288
+ # )
289
+ # # os.remove(prosody_file_title)
290
+
291
+ # for scene in chapter.scenes:
292
+ # await download_scene_files(scene)
293
+ # scene_dir = chapter_dir / scene.sceneId
294
+ # scene_dir.mkdir(exist_ok=True)
295
+
296
+ # # --- Sentences audio ---
297
+ # for sentence in scene.sentences:
298
+ # # Download the prosody reference audio from Supabase
299
+ # prosody_file = download_cache[sentence.prosodyReference]
300
+ # sentence_save_path = scene_dir / f"{sentence.sentenceId}.wav"
301
+ # tagged_text = generate_tagged_text(
302
+ # sentence.sentence,
303
+ # sentence.emotion,
304
+ # sentence.intensity
305
+ # )
306
+ # sentence_generated_audio_path = inference_by_model(
307
+ # text=tagged_text,
308
+ # audio_file=prosody_file,
309
+ # save_path=sentence_save_path
310
+ # )
311
+ # # os.remove(prosody_file)
312
+
313
+ import asyncio
314
+ from pathlib import Path
315
+
316
async def generate_story_audios_async(story: StoryCreationDTO, base_output: str, max_concurrent_gpu: int = 1):
    """
    Generates audio files for the story while overlapping GPU inference and disk writes.

    Layout written to disk:
        <base_output>/<storyId>/<chapterId>/title.wav
        <base_output>/<storyId>/<chapterId>/<sceneId>/<sentenceId>.wav

    Args:
        story: story whose chapters, scenes and sentences are rendered.
        base_output: root directory under which the story folder is created.
        max_concurrent_gpu: maximum number of inference_by_model calls that may
            run at the same time (1 if GPU is the bottleneck). Applies to both
            chapter titles and sentences.

    Returns:
        None. The generated files are the only result.

    Raises:
        KeyError: if a sentence's prosodyReference is missing from download_cache.
        Any exception raised by the download helpers or inference_by_model.
    """
    story_dir = Path(base_output) / story.storyId
    story_dir.mkdir(parents=True, exist_ok=True)

    # Semaphore ensures we don't overload GPU
    gpu_semaphore = asyncio.Semaphore(max_concurrent_gpu)
    # We are inside a coroutine, so a running loop is guaranteed to exist.
    loop = asyncio.get_running_loop()

    async def run_inference(tagged_text: str, prosody_file: str, save_path: Path) -> str:
        # Single entry point for every model call so that titles and sentences
        # share the same concurrency limit. The call runs in the default
        # thread pool to avoid blocking the event loop.
        async with gpu_semaphore:
            return await loop.run_in_executor(
                None,
                inference_by_model,
                tagged_text,
                prosody_file,
                str(save_path)
            )

    async def process_sentence(chapter_dir: Path, scene: SceneDto, sentence: SentenceDto):
        # Get prosody file from cache
        prosody_file = download_cache[sentence.prosodyReference]

        sentence_save_path = chapter_dir / scene.sceneId / f"{sentence.sentenceId}.wav"
        sentence_save_path.parent.mkdir(parents=True, exist_ok=True)

        tagged_text = generate_tagged_text(
            sentence.sentence,
            sentence.emotion,
            sentence.intensity
        )

        return await run_inference(tagged_text, prosody_file, sentence_save_path)

    # Prepare tasks for chapters
    chapter_tasks = []
    for chapter in story.chapters:
        chapter_dir = story_dir / chapter.chapterId
        chapter_dir.mkdir(exist_ok=True)

        # --- Chapter title ---
        title_reference = chapter.title.prosodyReference
        try:
            title_prosody = download_cache[title_reference]
        except KeyError:
            # NOTE(review): only scene files are visibly downloaded in this
            # function, so the title reference may not be cached yet; fetch it
            # directly in that case instead of failing the whole story.
            title_prosody = await download_file_from_url(title_reference)

        title_save_path = chapter_dir / "title.wav"
        tagged_title = generate_tagged_text(
            chapter.title.sentence,
            chapter.title.emotion,
            chapter.title.intensity
        )

        # Run title generation immediately, but through the semaphore: sentence
        # tasks of earlier chapters are already scheduled and may be using the GPU.
        await run_inference(tagged_title, title_prosody, title_save_path)

        # --- Scenes ---
        scene_tasks = []
        for scene in chapter.scenes:
            # Download scene files (prosody, sfx, bg) before queuing the
            # sentences that read them from download_cache.
            await download_scene_files(scene)

            for sentence in scene.sentences:
                scene_tasks.append(process_sentence(chapter_dir, scene, sentence))

        # Run all sentences in this chapter concurrently but limited by GPU semaphore.
        # gather() schedules them right away; they progress while later chapters
        # are being prepared.
        chapter_tasks.append(asyncio.gather(*scene_tasks))

    # Wait for all chapters to complete
    await asyncio.gather(*chapter_tasks)
 
 
 
 
 
 
 
392
 
393
  #_______________ Concatenating the generated audios to make the final story (post-processing)_______________________
394