MariaKaiser committed on
Commit
b6ac661
·
verified ·
1 Parent(s): 1806c12

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +180 -34
app.py CHANGED
@@ -133,8 +133,6 @@ import asyncio
133
  # return None
134
 
135
 
136
- download_cache = {}
137
-
138
  async def download_scene_files(scene: SceneDto):
139
  tasks = []
140
 
@@ -154,6 +152,8 @@ async def download_scene_files(scene: SceneDto):
154
  downloaded_files = await asyncio.gather(*tasks)
155
  return downloaded_files
156
 
 
 
157
  async def download_file_from_url(url: str, retries: int = 3, delay: float = 2.0) -> str | None:
158
  """
159
  Downloads a file from a URL and returns the path to a temporary file.
@@ -187,6 +187,55 @@ async def download_file_from_url(url: str, retries: int = 3, delay: float = 2.0)
187
  #print(f"All {retries} attempts failed for {url}, skipping...")
188
  return None
189
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
  #-----------------------------------------------------------
191
 
192
  # Takes the text to be spoken, the path to the prosody reference audio and the path where the generated audio is saved; returns the path to the generated audio.
@@ -211,55 +260,152 @@ def inference_by_model(text: str, audio_file: str, save_path: str) -> str:
211
 
212
  #_______________generate audios and folder structure_______________________
213
 
214
- async def generate_story_audios(story: StoryCreationDTO, base_output: str):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
215
  """
216
- Generates audio files and folders for the entire story
 
217
  """
218
  story_dir = Path(base_output) / story.storyId
219
  story_dir.mkdir(parents=True, exist_ok=True)
220
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
221
  for chapter in story.chapters:
222
  chapter_dir = story_dir / chapter.chapterId
223
  chapter_dir.mkdir(exist_ok=True)
 
 
 
 
224
 
225
- # --- Chapter title audio ---
226
- prosody_file_title = await download_file_from_url(chapter.title.prosodyReference)
227
  title_save_path = chapter_dir / "title.wav"
228
-
229
- tagged_text_title = generate_tagged_text(
230
- chapter.title.sentence,
231
- chapter.title.emotion,
232
- chapter.title.intensity
233
  )
234
-
235
- title_generated_audio_path = inference_by_model(
236
- text=tagged_text_title,
237
- audio_file=prosody_file_title,
238
- save_path=title_save_path
 
 
 
 
239
  )
240
- # os.remove(prosody_file_title)
241
 
 
 
242
  for scene in chapter.scenes:
 
243
  await download_scene_files(scene)
244
- scene_dir = chapter_dir / scene.sceneId
245
- scene_dir.mkdir(exist_ok=True)
246
 
247
- # --- Sentences audio ---
248
  for sentence in scene.sentences:
249
- # Download the prosody reference audio from Supabase
250
- prosody_file = download_cache[sentence.prosodyReference]
251
- sentence_save_path = scene_dir / f"{sentence.sentenceId}.wav"
252
- tagged_text = generate_tagged_text(
253
- sentence.sentence,
254
- sentence.emotion,
255
- sentence.intensity
256
- )
257
- sentence_generated_audio_path = inference_by_model(
258
- text=tagged_text,
259
- audio_file=prosody_file,
260
- save_path=sentence_save_path
261
- )
262
- # os.remove(prosody_file)
263
 
264
  #_______________ Concatenating the generated audios to make the final story (post-processing)_______________________
265
 
 
133
  # return None
134
 
135
 
 
 
136
  async def download_scene_files(scene: SceneDto):
137
  tasks = []
138
 
 
152
  downloaded_files = await asyncio.gather(*tasks)
153
  return downloaded_files
154
 
155
+ download_cache = {} # in-memory map: url -> local file
156
+
157
  async def download_file_from_url(url: str, retries: int = 3, delay: float = 2.0) -> str | None:
158
  """
159
  Downloads a file from a URL and returns the path to a temporary file.
 
187
  #print(f"All {retries} attempts failed for {url}, skipping...")
188
  return None
189
 
190
+ # import os
191
+ # import httpx
192
+ # import asyncio
193
+
194
+ # CACHE_DIR = "audio_cache"
195
+ # os.makedirs(CACHE_DIR, exist_ok=True) # create the folder if it does not exist; it stores files permanently on disk
196
+
197
+
198
+
199
+ # async def download_file_from_url(url: str, retries: int = 3, delay: float = 2.0) -> str | None:
200
+ # """
201
+ # Downloads a file from a URL and stores it in a permanent cache folder.
202
+ # Returns the local file path. Reuses already downloaded files.
203
+ # """
204
+ # if url in download_cache:
205
+ # #print(f"{url} is in download cache")
206
+ # return download_cache[url]
207
+
208
+ # # determine local file path in cache folder
209
+ # filename = url.split("/")[-1] # simple filename from URL
210
+ # local_path = os.path.join(CACHE_DIR, filename)
211
+
212
+ # # check if file already exists on disk
213
+ # if os.path.exists(local_path):
214
+ # #print(f"{url} is in disk and put to download cache now")
215
+ # download_cache[url] = local_path
216
+ # return local_path
217
+
218
+ # # download if not cached
219
+ # for attempt in range(1, retries + 1):
220
+ # try:
221
+ # async with httpx.AsyncClient(timeout=60.0) as client:
222
+ # response = await client.get(url)
223
+ # response.raise_for_status()
224
+
225
+ # # save to permanent cache folder
226
+ # with open(local_path, "wb") as f:
227
+ # f.write(response.content)
228
+
229
+ # download_cache[url] = local_path
230
+ # #print(f"{url} is downloaded from supabase and stored in disk and download cache now")
231
+ # return local_path
232
+
233
+ # except Exception as e:
234
+ # if attempt < retries:
235
+ # await asyncio.sleep(delay)
236
+
237
+ # return None
238
+
239
  #-----------------------------------------------------------
240
 
241
  # Takes the text to be spoken, the path to the prosody reference audio and the path where the generated audio is saved; returns the path to the generated audio.
 
260
 
261
  #_______________generate audios and folder structure_______________________
262
 
263
+ # async def generate_story_audios(story: StoryCreationDTO, base_output: str):
264
+ # """
265
+ # Generates audio files and folders for the entire story
266
+ # """
267
+ # story_dir = Path(base_output) / story.storyId
268
+ # story_dir.mkdir(parents=True, exist_ok=True)
269
+
270
+ # for chapter in story.chapters:
271
+ # chapter_dir = story_dir / chapter.chapterId
272
+ # chapter_dir.mkdir(exist_ok=True)
273
+
274
+ # # --- Chapter title audio ---
275
+ # prosody_file_title = await download_file_from_url(chapter.title.prosodyReference)
276
+ # title_save_path = chapter_dir / "title.wav"
277
+
278
+ # tagged_text_title = generate_tagged_text(
279
+ # chapter.title.sentence,
280
+ # chapter.title.emotion,
281
+ # chapter.title.intensity
282
+ # )
283
+
284
+ # title_generated_audio_path = inference_by_model(
285
+ # text=tagged_text_title,
286
+ # audio_file=prosody_file_title,
287
+ # save_path=title_save_path
288
+ # )
289
+ # # os.remove(prosody_file_title)
290
+
291
+ # for scene in chapter.scenes:
292
+ # await download_scene_files(scene)
293
+ # scene_dir = chapter_dir / scene.sceneId
294
+ # scene_dir.mkdir(exist_ok=True)
295
+
296
+ # # --- Sentences audio ---
297
+ # for sentence in scene.sentences:
298
+ # # Download the prosody reference audio from Supabase
299
+ # prosody_file = download_cache[sentence.prosodyReference]
300
+ # sentence_save_path = scene_dir / f"{sentence.sentenceId}.wav"
301
+ # tagged_text = generate_tagged_text(
302
+ # sentence.sentence,
303
+ # sentence.emotion,
304
+ # sentence.intensity
305
+ # )
306
+ # sentence_generated_audio_path = inference_by_model(
307
+ # text=tagged_text,
308
+ # audio_file=prosody_file,
309
+ # save_path=sentence_save_path
310
+ # )
311
+ # # os.remove(prosody_file)
312
+
313
+ import asyncio
314
+ from pathlib import Path
315
+
316
async def generate_story_audios_async(story: StoryCreationDTO, base_output: str, max_concurrent_gpu: int = 1):
    """
    Generates audio files for the story while overlapping GPU inference and disk writes.

    Files are written as <base_output>/<storyId>/<chapterId>/title.wav for chapter
    titles and <base_output>/<storyId>/<chapterId>/<sceneId>/<sentenceId>.wav for
    sentences.

    Args:
        story: story whose chapters, scenes and sentences are turned into audio.
        base_output: root directory under which the story folder is created.
        max_concurrent_gpu: maximum number of simultaneous inference_by_model calls
            (1 if GPU is the bottleneck). Applies to title AND sentence inference.

    Raises:
        ValueError: if max_concurrent_gpu is smaller than 1.
    """
    if max_concurrent_gpu < 1:
        # A semaphore with zero slots would make every inference wait forever.
        raise ValueError("max_concurrent_gpu must be at least 1")

    story_dir = Path(base_output) / story.storyId
    story_dir.mkdir(parents=True, exist_ok=True)

    print(f"[INFO] Generating story '{story.storyId}' in {story_dir}")

    # Semaphore ensures we don't overload GPU
    gpu_semaphore = asyncio.Semaphore(max_concurrent_gpu)

    async def infer_in_thread(tagged_text: str, prosody_file: str, save_path: Path):
        """Run the blocking inference_by_model call in the default thread pool."""
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            None,
            inference_by_model,
            tagged_text,
            prosody_file,
            str(save_path),
        )

    async def process_sentence(chapter_dir: Path, scene: SceneDto, sentence: SentenceDto):
        """Generate the audio of one sentence; returns the generated path or None."""
        print(f"[INFO] Starting sentence '{sentence.sentenceId}' in scene '{scene.sceneId}'")

        # Look the prosody file up BEFORE taking a GPU slot, so that a missing
        # file never blocks sentences that are ready to run.
        prosody_file = download_cache.get(sentence.prosodyReference)
        if not prosody_file:
            print(f"[WARN] Prosody file for '{sentence.sentenceId}' not found in cache")
            return None

        sentence_save_path = chapter_dir / scene.sceneId / f"{sentence.sentenceId}.wav"
        sentence_save_path.parent.mkdir(parents=True, exist_ok=True)

        tagged_text = generate_tagged_text(
            sentence.sentence,
            sentence.emotion,
            sentence.intensity,
        )

        async with gpu_semaphore:
            print(f"[GPU] Acquired GPU for sentence '{sentence.sentenceId}'")
            generated_path = await infer_in_thread(tagged_text, prosody_file, sentence_save_path)

        print(f"[DONE] Generated audio for sentence '{sentence.sentenceId}' -> {generated_path}")
        return generated_path

    # Prepare tasks for chapters
    chapter_tasks = []
    for chapter in story.chapters:
        chapter_dir = story_dir / chapter.chapterId
        chapter_dir.mkdir(exist_ok=True)
        print(f"[INFO] Processing chapter '{chapter.chapterId}'")

        # --- Chapter title ---
        # download_file_from_url returns None when every retry failed.
        title_prosody = await download_file_from_url(chapter.title.prosodyReference)
        if title_prosody:
            title_save_path = chapter_dir / "title.wav"
            tagged_title = generate_tagged_text(
                chapter.title.sentence,
                chapter.title.emotion,
                chapter.title.intensity,
            )

            # The sentence tasks of earlier chapters are already running at this
            # point (asyncio.gather schedules them immediately), so the title
            # must take a GPU slot too or the max_concurrent_gpu limit is exceeded.
            async with gpu_semaphore:
                print(f"[GPU] Generating title audio for chapter '{chapter.chapterId}'")
                await infer_in_thread(tagged_title, title_prosody, title_save_path)
            print(f"[DONE] Generated title audio for chapter '{chapter.chapterId}' -> {title_save_path}")
        else:
            print(f"[WARN] Prosody file for title of chapter '{chapter.chapterId}' could not be downloaded, skipping title audio")

        # --- Scenes ---
        scene_tasks = []
        for scene in chapter.scenes:
            print(f"[INFO] Downloading files for scene '{scene.sceneId}'")
            await download_scene_files(scene)
            scene_tasks.extend(
                process_sentence(chapter_dir, scene, sentence)
                for sentence in scene.sentences
            )

        if scene_tasks:
            print(f"[INFO] Running {len(scene_tasks)} sentences for chapter '{chapter.chapterId}' concurrently")
            chapter_tasks.append(asyncio.gather(*scene_tasks))
        else:
            print(f"[WARN] No sentences found in chapter '{chapter.chapterId}'")

    # Wait for all chapters to complete
    if chapter_tasks:
        await asyncio.gather(*chapter_tasks)
        print(f"[INFO] Completed generating all chapters for story '{story.storyId}'")
    else:
        print(f"[WARN] No chapters/tasks to process for story '{story.storyId}'")
409
 
410
  #_______________ Concatenating the generated audios to make the final story (post-processing)_______________________
411