MariaKaiser committed on
Commit
343b576
·
verified ·
1 Parent(s): 5554813

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -180
app.py CHANGED
@@ -133,6 +133,8 @@ import asyncio
133
  # return None
134
 
135
 
 
 
136
  async def download_scene_files(scene: SceneDto):
137
  tasks = []
138
 
@@ -152,8 +154,6 @@ async def download_scene_files(scene: SceneDto):
152
  downloaded_files = await asyncio.gather(*tasks)
153
  return downloaded_files
154
 
155
- download_cache = {} # in-memory map: url -> local file
156
-
157
  async def download_file_from_url(url: str, retries: int = 3, delay: float = 2.0) -> str | None:
158
  """
159
  Downloads a file from a URL and returns the path to a temporary file.
@@ -187,55 +187,6 @@ async def download_file_from_url(url: str, retries: int = 3, delay: float = 2.0)
187
  #print(f"All {retries} attempts failed for {url}, skipping...")
188
  return None
189
 
190
- # import os
191
- # import httpx
192
- # import asyncio
193
-
194
- # CACHE_DIR = "audio_cache"
195
- # os.makedirs(CACHE_DIR, exist_ok=True) # create if not exists folder stores permanently on disk
196
-
197
-
198
-
199
- # async def download_file_from_url(url: str, retries: int = 3, delay: float = 2.0) -> str | None:
200
- # """
201
- # Downloads a file from a URL and stores it in a permanent cache folder.
202
- # Returns the local file path. Reuses already downloaded files.
203
- # """
204
- # if url in download_cache:
205
- # #print(f"{url} is in download cache")
206
- # return download_cache[url]
207
-
208
- # # determine local file path in cache folder
209
- # filename = url.split("/")[-1] # simple filename from URL
210
- # local_path = os.path.join(CACHE_DIR, filename)
211
-
212
- # # check if file already exists on disk
213
- # if os.path.exists(local_path):
214
- # #print(f"{url} is in disk and put to download cache now")
215
- # download_cache[url] = local_path
216
- # return local_path
217
-
218
- # # download if not cached
219
- # for attempt in range(1, retries + 1):
220
- # try:
221
- # async with httpx.AsyncClient(timeout=60.0) as client:
222
- # response = await client.get(url)
223
- # response.raise_for_status()
224
-
225
- # # save to permanent cache folder
226
- # with open(local_path, "wb") as f:
227
- # f.write(response.content)
228
-
229
- # download_cache[url] = local_path
230
- # #print(f"{url} is downloaded from supabase and stored in disk and download cache now")
231
- # return local_path
232
-
233
- # except Exception as e:
234
- # if attempt < retries:
235
- # await asyncio.sleep(delay)
236
-
237
- # return None
238
-
239
  #-----------------------------------------------------------
240
 
241
  #takes the text to be said and path to the prosody audio and path to save the generated audio and returns path to the generated audio
@@ -260,152 +211,55 @@ def inference_by_model(text: str, audio_file: str, save_path: str) -> str:
260
 
261
  #_______________generate audios and folder structure_______________________
262
 
263
- # async def generate_story_audios(story: StoryCreationDTO, base_output: str):
264
- # """
265
- # Generates audio files and folders for the entire story
266
- # """
267
- # story_dir = Path(base_output) / story.storyId
268
- # story_dir.mkdir(parents=True, exist_ok=True)
269
-
270
- # for chapter in story.chapters:
271
- # chapter_dir = story_dir / chapter.chapterId
272
- # chapter_dir.mkdir(exist_ok=True)
273
-
274
- # # --- Chapter title audio ---
275
- # prosody_file_title = await download_file_from_url(chapter.title.prosodyReference)
276
- # title_save_path = chapter_dir / "title.wav"
277
-
278
- # tagged_text_title = generate_tagged_text(
279
- # chapter.title.sentence,
280
- # chapter.title.emotion,
281
- # chapter.title.intensity
282
- # )
283
-
284
- # title_generated_audio_path = inference_by_model(
285
- # text=tagged_text_title,
286
- # audio_file=prosody_file_title,
287
- # save_path=title_save_path
288
- # )
289
- # # os.remove(prosody_file_title)
290
-
291
- # for scene in chapter.scenes:
292
- # await download_scene_files(scene)
293
- # scene_dir = chapter_dir / scene.sceneId
294
- # scene_dir.mkdir(exist_ok=True)
295
-
296
- # # --- Sentences audio ---
297
- # for sentence in scene.sentences:
298
- # # Download the prosody reference audio from Supabase
299
- # prosody_file = download_cache[sentence.prosodyReference]
300
- # sentence_save_path = scene_dir / f"{sentence.sentenceId}.wav"
301
- # tagged_text = generate_tagged_text(
302
- # sentence.sentence,
303
- # sentence.emotion,
304
- # sentence.intensity
305
- # )
306
- # sentence_generated_audio_path = inference_by_model(
307
- # text=tagged_text,
308
- # audio_file=prosody_file,
309
- # save_path=sentence_save_path
310
- # )
311
- # # os.remove(prosody_file)
312
-
313
- import asyncio
314
- from pathlib import Path
315
-
316
- async def generate_story_audios(story: StoryCreationDTO, base_output: str, max_concurrent_gpu: int = 1):
317
  """
318
- Generates audio files for the story while overlapping GPU inference and disk writes.
319
- max_concurrent_gpu: semaphore to limit simultaneous GPU usage (1 if GPU is the bottleneck)
320
  """
321
  story_dir = Path(base_output) / story.storyId
322
  story_dir.mkdir(parents=True, exist_ok=True)
323
 
324
- #print(f"[INFO] Generating story '{story.storyId}' in {story_dir}")
325
-
326
- # Semaphore ensures we don't overload GPU
327
- gpu_semaphore = asyncio.Semaphore(max_concurrent_gpu)
328
-
329
- async def process_sentence(chapter_dir: Path, scene: SceneDto, sentence: SentenceDto):
330
- #print(f"[INFO] Starting sentence '{sentence.sentenceId}' in scene '{scene.sceneId}'")
331
- async with gpu_semaphore:
332
- #print(f"[GPU] Acquired GPU for sentence '{sentence.sentenceId}'")
333
- # Get prosody file from cache
334
- prosody_file = download_cache.get(sentence.prosodyReference)
335
- if not prosody_file:
336
- #print(f"[WARN] Prosody file for '{sentence.sentenceId}' not found in cache")
337
- return None
338
-
339
- sentence_save_path = chapter_dir / scene.sceneId / f"{sentence.sentenceId}.wav"
340
- Path(sentence_save_path).parent.mkdir(parents=True, exist_ok=True)
341
-
342
- tagged_text = generate_tagged_text(
343
- sentence.sentence,
344
- sentence.emotion,
345
- sentence.intensity
346
- )
347
-
348
- # Run GPU inference in a thread pool to avoid blocking event loop
349
- loop = asyncio.get_event_loop()
350
- generated_path = await loop.run_in_executor(
351
- None,
352
- inference_by_model,
353
- tagged_text,
354
- prosody_file,
355
- str(sentence_save_path)
356
- )
357
- #print(f"[DONE] Generated audio for sentence '{sentence.sentenceId}' -> {generated_path}")
358
- return generated_path
359
-
360
- # Prepare tasks for chapters
361
- chapter_tasks = []
362
  for chapter in story.chapters:
363
  chapter_dir = story_dir / chapter.chapterId
364
  chapter_dir.mkdir(exist_ok=True)
365
- print(f"[INFO] Processing chapter '{chapter.chapterId}'")
366
-
367
- # --- Chapter title ---
368
- title_prosody = await download_file_from_url(chapter.title.prosodyReference)
369
 
 
 
370
  title_save_path = chapter_dir / "title.wav"
371
- tagged_title = generate_tagged_text(
372
- chapter.title.sentence,
373
- chapter.title.emotion,
374
- chapter.title.intensity
 
375
  )
376
-
377
- #print(f"[GPU] Generating title audio for chapter '{chapter.chapterId}'")
378
- loop = asyncio.get_event_loop()
379
- await loop.run_in_executor(
380
- None,
381
- inference_by_model,
382
- tagged_title,
383
- title_prosody,
384
- str(title_save_path)
385
  )
386
- #print(f"[DONE] Generated title audio for chapter '{chapter.chapterId}' -> {title_save_path}")
387
 
388
- # --- Scenes ---
389
- scene_tasks = []
390
  for scene in chapter.scenes:
391
- #print(f"[INFO] Downloading files for scene '{scene.sceneId}'")
392
  await download_scene_files(scene)
 
 
393
 
 
394
  for sentence in scene.sentences:
395
- scene_tasks.append(process_sentence(chapter_dir, scene, sentence))
396
-
397
- if scene_tasks:
398
- #print(f"[INFO] Running {len(scene_tasks)} sentences for chapter '{chapter.chapterId}' concurrently")
399
- chapter_tasks.append(asyncio.gather(*scene_tasks))
400
- #else:
401
- #print(f"[WARN] No sentences found in chapter '{chapter.chapterId}'")
402
-
403
- # Wait for all chapters to complete
404
- if chapter_tasks:
405
- await asyncio.gather(*chapter_tasks)
406
- #print(f"[INFO] Completed generating all chapters for story '{story.storyId}'")
407
- #else:
408
- #print(f"[WARN] No chapters/tasks to process for story '{story.storyId}'")
409
 
410
  #_______________ Concatenating the generated audios to make the final story (post-processing)_______________________
411
 
 
133
  # return None
134
 
135
 
136
+ download_cache = {}
137
+
138
  async def download_scene_files(scene: SceneDto):
139
  tasks = []
140
 
 
154
  downloaded_files = await asyncio.gather(*tasks)
155
  return downloaded_files
156
 
 
 
157
  async def download_file_from_url(url: str, retries: int = 3, delay: float = 2.0) -> str | None:
158
  """
159
  Downloads a file from a URL and returns the path to a temporary file.
 
187
  #print(f"All {retries} attempts failed for {url}, skipping...")
188
  return None
189
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
  #-----------------------------------------------------------
191
 
192
  #takes the text to be said and path to the prosody audio and path to save the generated audio and returns path to the generated audio
 
211
 
212
  #_______________generate audios and folder structure_______________________
213
 
214
+ async def generate_story_audios(story: StoryCreationDTO, base_output: str):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
215
  """
216
+ Generates audio files and folders for the entire story
 
217
  """
218
  story_dir = Path(base_output) / story.storyId
219
  story_dir.mkdir(parents=True, exist_ok=True)
220
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
221
  for chapter in story.chapters:
222
  chapter_dir = story_dir / chapter.chapterId
223
  chapter_dir.mkdir(exist_ok=True)
 
 
 
 
224
 
225
+ # --- Chapter title audio ---
226
+ prosody_file_title = await download_file_from_url(chapter.title.prosodyReference)
227
  title_save_path = chapter_dir / "title.wav"
228
+
229
+ tagged_text_title = generate_tagged_text(
230
+ chapter.title.sentence,
231
+ chapter.title.emotion,
232
+ chapter.title.intensity
233
  )
234
+
235
+ title_generated_audio_path = inference_by_model(
236
+ text=tagged_text_title,
237
+ audio_file=prosody_file_title,
238
+ save_path=title_save_path
 
 
 
 
239
  )
240
+ # os.remove(prosody_file_title)
241
 
 
 
242
  for scene in chapter.scenes:
 
243
  await download_scene_files(scene)
244
+ scene_dir = chapter_dir / scene.sceneId
245
+ scene_dir.mkdir(exist_ok=True)
246
 
247
+ # --- Sentences audio ---
248
  for sentence in scene.sentences:
249
+ # Download the prosody reference audio from Supabase
250
+ prosody_file = download_cache[sentence.prosodyReference]
251
+ sentence_save_path = scene_dir / f"{sentence.sentenceId}.wav"
252
+ tagged_text = generate_tagged_text(
253
+ sentence.sentence,
254
+ sentence.emotion,
255
+ sentence.intensity
256
+ )
257
+ sentence_generated_audio_path = inference_by_model(
258
+ text=tagged_text,
259
+ audio_file=prosody_file,
260
+ save_path=sentence_save_path
261
+ )
262
+ # os.remove(prosody_file)
263
 
264
  #_______________ Concatenating the generated audios to make the final story (post-processing)_______________________
265