Rajhuggingface4253 committed on
Commit
1b19075
·
verified ·
1 Parent(s): b01e4fa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -18
app.py CHANGED
@@ -376,41 +376,51 @@ async def stream_text_to_speech_cloning(
376
  output_format: str = Form("mp3", pattern="^(wav|mp3|flac)$"),
377
  reference_audio: UploadFile = File(...)):
378
  """
379
- Sentence-by-Sentence Streaming Endpoint (Kokoro Feature adaptation).
380
- Includes FFmpeg conversion for uploaded audio format compatibility.
381
  """
382
  if not hasattr(app.state, 'tts_wrapper'):
383
  raise HTTPException(status_code=503, detail="Service unavailable: Model not loaded")
384
 
385
  # 1. Asynchronously save reference audio (non-blocking)
386
  temp_ref_path = await save_upload_file_async(reference_audio)
387
- converted_wav_path = None # NEW: Initialize for cleanup
388
 
389
  try:
390
- # 2. **NEW STEP**: Convert the uploaded file (WebM, etc.) to a 24kHz WAV file using FFmpeg
391
  converted_wav_path = await run_blocking_task_async(
392
  convert_to_wav_blocking,
393
  temp_ref_path
394
  )
395
 
396
- # 3. Define the generator function, which will run in the thread pool implicitly
397
- def stream_generator():
 
 
 
 
398
  try:
399
- # The entire streaming process runs blocking inside the thread pool
400
  for chunk_bytes in app.state.tts_wrapper.stream_speech_blocking(
401
  text,
402
- converted_wav_path, # IMPORTANT: Pass the CONVERTED WAV path
403
  speed,
404
  output_format
405
  ):
406
  yield chunk_bytes
407
  except Exception as e:
 
408
  logger.error(f"Streaming generator error: {e}")
409
- # Note: Cleanup for converted_wav_path is handled in the main finally block below.
410
-
411
- # The StreamingResponse is returned immediately to start the stream
 
 
 
 
 
412
  return StreamingResponse(
413
- stream_generator(),
414
  media_type=f"audio/{'mpeg' if output_format == 'mp3' else output_format}",
415
  headers={
416
  "Content-Disposition": "attachment; filename=tts_live_stream.mp3",
@@ -418,18 +428,20 @@ async def stream_text_to_speech_cloning(
418
  "Cache-Control": "no-cache"
419
  }
420
  )
 
421
  except Exception as e:
422
  logger.error(f"Streaming setup error: {e}")
423
- # Reraise HTTPExceptions that may have come from the conversion step
424
- if isinstance(e, HTTPException):
425
- raise
426
- raise HTTPException(status_code=500, detail=f"Streaming synthesis failed: {e}")
427
- finally:
428
- # 4. Clean up BOTH the original file AND the converted WAV file
429
  if os.path.exists(temp_ref_path):
430
  os.unlink(temp_ref_path)
431
  if converted_wav_path and os.path.exists(converted_wav_path):
432
  os.unlink(converted_wav_path)
 
 
 
 
 
 
433
 
434
  @app.get("/audio/{filename}")
435
  async def get_audio(filename: str):
 
376
  output_format: str = Form("mp3", pattern="^(wav|mp3|flac)$"),
377
  reference_audio: UploadFile = File(...)):
378
  """
379
+ Sentence-by-Sentence Streaming Endpoint.
380
+ Fixes race condition by moving cleanup into the streaming generator.
381
  """
382
  if not hasattr(app.state, 'tts_wrapper'):
383
  raise HTTPException(status_code=503, detail="Service unavailable: Model not loaded")
384
 
385
  # 1. Asynchronously save reference audio (non-blocking)
386
  temp_ref_path = await save_upload_file_async(reference_audio)
387
+ converted_wav_path = None # Initialize for cleanup
388
 
389
  try:
390
+ # 2. Convert the uploaded file (WebM, etc.) to a 24kHz WAV file
391
  converted_wav_path = await run_blocking_task_async(
392
  convert_to_wav_blocking,
393
  temp_ref_path
394
  )
395
 
396
+ # 2.5. CLEANUP ORIGINAL FILE IMMEDIATELY: It is no longer needed after conversion
397
+ if os.path.exists(temp_ref_path):
398
+ os.unlink(temp_ref_path)
399
+
400
+ # 3. Define the generator function, which will run in the thread pool
401
+ def stream_generator(path_to_delete: str):
402
  try:
403
+ # This logic uses the path_to_delete parameter, which is guaranteed to exist
404
  for chunk_bytes in app.state.tts_wrapper.stream_speech_blocking(
405
  text,
406
+ path_to_delete, # Pass the CONVERTED WAV path
407
  speed,
408
  output_format
409
  ):
410
  yield chunk_bytes
411
  except Exception as e:
412
+ # Log the error and raise it to stop the stream
413
  logger.error(f"Streaming generator error: {e}")
414
+ raise # Re-raise to ensure the stream terminates
415
+ finally:
416
+ # 4. **CRUCIAL FIX:** Clean up the converted file ONLY AFTER GENERATION IS DONE
417
+ if os.path.exists(path_to_delete):
418
+ os.unlink(path_to_delete)
419
+ logger.info(f"Cleaned up converted file: {path_to_delete}")
420
+
421
+ # Return StreamingResponse, passing the path to the generator
422
  return StreamingResponse(
423
+ stream_generator(converted_wav_path),
424
  media_type=f"audio/{'mpeg' if output_format == 'mp3' else output_format}",
425
  headers={
426
  "Content-Disposition": "attachment; filename=tts_live_stream.mp3",
 
428
  "Cache-Control": "no-cache"
429
  }
430
  )
431
+
432
  except Exception as e:
433
  logger.error(f"Streaming setup error: {e}")
434
+ # Clean up files only if the setup failed *before* starting the generator
 
 
 
 
 
435
  if os.path.exists(temp_ref_path):
436
  os.unlink(temp_ref_path)
437
  if converted_wav_path and os.path.exists(converted_wav_path):
438
  os.unlink(converted_wav_path)
439
+
440
+ # Reraise HTTPExceptions that may have come from the conversion step
441
+ if isinstance(e, HTTPException):
442
+ raise
443
+ raise HTTPException(status_code=500, detail=f"Streaming synthesis failed: {e}")
444
+ # Note: The outer 'finally' block is now removed as its logic is handled in 2.5 and 4.
445
 
446
  @app.get("/audio/{filename}")
447
  async def get_audio(filename: str):