Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -376,41 +376,51 @@ async def stream_text_to_speech_cloning(
|
|
| 376 |
output_format: str = Form("mp3", pattern="^(wav|mp3|flac)$"),
|
| 377 |
reference_audio: UploadFile = File(...)):
|
| 378 |
"""
|
| 379 |
-
Sentence-by-Sentence Streaming Endpoint
|
| 380 |
-
|
| 381 |
"""
|
| 382 |
if not hasattr(app.state, 'tts_wrapper'):
|
| 383 |
raise HTTPException(status_code=503, detail="Service unavailable: Model not loaded")
|
| 384 |
|
| 385 |
# 1. Asynchronously save reference audio (non-blocking)
|
| 386 |
temp_ref_path = await save_upload_file_async(reference_audio)
|
| 387 |
-
converted_wav_path = None #
|
| 388 |
|
| 389 |
try:
|
| 390 |
-
# 2.
|
| 391 |
converted_wav_path = await run_blocking_task_async(
|
| 392 |
convert_to_wav_blocking,
|
| 393 |
temp_ref_path
|
| 394 |
)
|
| 395 |
|
| 396 |
-
#
|
| 397 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 398 |
try:
|
| 399 |
-
#
|
| 400 |
for chunk_bytes in app.state.tts_wrapper.stream_speech_blocking(
|
| 401 |
text,
|
| 402 |
-
|
| 403 |
speed,
|
| 404 |
output_format
|
| 405 |
):
|
| 406 |
yield chunk_bytes
|
| 407 |
except Exception as e:
|
|
|
|
| 408 |
logger.error(f"Streaming generator error: {e}")
|
| 409 |
-
#
|
| 410 |
-
|
| 411 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 412 |
return StreamingResponse(
|
| 413 |
-
stream_generator(),
|
| 414 |
media_type=f"audio/{'mpeg' if output_format == 'mp3' else output_format}",
|
| 415 |
headers={
|
| 416 |
"Content-Disposition": "attachment; filename=tts_live_stream.mp3",
|
|
@@ -418,18 +428,20 @@ async def stream_text_to_speech_cloning(
|
|
| 418 |
"Cache-Control": "no-cache"
|
| 419 |
}
|
| 420 |
)
|
|
|
|
| 421 |
except Exception as e:
|
| 422 |
logger.error(f"Streaming setup error: {e}")
|
| 423 |
-
#
|
| 424 |
-
if isinstance(e, HTTPException):
|
| 425 |
-
raise
|
| 426 |
-
raise HTTPException(status_code=500, detail=f"Streaming synthesis failed: {e}")
|
| 427 |
-
finally:
|
| 428 |
-
# 4. Clean up BOTH the original file AND the converted WAV file
|
| 429 |
if os.path.exists(temp_ref_path):
|
| 430 |
os.unlink(temp_ref_path)
|
| 431 |
if converted_wav_path and os.path.exists(converted_wav_path):
|
| 432 |
os.unlink(converted_wav_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 433 |
|
| 434 |
@app.get("/audio/{filename}")
|
| 435 |
async def get_audio(filename: str):
|
|
|
|
| 376 |
output_format: str = Form("mp3", pattern="^(wav|mp3|flac)$"),
|
| 377 |
reference_audio: UploadFile = File(...)):
|
| 378 |
"""
|
| 379 |
+
Sentence-by-Sentence Streaming Endpoint.
|
| 380 |
+
Fixes race condition by moving cleanup into the streaming generator.
|
| 381 |
"""
|
| 382 |
if not hasattr(app.state, 'tts_wrapper'):
|
| 383 |
raise HTTPException(status_code=503, detail="Service unavailable: Model not loaded")
|
| 384 |
|
| 385 |
# 1. Asynchronously save reference audio (non-blocking)
|
| 386 |
temp_ref_path = await save_upload_file_async(reference_audio)
|
| 387 |
+
converted_wav_path = None # Initialize for cleanup
|
| 388 |
|
| 389 |
try:
|
| 390 |
+
# 2. Convert the uploaded file (WebM, etc.) to a 24kHz WAV file
|
| 391 |
converted_wav_path = await run_blocking_task_async(
|
| 392 |
convert_to_wav_blocking,
|
| 393 |
temp_ref_path
|
| 394 |
)
|
| 395 |
|
| 396 |
+
# 2.5. CLEANUP ORIGINAL FILE IMMEDIATELY: It is no longer needed after conversion
|
| 397 |
+
if os.path.exists(temp_ref_path):
|
| 398 |
+
os.unlink(temp_ref_path)
|
| 399 |
+
|
| 400 |
+
# 3. Define the generator function, which will run in the thread pool
|
| 401 |
+
def stream_generator(path_to_delete: str):
|
| 402 |
try:
|
| 403 |
+
# This logic uses the path_to_delete parameter, which is guaranteed to exist
|
| 404 |
for chunk_bytes in app.state.tts_wrapper.stream_speech_blocking(
|
| 405 |
text,
|
| 406 |
+
path_to_delete, # Pass the CONVERTED WAV path
|
| 407 |
speed,
|
| 408 |
output_format
|
| 409 |
):
|
| 410 |
yield chunk_bytes
|
| 411 |
except Exception as e:
|
| 412 |
+
# Log the error and raise it to stop the stream
|
| 413 |
logger.error(f"Streaming generator error: {e}")
|
| 414 |
+
raise # Re-raise to ensure the stream terminates
|
| 415 |
+
finally:
|
| 416 |
+
# 4. **CRUCIAL FIX:** Clean up the converted file ONLY AFTER GENERATION IS DONE
|
| 417 |
+
if os.path.exists(path_to_delete):
|
| 418 |
+
os.unlink(path_to_delete)
|
| 419 |
+
logger.info(f"Cleaned up converted file: {path_to_delete}")
|
| 420 |
+
|
| 421 |
+
# Return StreamingResponse, passing the path to the generator
|
| 422 |
return StreamingResponse(
|
| 423 |
+
stream_generator(converted_wav_path),
|
| 424 |
media_type=f"audio/{'mpeg' if output_format == 'mp3' else output_format}",
|
| 425 |
headers={
|
| 426 |
"Content-Disposition": "attachment; filename=tts_live_stream.mp3",
|
|
|
|
| 428 |
"Cache-Control": "no-cache"
|
| 429 |
}
|
| 430 |
)
|
| 431 |
+
|
| 432 |
except Exception as e:
|
| 433 |
logger.error(f"Streaming setup error: {e}")
|
| 434 |
+
# Clean up files only if the setup failed *before* starting the generator
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 435 |
if os.path.exists(temp_ref_path):
|
| 436 |
os.unlink(temp_ref_path)
|
| 437 |
if converted_wav_path and os.path.exists(converted_wav_path):
|
| 438 |
os.unlink(converted_wav_path)
|
| 439 |
+
|
| 440 |
+
# Reraise HTTPExceptions that may have come from the conversion step
|
| 441 |
+
if isinstance(e, HTTPException):
|
| 442 |
+
raise
|
| 443 |
+
raise HTTPException(status_code=500, detail=f"Streaming synthesis failed: {e}")
|
| 444 |
+
# Note: The outer 'finally' block is now removed as its logic is handled in 2.5 and 4.
|
| 445 |
|
| 446 |
@app.get("/audio/{filename}")
|
| 447 |
async def get_audio(filename: str):
|