Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -391,54 +391,49 @@ async def text_to_speech(
|
|
| 391 |
async def stream_text_to_speech_cloning(
|
| 392 |
text: str = Form(..., min_length=1, max_length=5000),
|
| 393 |
reference_text: str = Form(...),
|
| 394 |
-
speed: float = Form(1.0, ge=0.5, le=2.0),
|
| 395 |
output_format: str = Form("mp3", pattern="^(wav|mp3|flac)$"),
|
| 396 |
-
reference_audio: UploadFile = File(...)
|
|
|
|
| 397 |
"""
|
| 398 |
-
|
| 399 |
-
Fixes race condition by moving cleanup into the streaming generator.
|
| 400 |
"""
|
| 401 |
if not hasattr(app.state, 'tts_wrapper'):
|
| 402 |
raise HTTPException(status_code=503, detail="Service unavailable: Model not loaded")
|
| 403 |
-
|
| 404 |
-
# 1. Asynchronously save reference audio (non-blocking)
|
| 405 |
temp_ref_path = await save_upload_file_async(reference_audio)
|
| 406 |
-
converted_wav_path = None
|
| 407 |
-
|
| 408 |
try:
|
| 409 |
-
# 2. Convert the uploaded file (WebM, etc.) to a 24kHz WAV file
|
| 410 |
converted_wav_path = await run_blocking_task_async(
|
| 411 |
-
convert_to_wav_blocking,
|
| 412 |
-
temp_ref_path
|
| 413 |
)
|
| 414 |
-
|
| 415 |
-
# 2.5. CLEANUP ORIGINAL FILE IMMEDIATELY: It is no longer needed after conversion
|
| 416 |
if os.path.exists(temp_ref_path):
|
| 417 |
os.unlink(temp_ref_path)
|
| 418 |
-
|
| 419 |
-
|
| 420 |
-
def stream_generator(path_to_delete: str):
|
| 421 |
try:
|
| 422 |
-
# This
|
| 423 |
-
for
|
| 424 |
-
text,
|
| 425 |
-
path_to_delete,
|
| 426 |
-
reference_text,
|
| 427 |
-
|
| 428 |
-
output_format
|
| 429 |
):
|
| 430 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 431 |
except Exception as e:
|
| 432 |
-
# Log the error and raise it to stop the stream
|
| 433 |
logger.error(f"Streaming generator error: {e}")
|
| 434 |
-
raise
|
| 435 |
finally:
|
| 436 |
-
# 4. **CRUCIAL FIX:** Clean up the converted file ONLY AFTER GENERATION IS DONE
|
| 437 |
if os.path.exists(path_to_delete):
|
| 438 |
os.unlink(path_to_delete)
|
| 439 |
logger.info(f"Cleaned up converted file: {path_to_delete}")
|
| 440 |
|
| 441 |
-
# Return StreamingResponse, passing the path to the generator
|
| 442 |
return StreamingResponse(
|
| 443 |
stream_generator(converted_wav_path),
|
| 444 |
media_type=f"audio/{'mpeg' if output_format == 'mp3' else output_format}",
|
|
@@ -449,20 +444,17 @@ async def stream_text_to_speech_cloning(
|
|
| 449 |
"X-Accel-Buffering": "no"
|
| 450 |
}
|
| 451 |
)
|
| 452 |
-
|
| 453 |
except Exception as e:
|
| 454 |
logger.error(f"Streaming setup error: {e}")
|
| 455 |
-
# Clean up files only if the setup failed *before* starting the generator
|
| 456 |
if os.path.exists(temp_ref_path):
|
| 457 |
os.unlink(temp_ref_path)
|
| 458 |
if converted_wav_path and os.path.exists(converted_wav_path):
|
| 459 |
os.unlink(converted_wav_path)
|
| 460 |
-
|
| 461 |
-
# Reraise HTTPExceptions that may have come from the conversion step
|
| 462 |
if isinstance(e, HTTPException):
|
| 463 |
raise
|
| 464 |
raise HTTPException(status_code=500, detail=f"Streaming synthesis failed: {e}")
|
| 465 |
-
# Note: The outer 'finally' block is now removed as its logic is handled in 2.5 and 4.
|
| 466 |
|
| 467 |
@app.get("/audio/{filename}")
|
| 468 |
async def get_audio(filename: str):
|
|
|
|
| 391 |
async def stream_text_to_speech_cloning(
|
| 392 |
text: str = Form(..., min_length=1, max_length=5000),
|
| 393 |
reference_text: str = Form(...),
|
| 394 |
+
speed: float = Form(1.0, ge=0.5, le=2.0), # Kept for API compatibility, not used in this logic
|
| 395 |
output_format: str = Form("mp3", pattern="^(wav|mp3|flac)$"),
|
| 396 |
+
reference_audio: UploadFile = File(...)
|
| 397 |
+
):
|
| 398 |
"""
|
| 399 |
+
High-performance parallel streaming endpoint using the local wrapper.
|
|
|
|
| 400 |
"""
|
| 401 |
if not hasattr(app.state, 'tts_wrapper'):
|
| 402 |
raise HTTPException(status_code=503, detail="Service unavailable: Model not loaded")
|
| 403 |
+
|
|
|
|
| 404 |
temp_ref_path = await save_upload_file_async(reference_audio)
|
| 405 |
+
converted_wav_path = None
|
| 406 |
+
|
| 407 |
try:
|
|
|
|
| 408 |
converted_wav_path = await run_blocking_task_async(
|
| 409 |
+
convert_to_wav_blocking, temp_ref_path
|
|
|
|
| 410 |
)
|
| 411 |
+
|
|
|
|
| 412 |
if os.path.exists(temp_ref_path):
|
| 413 |
os.unlink(temp_ref_path)
|
| 414 |
+
|
| 415 |
+
async def stream_generator(path_to_delete: str):
|
|
|
|
| 416 |
try:
|
| 417 |
+
# This now calls our new wrapper's parallel streaming method
|
| 418 |
+
async for audio_chunk in app.state.tts_wrapper.stream_speech_parallel(
|
| 419 |
+
text=text,
|
| 420 |
+
ref_audio_path=path_to_delete,
|
| 421 |
+
ref_text=reference_text,
|
| 422 |
+
executor=tts_executor
|
|
|
|
| 423 |
):
|
| 424 |
+
audio_buffer = io.BytesIO()
|
| 425 |
+
sf.write(audio_buffer, audio_chunk, SAMPLE_RATE, format=output_format)
|
| 426 |
+
audio_buffer.seek(0)
|
| 427 |
+
yield audio_buffer.read()
|
| 428 |
+
|
| 429 |
except Exception as e:
|
|
|
|
| 430 |
logger.error(f"Streaming generator error: {e}")
|
| 431 |
+
raise
|
| 432 |
finally:
|
|
|
|
| 433 |
if os.path.exists(path_to_delete):
|
| 434 |
os.unlink(path_to_delete)
|
| 435 |
logger.info(f"Cleaned up converted file: {path_to_delete}")
|
| 436 |
|
|
|
|
| 437 |
return StreamingResponse(
|
| 438 |
stream_generator(converted_wav_path),
|
| 439 |
media_type=f"audio/{'mpeg' if output_format == 'mp3' else output_format}",
|
|
|
|
| 444 |
"X-Accel-Buffering": "no"
|
| 445 |
}
|
| 446 |
)
|
| 447 |
+
|
| 448 |
except Exception as e:
|
| 449 |
logger.error(f"Streaming setup error: {e}")
|
|
|
|
| 450 |
if os.path.exists(temp_ref_path):
|
| 451 |
os.unlink(temp_ref_path)
|
| 452 |
if converted_wav_path and os.path.exists(converted_wav_path):
|
| 453 |
os.unlink(converted_wav_path)
|
| 454 |
+
|
|
|
|
| 455 |
if isinstance(e, HTTPException):
|
| 456 |
raise
|
| 457 |
raise HTTPException(status_code=500, detail=f"Streaming synthesis failed: {e}")
|
|
|
|
| 458 |
|
| 459 |
@app.get("/audio/{filename}")
|
| 460 |
async def get_audio(filename: str):
|