Rajhuggingface4253 committed on
Commit
c0df123
·
verified ·
1 Parent(s): 6b2b49d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -33
app.py CHANGED
@@ -391,54 +391,49 @@ async def text_to_speech(
391
  async def stream_text_to_speech_cloning(
392
  text: str = Form(..., min_length=1, max_length=5000),
393
  reference_text: str = Form(...),
394
- speed: float = Form(1.0, ge=0.5, le=2.0),
395
  output_format: str = Form("mp3", pattern="^(wav|mp3|flac)$"),
396
- reference_audio: UploadFile = File(...)):
 
397
  """
398
- Sentence-by-Sentence Streaming Endpoint.
399
- Fixes race condition by moving cleanup into the streaming generator.
400
  """
401
  if not hasattr(app.state, 'tts_wrapper'):
402
  raise HTTPException(status_code=503, detail="Service unavailable: Model not loaded")
403
-
404
- # 1. Asynchronously save reference audio (non-blocking)
405
  temp_ref_path = await save_upload_file_async(reference_audio)
406
- converted_wav_path = None # Initialize for cleanup
407
-
408
  try:
409
- # 2. Convert the uploaded file (WebM, etc.) to a 24kHz WAV file
410
  converted_wav_path = await run_blocking_task_async(
411
- convert_to_wav_blocking,
412
- temp_ref_path
413
  )
414
-
415
- # 2.5. CLEANUP ORIGINAL FILE IMMEDIATELY: It is no longer needed after conversion
416
  if os.path.exists(temp_ref_path):
417
  os.unlink(temp_ref_path)
418
-
419
- # 3. Define the generator function, which will run in the thread pool
420
- def stream_generator(path_to_delete: str):
421
  try:
422
- # This logic uses the path_to_delete parameter, which is guaranteed to exist
423
- for chunk_bytes in app.state.tts_wrapper.stream_speech_blocking(
424
- text,
425
- path_to_delete, # Pass the CONVERTED WAV path
426
- reference_text,
427
- speed,
428
- output_format
429
  ):
430
- yield chunk_bytes
 
 
 
 
431
  except Exception as e:
432
- # Log the error and raise it to stop the stream
433
  logger.error(f"Streaming generator error: {e}")
434
- raise # Re-raise to ensure the stream terminates
435
  finally:
436
- # 4. **CRUCIAL FIX:** Clean up the converted file ONLY AFTER GENERATION IS DONE
437
  if os.path.exists(path_to_delete):
438
  os.unlink(path_to_delete)
439
  logger.info(f"Cleaned up converted file: {path_to_delete}")
440
 
441
- # Return StreamingResponse, passing the path to the generator
442
  return StreamingResponse(
443
  stream_generator(converted_wav_path),
444
  media_type=f"audio/{'mpeg' if output_format == 'mp3' else output_format}",
@@ -449,20 +444,17 @@ async def stream_text_to_speech_cloning(
449
  "X-Accel-Buffering": "no"
450
  }
451
  )
452
-
453
  except Exception as e:
454
  logger.error(f"Streaming setup error: {e}")
455
- # Clean up files only if the setup failed *before* starting the generator
456
  if os.path.exists(temp_ref_path):
457
  os.unlink(temp_ref_path)
458
  if converted_wav_path and os.path.exists(converted_wav_path):
459
  os.unlink(converted_wav_path)
460
-
461
- # Reraise HTTPExceptions that may have come from the conversion step
462
  if isinstance(e, HTTPException):
463
  raise
464
  raise HTTPException(status_code=500, detail=f"Streaming synthesis failed: {e}")
465
- # Note: The outer 'finally' block is now removed as its logic is handled in 2.5 and 4.
466
 
467
  @app.get("/audio/{filename}")
468
  async def get_audio(filename: str):
 
391
  async def stream_text_to_speech_cloning(
392
  text: str = Form(..., min_length=1, max_length=5000),
393
  reference_text: str = Form(...),
394
+ speed: float = Form(1.0, ge=0.5, le=2.0), # Kept for API compatibility, not used in this logic
395
  output_format: str = Form("mp3", pattern="^(wav|mp3|flac)$"),
396
+ reference_audio: UploadFile = File(...)
397
+ ):
398
  """
399
+ High-performance parallel streaming endpoint using the local wrapper.
 
400
  """
401
  if not hasattr(app.state, 'tts_wrapper'):
402
  raise HTTPException(status_code=503, detail="Service unavailable: Model not loaded")
403
+
 
404
  temp_ref_path = await save_upload_file_async(reference_audio)
405
+ converted_wav_path = None
406
+
407
  try:
 
408
  converted_wav_path = await run_blocking_task_async(
409
+ convert_to_wav_blocking, temp_ref_path
 
410
  )
411
+
 
412
  if os.path.exists(temp_ref_path):
413
  os.unlink(temp_ref_path)
414
+
415
+ async def stream_generator(path_to_delete: str):
 
416
  try:
417
+ # This now calls our new wrapper's parallel streaming method
418
+ async for audio_chunk in app.state.tts_wrapper.stream_speech_parallel(
419
+ text=text,
420
+ ref_audio_path=path_to_delete,
421
+ ref_text=reference_text,
422
+ executor=tts_executor
 
423
  ):
424
+ audio_buffer = io.BytesIO()
425
+ sf.write(audio_buffer, audio_chunk, SAMPLE_RATE, format=output_format)
426
+ audio_buffer.seek(0)
427
+ yield audio_buffer.read()
428
+
429
  except Exception as e:
 
430
  logger.error(f"Streaming generator error: {e}")
431
+ raise
432
  finally:
 
433
  if os.path.exists(path_to_delete):
434
  os.unlink(path_to_delete)
435
  logger.info(f"Cleaned up converted file: {path_to_delete}")
436
 
 
437
  return StreamingResponse(
438
  stream_generator(converted_wav_path),
439
  media_type=f"audio/{'mpeg' if output_format == 'mp3' else output_format}",
 
444
  "X-Accel-Buffering": "no"
445
  }
446
  )
447
+
448
  except Exception as e:
449
  logger.error(f"Streaming setup error: {e}")
 
450
  if os.path.exists(temp_ref_path):
451
  os.unlink(temp_ref_path)
452
  if converted_wav_path and os.path.exists(converted_wav_path):
453
  os.unlink(converted_wav_path)
454
+
 
455
  if isinstance(e, HTTPException):
456
  raise
457
  raise HTTPException(status_code=500, detail=f"Streaming synthesis failed: {e}")
 
458
 
459
  @app.get("/audio/{filename}")
460
  async def get_audio(filename: str):