Rajhuggingface4253 commited on
Commit
e94e39e
·
verified ·
1 Parent(s): b307da8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -31
app.py CHANGED
@@ -350,43 +350,79 @@ async def stream_text_to_speech_cloning(
350
  output_format: str = Form("mp3", pattern="^(wav|mp3|flac)$"),
351
  reference_audio: UploadFile = File(...)):
352
  """
353
- Sentence-by-Sentence Streaming with in-memory processing and caching.
 
354
  """
355
  if not hasattr(app.state, 'tts_wrapper'):
356
  raise HTTPException(status_code=503, detail="Service unavailable: Model not loaded")
357
 
358
- try:
359
- # 1. Convert the uploaded file to WAV directly in memory
360
- converted_wav_buffer = await convert_to_wav_in_memory(reference_audio)
361
- ref_audio_bytes = converted_wav_buffer.getvalue()
362
-
363
- # 2. The generator now runs in the thread pool, using the audio bytes
364
- def stream_generator():
365
  try:
366
- for chunk_bytes in app.state.tts_wrapper.stream_speech_blocking(
367
- text,
368
- ref_audio_bytes, # Pass bytes, not a path
369
- reference_text,
370
- speed,
371
- output_format
372
- ):
373
- yield chunk_bytes
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
374
  except Exception as e:
375
- logger.error(f"Streaming generator error: {e}")
376
- # This ensures the stream terminates on an error
377
- raise
378
-
379
- # Return StreamingResponse with the generator
380
- return StreamingResponse(
381
- stream_generator(),
382
- media_type=f"audio/{'mpeg' if output_format == 'mp3' else output_format}"
383
- )
384
-
385
- except Exception as e:
386
- logger.error(f"Streaming setup error: {e}")
387
- if isinstance(e, HTTPException):
388
- raise
389
- raise HTTPException(status_code=500, detail=f"Streaming synthesis failed: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
390
  # Note: The outer 'finally' block is now removed as its logic is handled in 2.5 and 4.
391
 
392
  @app.get("/audio/{filename}")
 
350
  output_format: str = Form("mp3", pattern="^(wav|mp3|flac)$"),
351
  reference_audio: UploadFile = File(...)):
352
  """
353
+ Sentence-by-Sentence Streaming using a high-performance, asyncio-native
354
+ producer-consumer pipeline. This overlaps CPU-bound AI work with network I/O.
355
  """
356
  if not hasattr(app.state, 'tts_wrapper'):
357
  raise HTTPException(status_code=503, detail="Service unavailable: Model not loaded")
358
 
359
+ # This async generator is the final, correct implementation.
360
+ async def stream_generator():
361
+ loop = asyncio.get_event_loop()
362
+ q = asyncio.Queue(maxsize=2)
363
+
364
+ # The PRODUCER is now an async task that runs in the background.
365
+ async def producer():
366
  try:
367
+ # The one-time setup cost: convert and encode the reference voice.
368
+ # This is done before the loop to ensure the voice is ready.
369
+ converted_wav_buffer = await convert_to_wav_in_memory(reference_audio)
370
+ ref_audio_bytes = converted_wav_buffer.getvalue()
371
+ audio_hash = hashlib.sha256(ref_audio_bytes).hexdigest()
372
+ ref_s = await loop.run_in_executor(
373
+ tts_executor,
374
+ app.state.tts_wrapper._get_or_create_reference_encoding,
375
+ audio_hash,
376
+ ref_audio_bytes
377
+ )
378
+
379
+ sentences = app.state.tts_wrapper._split_text_into_chunks(text)
380
+
381
+ for sentence in sentences:
382
+ # Define the blocking work for a single chunk
383
+ def process_chunk():
384
+ with torch.no_grad():
385
+ audio_chunk = app.state.tts_wrapper.tts_model.infer(sentence, ref_s, reference_text)
386
+ return app.state.tts_wrapper._convert_to_streamable_format(audio_chunk, output_format)
387
+
388
+ # Offload the blocking work to the thread pool
389
+ mp3_bytes = await loop.run_in_executor(tts_executor, process_chunk)
390
+ # Put the finished MP3 chunk into the async queue
391
+ await q.put(mp3_bytes)
392
+
393
  except Exception as e:
394
+ logger.error(f"Error in producer task: {e}")
395
+ await q.put(e)
396
+ finally:
397
+ # Signal that production is finished
398
+ await q.put(None)
399
+
400
+ # Start the producer as a background task. It starts working immediately.
401
+ producer_task = asyncio.create_task(producer())
402
+
403
+ # The main loop now acts as the CONSUMER.
404
+ while True:
405
+ # Await the next finished MP3 chunk from the queue.
406
+ result = await q.get()
407
+
408
+ if result is None:
409
+ break
410
+
411
+ if isinstance(result, Exception):
412
+ logger.error(f"Terminating stream due to producer error: {result}")
413
+ raise result
414
+
415
+ # Yield the chunk to the user. While the network sends this,
416
+ # the producer is already working on the next chunk in the background.
417
+ yield result
418
+
419
+ # Ensure the producer task is cleaned up.
420
+ await producer_task
421
+
422
+ return StreamingResponse(
423
+ stream_generator(),
424
+ media_type=f"audio/{'mpeg' if output_format == 'mp3' else output_format}"
425
+ )
426
  # Note: The outer 'finally' block is now removed as its logic is handled in 2.5 and 4.
427
 
428
  @app.get("/audio/{filename}")