Rajhuggingface4253 commited on
Commit
faa5294
·
verified ·
1 Parent(s): 78223d3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -36
app.py CHANGED
@@ -350,54 +350,69 @@ async def stream_text_to_speech_cloning(
350
  output_format: str = Form("mp3", pattern="^(wav|mp3|flac)$"),
351
  reference_audio: UploadFile = File(...)):
352
  """
353
- TRUE Real-Time Streaming - Fixed for immediate chunk delivery.
 
354
  """
355
  if not hasattr(app.state, 'tts_wrapper'):
356
  raise HTTPException(status_code=503, detail="Service unavailable: Model not loaded")
357
 
358
  async def stream_generator():
359
  loop = asyncio.get_event_loop()
360
-
361
- try:
362
- # 1. Setup: Convert audio and get reference encoding
363
- converted_wav_buffer = await convert_to_wav_in_memory(reference_audio)
364
- ref_audio_bytes = converted_wav_buffer.getvalue()
365
- audio_hash = hashlib.sha256(ref_audio_bytes).hexdigest()
366
-
367
- ref_s = await loop.run_in_executor(
368
- tts_executor,
369
- app.state.tts_wrapper._get_or_create_reference_encoding,
370
- audio_hash,
371
- ref_audio_bytes
372
- )
373
-
374
- sentences = app.state.tts_wrapper._split_text_into_chunks(text)
375
- logger.info(f"🚀 TRUE STREAMING: Processing {len(sentences)} chunks")
376
-
377
- def process_chunk(sentence_text):
378
- with torch.no_grad():
379
- audio_chunk = app.state.tts_wrapper.tts_model.infer(sentence_text, ref_s, reference_text)
380
- return app.state.tts_wrapper._convert_to_streamable_format(audio_chunk, output_format)
381
-
382
- # 2. ✅ TRUE STREAMING: Process and yield chunks ONE BY ONE
383
- for i, sentence in enumerate(sentences):
384
- logger.info(f"🎯 Processing chunk {i+1}/{len(sentences)}: '{sentence[:50]}...'")
385
 
386
- # Process this chunk and yield immediately
387
- chunk_bytes = await loop.run_in_executor(tts_executor, process_chunk, sentence)
388
- logger.info(f"✅ Yielded chunk {i+1}: {len(chunk_bytes)} bytes")
389
- yield chunk_bytes
 
 
 
390
 
391
- logger.info("🎉 Streaming complete - all chunks delivered")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
392
 
393
- except Exception as e:
394
- logger.error(f" Stream error: {e}")
395
- raise
 
 
 
 
 
 
396
 
397
  return StreamingResponse(
398
  stream_generator(),
399
- media_type=f"audio/{'mpeg' if output_format == 'mp3' else output_format}",
400
- headers={"Cache-Control": "no-cache"}
401
  )
402
 
403
  @app.get("/audio/{filename}")
 
350
  output_format: str = Form("mp3", pattern="^(wav|mp3|flac)$"),
351
  reference_audio: UploadFile = File(...)):
352
  """
353
+ Sentence-by-Sentence Streaming using a high-performance, asyncio-native
354
+ producer-consumer pipeline.
355
  """
356
  if not hasattr(app.state, 'tts_wrapper'):
357
  raise HTTPException(status_code=503, detail="Service unavailable: Model not loaded")
358
 
359
  async def stream_generator():
360
  loop = asyncio.get_event_loop()
361
+ q = asyncio.Queue(maxsize=2)
362
+
363
+ async def producer():
364
+ try:
365
+ converted_wav_buffer = await convert_to_wav_in_memory(reference_audio)
366
+ ref_audio_bytes = converted_wav_buffer.getvalue()
367
+ audio_hash = hashlib.sha256(ref_audio_bytes).hexdigest()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
368
 
369
+ # Use LRU cache like blocking endpoint
370
+ ref_s = await loop.run_in_executor(
371
+ tts_executor,
372
+ app.state.tts_wrapper._get_or_create_reference_encoding,
373
+ audio_hash,
374
+ ref_audio_bytes
375
+ )
376
 
377
+ sentences = app.state.tts_wrapper._split_text_into_chunks(text)
378
+
379
+ def process_chunk(sentence_text):
380
+ with torch.no_grad():
381
+ audio_chunk = app.state.tts_wrapper.tts_model.infer(sentence_text, ref_s, reference_text)
382
+ return app.state.tts_wrapper._convert_to_streamable_format(audio_chunk, output_format)
383
+
384
+ # Schedule all chunks to be processed in the background.
385
+ for sentence in sentences:
386
+ task = loop.run_in_executor(tts_executor, process_chunk, sentence)
387
+ await q.put(task) # Put the FUTURE, not the result, in the queue.
388
+
389
+ except Exception as e:
390
+ logger.error(f"Error in producer task: {e}")
391
+ await q.put(e)
392
+ finally:
393
+ await q.put(None) # Signal that all tasks have been scheduled.
394
+
395
+ producer_task = asyncio.create_task(producer())
396
+
397
+ # The CONSUMER's job is to wait for each result and yield it.
398
+ while True:
399
+ result = await q.get()
400
+ if result is None:
401
+ break
402
 
403
+ if isinstance(result, Exception):
404
+ logger.error(f"Terminating stream due to producer error: {result}")
405
+ raise result
406
+
407
+ # Await the result of the background task
408
+ chunk_bytes = await result
409
+ yield chunk_bytes
410
+
411
+ await producer_task
412
 
413
  return StreamingResponse(
414
  stream_generator(),
415
+ media_type=f"audio/{'mpeg' if output_format == 'mp3' else output_format}"
 
416
  )
417
 
418
  @app.get("/audio/{filename}")