Spaces:

Rajhuggingface4253
/

neu

Paused

App Files Community

Rajhuggingface4253 committed on Oct 19

Commit

e94e39e

verified ·

1 Parent(s): b307da8

Update app.py

Browse files

Files changed (1) hide show

app.py +67 -31

app.py CHANGED Viewed

@@ -350,43 +350,79 @@ async def stream_text_to_speech_cloning(
     output_format: str = Form("mp3", pattern="^(wav|mp3|flac)$"),
     reference_audio: UploadFile = File(...)):
     """
-    Sentence-by-Sentence Streaming with in-memory processing and caching.
     """
     if not hasattr(app.state, 'tts_wrapper'):
         raise HTTPException(status_code=503, detail="Service unavailable: Model not loaded")
-    try:
-        # 1. Convert the uploaded file to WAV directly in memory
-        converted_wav_buffer = await convert_to_wav_in_memory(reference_audio)
-        ref_audio_bytes = converted_wav_buffer.getvalue()
-        # 2. The generator now runs in the thread pool, using the audio bytes
-        def stream_generator():
             try:
-                for chunk_bytes in app.state.tts_wrapper.stream_speech_blocking(
-                    text,
-                    ref_audio_bytes, # Pass bytes, not a path
-                    reference_text,
-                    speed,
-                    output_format
-                ):
-                    yield chunk_bytes
             except Exception as e:
-                logger.error(f"Streaming generator error: {e}")
-                # This ensures the stream terminates on an error
-                raise
-        # Return StreamingResponse with the generator
-        return StreamingResponse(
-            stream_generator(),
-            media_type=f"audio/{'mpeg' if output_format == 'mp3' else output_format}"
-        )
-    except Exception as e:
-        logger.error(f"Streaming setup error: {e}")
-        if isinstance(e, HTTPException):
-             raise
-        raise HTTPException(status_code=500, detail=f"Streaming synthesis failed: {e}")
     # Note: The outer 'finally' block is now removed as its logic is handled in 2.5 and 4.
 @app.get("/audio/{filename}")

     output_format: str = Form("mp3", pattern="^(wav|mp3|flac)$"),
     reference_audio: UploadFile = File(...)):
     """
+    Sentence-by-Sentence Streaming using a high-performance, asyncio-native
+    producer-consumer pipeline. This overlaps CPU-bound AI work with network I/O.
     """
     if not hasattr(app.state, 'tts_wrapper'):
         raise HTTPException(status_code=503, detail="Service unavailable: Model not loaded")
+    # This async generator is the final, correct implementation.
+    async def stream_generator():
+        loop = asyncio.get_event_loop()
+        q = asyncio.Queue(maxsize=2)
+        # The PRODUCER is now an async task that runs in the background.
+        async def producer():
             try:
+                # The one-time setup cost: convert and encode the reference voice.
+                # This is done before the loop to ensure the voice is ready.
+                converted_wav_buffer = await convert_to_wav_in_memory(reference_audio)
+                ref_audio_bytes = converted_wav_buffer.getvalue()
+                audio_hash = hashlib.sha256(ref_audio_bytes).hexdigest()
+                ref_s = await loop.run_in_executor(
+                    tts_executor,
+                    app.state.tts_wrapper._get_or_create_reference_encoding,
+                    audio_hash,
+                    ref_audio_bytes
+                )
+                sentences = app.state.tts_wrapper._split_text_into_chunks(text)
+                for sentence in sentences:
+                    # Define the blocking work for a single chunk
+                    def process_chunk():
+                        with torch.no_grad():
+                            audio_chunk = app.state.tts_wrapper.tts_model.infer(sentence, ref_s, reference_text)
+                        return app.state.tts_wrapper._convert_to_streamable_format(audio_chunk, output_format)
+                    # Offload the blocking work to the thread pool
+                    mp3_bytes = await loop.run_in_executor(tts_executor, process_chunk)
+                    # Put the finished MP3 chunk into the async queue
+                    await q.put(mp3_bytes)
             except Exception as e:
+                logger.error(f"Error in producer task: {e}")
+                await q.put(e)
+            finally:
+                # Signal that production is finished
+                await q.put(None)
+        # Start the producer as a background task. It starts working immediately.
+        producer_task = asyncio.create_task(producer())
+        # The main loop now acts as the CONSUMER.
+        while True:
+            # Await the next finished MP3 chunk from the queue.
+            result = await q.get()
+            if result is None:
+                break
+            if isinstance(result, Exception):
+                logger.error(f"Terminating stream due to producer error: {result}")
+                raise result
+            # Yield the chunk to the user. While the network sends this,
+            # the producer is already working on the next chunk in the background.
+            yield result
+        # Ensure the producer task is cleaned up.
+        await producer_task
+    return StreamingResponse(
+        stream_generator(),
+        media_type=f"audio/{'mpeg' if output_format == 'mp3' else output_format}"
+    )
     # Note: The outer 'finally' block is now removed as its logic is handled in 2.5 and 4.
 @app.get("/audio/{filename}")