Spaces:

Rajhuggingface4253
/

neu

Paused

App Files Files Community

Rajhuggingface4253 committed on Oct 18

Commit

dacdc6d

verified ·

1 Parent(s): c36a042

Update app.py

Browse files

Files changed (1) hide show

app.py +81 -77

app.py CHANGED Viewed

@@ -152,33 +152,60 @@ class NeuTTSWrapper:
             audio = self.tts_model.infer(text, ref_s, reference_text)
         return audio
-    def stream_speech_blocking(self, text: str, ref_audio_path: str, reference_text: str, speed: float, audio_format: str) -> Generator[bytes, None, None]:
-        """Sentence-by-Sentence Streaming (Blocking)."""
-        logger.info(f"Starting streaming synthesis for text length: {len(text)}")
         ref_s = self.tts_model.encode_reference(ref_audio_path)
-        # 3. Split text
         sentences = self._split_text_into_chunks(text)
-        # 4. Stream chunks
         for i, sentence in enumerate(sentences):
             if not sentence.strip():
                 continue
-            logger.debug(f"Generating streaming chunk {i+1}: '{sentence[:30]}...'")
-            # Infer sentence
             with torch.no_grad():
                 audio_chunk = self.tts_model.infer(sentence, ref_s, reference_text)
-            # Convert and yield
-            yield self._convert_to_streamable_format(audio_chunk, audio_format)
-        logger.info("Streaming synthesis complete.")
 # --- Asynchronous Offloading ---
 async def run_blocking_task_async(func, *args, **kwargs):
@@ -368,78 +395,55 @@ async def text_to_speech(
 async def stream_text_to_speech_cloning(
     text: str = Form(..., min_length=1, max_length=5000),
     reference_text: str = Form(...),
-    speed: float = Form(1.0, ge=0.5, le=2.0),
     output_format: str = Form("mp3", pattern="^(wav|mp3|flac)$"),
-    reference_audio: UploadFile = File(...)):
     """
-    Sentence-by-Sentence Streaming Endpoint.
-    Fixes race condition by moving cleanup into the streaming generator.
     """
     if not hasattr(app.state, 'tts_wrapper'):
         raise HTTPException(status_code=503, detail="Service unavailable: Model not loaded")
-    # 1. Asynchronously save reference audio (non-blocking)
     temp_ref_path = await save_upload_file_async(reference_audio)
-    converted_wav_path = None # Initialize for cleanup
-    try:
-        # 2. Convert the uploaded file (WebM, etc.) to a 24kHz WAV file
-        converted_wav_path = await run_blocking_task_async(
-            convert_to_wav_blocking,
-            temp_ref_path
-        )
-        # 2.5. CLEANUP ORIGINAL FILE IMMEDIATELY: It is no longer needed after conversion
-        if os.path.exists(temp_ref_path):
-            os.unlink(temp_ref_path)
-        # 3. Define the generator function, which will run in the thread pool
-        def stream_generator(path_to_delete: str):
-            try:
-                # This logic uses the path_to_delete parameter, which is guaranteed to exist
-                for chunk_bytes in app.state.tts_wrapper.stream_speech_blocking(
-                    text,
-                    path_to_delete, # Pass the CONVERTED WAV path
-                    reference_text,
-                    speed,
-                    output_format
-                ):
-                    yield chunk_bytes
-            except Exception as e:
-                # Log the error and raise it to stop the stream
-                logger.error(f"Streaming generator error: {e}")
-                raise # Re-raise to ensure the stream terminates
-            finally:
-                # 4. **CRUCIAL FIX:** Clean up the converted file ONLY AFTER GENERATION IS DONE
-                if os.path.exists(path_to_delete):
-                    os.unlink(path_to_delete)
-                    logger.info(f"Cleaned up converted file: {path_to_delete}")
-        # Return StreamingResponse, passing the path to the generator
-        return StreamingResponse(
-            stream_generator(converted_wav_path),
-            media_type=f"audio/{'mpeg' if output_format == 'mp3' else output_format}",
-            headers={
-                "Content-Disposition": "attachment; filename=tts_live_stream.mp3",
-                "Transfer-Encoding": "chunked",
-                "Cache-Control": "no-cache",
-                "X-Accel-Buffering": "no"
-            }
-        )
-    except Exception as e:
-        logger.error(f"Streaming setup error: {e}")
-        # Clean up files only if the setup failed *before* starting the generator
-        if os.path.exists(temp_ref_path):
-            os.unlink(temp_ref_path)
-        if converted_wav_path and os.path.exists(converted_wav_path):
-            os.unlink(converted_wav_path)
-        # Reraise HTTPExceptions that may have come from the conversion step
-        if isinstance(e, HTTPException):
-             raise
-        raise HTTPException(status_code=500, detail=f"Streaming synthesis failed: {e}")
-    # Note: The outer 'finally' block is now removed as its logic is handled in 2.5 and 4.
 @app.get("/audio/{filename}")
 async def get_audio(filename: str):

             audio = self.tts_model.infer(text, ref_s, reference_text)
         return audio
+    def stream_producer(self, queue: asyncio.Queue, text: str, ref_audio_path: str, reference_text: str, loop: "asyncio.AbstractEventLoop | None" = None):
+        """
+        [PRODUCER] Synthesize ``text`` chunk by chunk and push each result into ``queue``.
+
+        Meant to run in a worker thread (the streaming endpoint submits it through
+        ``run_in_executor``), which is why results are handed over through a queue
+        instead of being returned.
+
+        Args:
+            queue: Receives one item per synthesized chunk (whatever
+                ``tts_model.infer`` returns), then the exception object if
+                synthesis failed, and always a final ``None`` sentinel.
+            text: Full text to synthesize; split by ``_split_text_into_chunks``.
+            ref_audio_path: Path handed to ``tts_model.encode_reference``.
+            reference_text: Forwarded unchanged to ``tts_model.infer``.
+            loop: Event loop that owns ``queue``. ``asyncio.Queue`` is not
+                thread-safe, so when a loop is given every put is scheduled on it
+                with ``call_soon_threadsafe``. When omitted, ``queue.put_nowait``
+                is called directly from this thread, so the caller must pass an
+                object whose ``put_nowait`` is safe to call off-loop.
+        """
+        def _emit(item):
+            # Hand the item to the consumer, hopping onto the loop thread when we know it.
+            if loop is None:
+                queue.put_nowait(item)
+            else:
+                loop.call_soon_threadsafe(queue.put_nowait, item)
+
+        try:
+            logger.info("Starting audio production thread...")
+            ref_s = self.tts_model.encode_reference(ref_audio_path)
+            sentences = self._split_text_into_chunks(text)
+            for i, sentence in enumerate(sentences):
+                if not sentence.strip():
+                    continue
+                logger.debug(f"Producing chunk {i+1}/{len(sentences)}: '{sentence[:30]}...'")
+                with torch.no_grad():
+                    audio_chunk = self.tts_model.infer(sentence, ref_s, reference_text)
+                # Raw chunk goes to the consumer, which does the format conversion.
+                _emit(audio_chunk)
+        except Exception as e:
+            logger.error(f"Error in producer thread: {e}")
+            # Forward the failure so the consumer can stop instead of waiting forever.
+            _emit(e)
+        finally:
+            # The sentinel is sent on every path, including after an error.
+            _emit(None)
+async def stream_consumer(queue: asyncio.Queue, output_format: str):
+    """
+    [CONSUMER] Drain ``queue`` and yield each audio chunk encoded as ``output_format``.
+
+    Iteration ends when the ``None`` sentinel arrives or when the producer
+    forwards an exception object (which is logged here, not re-raised). Each
+    chunk is encoded by ``_convert_to_streamable_format``, invoked through
+    ``run_blocking_task_async``.
+    """
+    logger.info("Starting audio consumption...")
+    while True:
+        received = await queue.get()
+        if received is None:
+            # End-of-stream sentinel from the producer.
+            logger.info("Consumer received end-of-stream signal.")
+            return
+        if isinstance(received, Exception):
+            logger.error(f"Consumer received an error from the producer: {received}")
+            return
+        # Anything else is an audio chunk: encode it and pass it on to the client.
+        yield await run_blocking_task_async(
+            app.state.tts_wrapper._convert_to_streamable_format,
+            received,
+            output_format
+        )
 # --- Asynchronous Offloading ---
 async def run_blocking_task_async(func, *args, **kwargs):
 async def stream_text_to_speech_cloning(
     text: str = Form(..., min_length=1, max_length=5000),
     reference_text: str = Form(...),
     output_format: str = Form("mp3", pattern="^(wav|mp3|flac)$"),
+    reference_audio: UploadFile = File(...)
+):
     """
+    Stream synthesized speech for ``text`` while later chunks are still being generated.
+
+    The uploaded reference audio is saved, converted to WAV, and handed to
+    ``stream_producer`` running on ``tts_executor``; ``stream_consumer`` encodes
+    each chunk as ``output_format`` and yields it to the client.
+
+    Raises:
+        HTTPException: 503 when ``app.state.tts_wrapper`` has not been set.
     """
     if not hasattr(app.state, 'tts_wrapper'):
         raise HTTPException(status_code=503, detail="Service unavailable: Model not loaded")
     temp_ref_path = await save_upload_file_async(reference_audio)
+    async def cleanup_and_run_stream():
+        """Convert the reference audio, run producer and consumer, then delete the temp files."""
+        converted_wav_path = None
+        queue = asyncio.Queue()
+        # get_running_loop() is the supported way to fetch the loop from inside a coroutine.
+        loop = asyncio.get_running_loop()
+        class _LoopBoundQueue:
+            """Queue facade for the worker thread; asyncio.Queue itself is not thread-safe."""
+            def put_nowait(self, item):
+                # Run the real put on the loop thread so the waiting consumer is woken up.
+                loop.call_soon_threadsafe(queue.put_nowait, item)
+        try:
+            # Convert the uploaded file to the required WAV format
+            converted_wav_path = await run_blocking_task_async(convert_to_wav_blocking, temp_ref_path)
+            # Start the producer (the model) in a background thread. It only ever
+            # sees the facade, so every put is marshalled back onto this loop.
+            loop.run_in_executor(
+                tts_executor,
+                app.state.tts_wrapper.stream_producer,
+                _LoopBoundQueue(), text, converted_wav_path, reference_text
+            )
+            # The consumer reads the real queue and yields encoded chunks to the client.
+            async for chunk in stream_consumer(queue, output_format):
+                yield chunk
+        finally:
+            # Runs once the stream has finished or failed.
+            if os.path.exists(temp_ref_path):
+                os.unlink(temp_ref_path)
+            if converted_wav_path and os.path.exists(converted_wav_path):
+                os.unlink(converted_wav_path)
+            logger.info("Cleaned up temporary stream files.")
+    return StreamingResponse(
+        cleanup_and_run_stream(),
+        media_type=f"audio/{'mpeg' if output_format == 'mp3' else output_format}",
+        headers={
+            # Extension follows the requested format (validated above to wav|mp3|flac).
+            "Content-Disposition": f"attachment; filename=tts_live_stream.{output_format}",
+            "Cache-Control": "no-cache",
+            "X-Accel-Buffering": "no" # Header to prevent proxy buffering
+        }
+    )
 @app.get("/audio/{filename}")
 async def get_audio(filename: str):