Spaces:

Rajhuggingface4253
/

neu

Paused

App Files Files Community

Rajhuggingface4253 committed on Oct 19

Commit

b307da8

verified ·

1 Parent(s): 2c4e22c

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -44

app.py CHANGED Viewed

@@ -21,12 +21,11 @@ from pydantic import BaseModel, Field
 import re
 import hashlib
 from functools import lru_cache
-import queue
 # Ensure the cloned neutts-air repository is in the path
 import sys
 sys.path.append(os.path.join(os.getcwd(), 'neutts-air'))
 from neuttsair.neutts import NeuTTSAir
-from threading import Thread
 # Configure logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger("NeuTTS-API")
@@ -36,7 +35,7 @@ logger = logging.getLogger("NeuTTS-API")
 # Explicitly use CPU as per Dockerfile and Hugging Face free tier compatibility
 DEVICE = "cpu"
 # Configure Max Workers for concurrent synthesis threads (1-2 is safe for CPU-only)
-MAX_WORKERS = 1
 tts_executor = ThreadPoolExecutor(max_workers=MAX_WORKERS)
 SAMPLE_RATE = 24000
 CLEANUP_THRESHOLD = 300 # in seconds (300 s = 5 minutes, not 1 hour)
@@ -351,56 +350,33 @@ async def stream_text_to_speech_cloning(
     output_format: str = Form("mp3", pattern="^(wav|mp3|flac)$"),
     reference_audio: UploadFile = File(...)):
     """
-    Sentence-by-Sentence Streaming using a parallel producer-consumer pipeline
-    to ensure continuous, low-latency audio flow.
     """
     if not hasattr(app.state, 'tts_wrapper'):
         raise HTTPException(status_code=503, detail="Service unavailable: Model not loaded")
     try:
         converted_wav_buffer = await convert_to_wav_in_memory(reference_audio)
         ref_audio_bytes = converted_wav_buffer.getvalue()
         def stream_generator():
-            q = queue.Queue(maxsize=2)
-            def producer():
-                try:
-                    audio_hash = hashlib.sha256(ref_audio_bytes).hexdigest()
-                    ref_s = app.state.tts_wrapper._get_or_create_reference_encoding(audio_hash, ref_audio_bytes)
-                    sentences = app.state.tts_wrapper._split_text_into_chunks(text)
-                    for sentence in sentences:
-                        with torch.no_grad():
-                            audio_chunk = app.state.tts_wrapper.tts_model.infer(sentence, ref_s, reference_text)
-                        q.put(audio_chunk)
-                except Exception as e:
-                    logger.error(f"Error in producer thread: {e}")
-                    q.put(e)
-                finally:
-                    q.put(None)
-            # === THIS IS THE FIX ===
-            # Start the producer in a standard, separate thread.
-            # This avoids the asyncio loop error.
-            producer_thread = Thread(target=producer)
-            producer_thread.start()
-            # =======================
-            while True:
-                result = q.get()
-                if result is None:
-                    break
-                if isinstance(result, Exception):
-                    logger.error(f"Terminating stream due to producer error: {result}")
-                    raise result
-                yield app.state.tts_wrapper._convert_to_streamable_format(result, output_format)
         return StreamingResponse(
             stream_generator(),
             media_type=f"audio/{'mpeg' if output_format == 'mp3' else output_format}"
@@ -411,6 +387,7 @@ async def stream_text_to_speech_cloning(
         if isinstance(e, HTTPException):
              raise
         raise HTTPException(status_code=500, detail=f"Streaming synthesis failed: {e}")
 @app.get("/audio/{filename}")
 async def get_audio(filename: str):

 import re
 import hashlib
 from functools import lru_cache
 # Ensure the cloned neutts-air repository is in the path
 import sys
 sys.path.append(os.path.join(os.getcwd(), 'neutts-air'))
 from neuttsair.neutts import NeuTTSAir
 # Configure logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger("NeuTTS-API")
 # Explicitly use CPU as per Dockerfile and Hugging Face free tier compatibility
 DEVICE = "cpu"
 # Configure Max Workers for concurrent synthesis threads (1-2 is safe for CPU-only)
+MAX_WORKERS = 2
 tts_executor = ThreadPoolExecutor(max_workers=MAX_WORKERS)
 SAMPLE_RATE = 24000
 CLEANUP_THRESHOLD = 300 # in seconds (300 s = 5 minutes, not 1 hour)
     output_format: str = Form("mp3", pattern="^(wav|mp3|flac)$"),
     reference_audio: UploadFile = File(...)):
     """
+    Sentence-by-Sentence Streaming with in-memory processing and caching.
     """
     if not hasattr(app.state, 'tts_wrapper'):
         raise HTTPException(status_code=503, detail="Service unavailable: Model not loaded")
     try:
+        # 1. Convert the uploaded file to WAV directly in memory
         converted_wav_buffer = await convert_to_wav_in_memory(reference_audio)
         ref_audio_bytes = converted_wav_buffer.getvalue()
+        # 2. The generator now runs in the thread pool, using the audio bytes
         def stream_generator():
+            try:
+                for chunk_bytes in app.state.tts_wrapper.stream_speech_blocking(
+                    text,
+                    ref_audio_bytes, # Pass bytes, not a path
+                    reference_text,
+                    speed,
+                    output_format
+                ):
+                    yield chunk_bytes
+            except Exception as e:
+                logger.error(f"Streaming generator error: {e}")
+                # This ensures the stream terminates on an error
+                raise
+        # Return StreamingResponse with the generator
         return StreamingResponse(
             stream_generator(),
             media_type=f"audio/{'mpeg' if output_format == 'mp3' else output_format}"
         if isinstance(e, HTTPException):
              raise
         raise HTTPException(status_code=500, detail=f"Streaming synthesis failed: {e}")
+    # Note: The outer 'finally' block is now removed as its logic is handled in 2.5 and 4.
 @app.get("/audio/{filename}")
 async def get_audio(filename: str):