Spaces:

Rajhuggingface4253
/

neu

Paused

App Files Files Community

Rajhuggingface4253 committed on Oct 19

Commit

69ddef5

verified ·

1 Parent(s): ab0a603

Update app.py

Browse files

Files changed (1) hide show

app.py +9 -19

app.py CHANGED Viewed

@@ -35,7 +35,7 @@ logger = logging.getLogger("NeuTTS-API")
 # Explicitly use CPU as per Dockerfile and Hugging Face free tier compatibility
 DEVICE = "cpu"
 # Configure Max Workers for concurrent synthesis threads (1-2 is safe for CPU-only)
-MAX_WORKERS = 2
 tts_executor = ThreadPoolExecutor(max_workers=MAX_WORKERS)
 SAMPLE_RATE = 24000
 CLEANUP_THRESHOLD = 300 # 300 seconds = 5 minutes (an hour would be 3600)
@@ -94,7 +94,6 @@ class NeuTTSWrapper:
     def __init__(self, device: str = "cpu"):
         self.tts_model = None
         self.device = device
-        self.encoding_cache = {}
         self.load_model()
     def load_model(self):
@@ -352,7 +351,7 @@ async def stream_text_to_speech_cloning(
     reference_audio: UploadFile = File(...)):
     """
     Sentence-by-Sentence Streaming using a high-performance, asyncio-native
-    producer-consumer pipeline. This overlaps CPU-bound AI work with network I/O.
     """
     if not hasattr(app.state, 'tts_wrapper'):
         raise HTTPException(status_code=503, detail="Service unavailable: Model not loaded")
@@ -361,29 +360,22 @@ async def stream_text_to_speech_cloning(
         loop = asyncio.get_event_loop()
         q = asyncio.Queue(maxsize=2)
-        # The PRODUCER's job is to quickly schedule work, not wait for it.
         async def producer():
             try:
                 converted_wav_buffer = await convert_to_wav_in_memory(reference_audio)
                 ref_audio_bytes = converted_wav_buffer.getvalue()
                 audio_hash = hashlib.sha256(ref_audio_bytes).hexdigest()
-                # Check cache for reference encoding
-                if audio_hash in app.state.tts_wrapper.encoding_cache:
-                    logger.info(f"Streaming Cache HIT for hash: {audio_hash[:10]}...")
-                    ref_s = app.state.tts_wrapper.encoding_cache[audio_hash]
-                else:
-                    logger.info(f"Streaming Cache MISS for hash: {audio_hash[:10]}...")
-                    ref_s = await loop.run_in_executor(
-                        tts_executor,
-                        app.state.tts_wrapper.get_reference_encoding,
-                        ref_audio_bytes
-                    )
-                    app.state.tts_wrapper.encoding_cache[audio_hash] = ref_s
                 sentences = app.state.tts_wrapper._split_text_into_chunks(text)
-                # This function does the heavy lifting for one chunk.
                 def process_chunk(sentence_text):
                     with torch.no_grad():
                         audio_chunk = app.state.tts_wrapper.tts_model.infer(sentence_text, ref_s, reference_text)
@@ -408,7 +400,6 @@ async def stream_text_to_speech_cloning(
             if result is None:
                 break
-            # Check if the item in the queue is a task (future) or an exception
             if isinstance(result, Exception):
                 logger.error(f"Terminating stream due to producer error: {result}")
                 raise result
@@ -423,7 +414,6 @@ async def stream_text_to_speech_cloning(
         stream_generator(),
         media_type=f"audio/{'mpeg' if output_format == 'mp3' else output_format}"
     )
-    # Note: The outer 'finally' block is now removed as its logic is handled in 2.5 and 4.
 @app.get("/audio/{filename}")
 async def get_audio(filename: str):

 # Explicitly use CPU as per Dockerfile and Hugging Face free tier compatibility
 DEVICE = "cpu"
 # Configure Max Workers for concurrent synthesis threads (1-2 is safe for CPU-only)
+MAX_WORKERS = 3
 tts_executor = ThreadPoolExecutor(max_workers=MAX_WORKERS)
 SAMPLE_RATE = 24000
 CLEANUP_THRESHOLD = 300 # 300 seconds = 5 minutes (an hour would be 3600)
     def __init__(self, device: str = "cpu"):
         self.tts_model = None
         self.device = device
         self.load_model()
     def load_model(self):
     reference_audio: UploadFile = File(...)):
     """
     Sentence-by-Sentence Streaming using a high-performance, asyncio-native
+    producer-consumer pipeline.
     """
     if not hasattr(app.state, 'tts_wrapper'):
         raise HTTPException(status_code=503, detail="Service unavailable: Model not loaded")
         loop = asyncio.get_event_loop()
         q = asyncio.Queue(maxsize=2)
         async def producer():
             try:
                 converted_wav_buffer = await convert_to_wav_in_memory(reference_audio)
                 ref_audio_bytes = converted_wav_buffer.getvalue()
                 audio_hash = hashlib.sha256(ref_audio_bytes).hexdigest()
+                # ✅ Use LRU cache like blocking endpoint
+                ref_s = await loop.run_in_executor(
+                    tts_executor,
+                    app.state.tts_wrapper._get_or_create_reference_encoding,
+                    audio_hash,
+                    ref_audio_bytes
+                )
                 sentences = app.state.tts_wrapper._split_text_into_chunks(text)
                 def process_chunk(sentence_text):
                     with torch.no_grad():
                         audio_chunk = app.state.tts_wrapper.tts_model.infer(sentence_text, ref_s, reference_text)
             if result is None:
                 break
             if isinstance(result, Exception):
                 logger.error(f"Terminating stream due to producer error: {result}")
                 raise result
         stream_generator(),
         media_type=f"audio/{'mpeg' if output_format == 'mp3' else output_format}"
     )
 @app.get("/audio/{filename}")
 async def get_audio(filename: str):