Rajhuggingface4253 committed on
Commit
6b2b49d
·
verified ·
1 Parent(s): c63a379

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -30
app.py CHANGED
@@ -134,12 +134,7 @@ class NeuTTSWrapper:
134
  audio_buffer.seek(0)
135
  return audio_buffer.read()
136
 
137
- def _split_text_into_chunks(self, text: str) -> list[str]:
138
- """Simple sentence splitting for streaming (can be enhanced with regex)."""
139
- sentences = [s.strip() for s in text.split('.') if s.strip()]
140
- if not sentences:
141
- sentences = [text.strip()]
142
- return sentences
143
 
144
  def generate_speech_blocking(self, text: str, ref_audio_path: str, reference_text: str) -> np.ndarray:
145
  """Blocking synthesis for standard endpoint."""
@@ -152,32 +147,60 @@ class NeuTTSWrapper:
152
  audio = self.tts_model.infer(text, ref_s, reference_text)
153
  return audio
154
 
155
- def stream_speech_blocking(self, text: str, ref_audio_path: str, reference_text: str, speed: float, audio_format: str) -> Generator[bytes, None, None]:
156
- """Sentence-by-Sentence Streaming (Blocking)."""
157
- logger.info(f"Starting streaming synthesis for text length: {len(text)}")
158
-
159
-
160
-
161
- ref_s = self.tts_model.encode_reference(ref_audio_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
162
 
163
- # 3. Split text
164
- sentences = self._split_text_into_chunks(text)
165
 
166
- # 4. Stream chunks
167
- for i, sentence in enumerate(sentences):
168
- if not sentence.strip():
169
- continue
170
-
171
- logger.debug(f"Generating streaming chunk {i+1}: '{sentence[:30]}...'")
172
-
173
- # Infer sentence
174
- with torch.no_grad():
175
- audio_chunk = self.tts_model.infer(sentence, ref_s, reference_text)
176
-
177
- # Convert and yield
178
- yield self._convert_to_streamable_format(audio_chunk, audio_format)
179
-
180
- logger.info("Streaming synthesis complete.")
181
 
182
  # --- Asynchronous Offloading ---
183
 
 
134
  audio_buffer.seek(0)
135
  return audio_buffer.read()
136
 
137
+
 
 
 
 
 
138
 
139
  def generate_speech_blocking(self, text: str, ref_audio_path: str, reference_text: str) -> np.ndarray:
140
  """Blocking synthesis for standard endpoint."""
 
147
  audio = self.tts_model.infer(text, ref_s, reference_text)
148
  return audio
149
 
150
+ def _split_into_streaming_chunks(self, text: str) -> list[str]:
151
+ """
152
+ Splits text into smaller, more manageable chunks for streaming.
153
+ """
154
+ sentences = []
155
+ current_sentence = ""
156
+ for char in text:
157
+ current_sentence += char
158
+ if char in '.!?;:':
159
+ sentences.append(current_sentence.strip())
160
+ current_sentence = ""
161
+ if current_sentence.strip():
162
+ sentences.append(current_sentence.strip())
163
+ if not sentences:
164
+ if ',' in text:
165
+ sentences = [chunk.strip() for chunk in text.split(',') if chunk.strip()]
166
+ else:
167
+ chunk_size = 100
168
+ sentences = [text[i:i+chunk_size] for i in range(0, len(text), chunk_size)]
169
+ return [s for s in sentences if s]
170
+
171
+ # --- NEW: Parallel Worker (Now a method of the class) ---
172
+ def _synthesize_chunk_blocking(self, sentence: str, ref_s: torch.Tensor, ref_text: str) -> np.ndarray:
173
+ """Worker function to synthesize a single chunk of text. Runs in a thread pool."""
174
+ with torch.no_grad():
175
+ # It now correctly calls the model stored in self.tts_model
176
+ audio_chunk = self.tts_model.infer(sentence, ref_s, ref_text)
177
+ return audio_chunk
178
+
179
+ # --- NEW: Parallel Streaming Generator (Now a method of the class) ---
180
+ async def stream_speech_parallel(self, text: str, ref_audio_path: str, ref_text: str, executor: ThreadPoolExecutor):
181
+ """
182
+ Performs streaming synthesis using a parallel producer-consumer pattern.
183
+ """
184
+ loop = asyncio.get_event_loop()
185
+ # It now correctly calls the model's encode_reference method
186
+ ref_s = await loop.run_in_executor(
187
+ executor, self.tts_model.encode_reference, ref_audio_path
188
+ )
189
 
190
+ # It now correctly calls its own text splitting method
191
+ sentences = self._split_into_streaming_chunks(text)
192
 
193
+ tasks = [
194
+ loop.run_in_executor(
195
+ # It now correctly calls its own worker method
196
+ executor, self._synthesize_chunk_blocking, sentence, ref_s, ref_text
197
+ )
198
+ for sentence in sentences
199
+ ]
200
+
201
+ for task in tasks:
202
+ audio_chunk = await task
203
+ yield audio_chunk
 
 
 
 
204
 
205
  # --- Asynchronous Offloading ---
206