Rajhuggingface4253 committed on
Commit
6b2b49d
·
verified ·
1 Parent(s): c63a379

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -30
app.py CHANGED
@@ -134,12 +134,7 @@ class NeuTTSWrapper:
134
  audio_buffer.seek(0)
135
  return audio_buffer.read()
136
 
137
- def _split_text_into_chunks(self, text: str) -> list[str]:
138
- """Simple sentence splitting for streaming (can be enhanced with regex)."""
139
- sentences = [s.strip() for s in text.split('.') if s.strip()]
140
- if not sentences:
141
- sentences = [text.strip()]
142
- return sentences
143
 
144
  def generate_speech_blocking(self, text: str, ref_audio_path: str, reference_text: str) -> np.ndarray:
145
  """Blocking synthesis for standard endpoint."""
@@ -152,32 +147,60 @@ class NeuTTSWrapper:
152
  audio = self.tts_model.infer(text, ref_s, reference_text)
153
  return audio
154
 
155
- def stream_speech_blocking(self, text: str, ref_audio_path: str, reference_text: str, speed: float, audio_format: str) -> Generator[bytes, None, None]:
156
- """Sentence-by-Sentence Streaming (Blocking)."""
157
- logger.info(f"Starting streaming synthesis for text length: {len(text)}")
158
-
159
-
160
-
161
- ref_s = self.tts_model.encode_reference(ref_audio_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
162
 
163
- # 3. Split text
164
- sentences = self._split_text_into_chunks(text)
165
 
166
- # 4. Stream chunks
167
- for i, sentence in enumerate(sentences):
168
- if not sentence.strip():
169
- continue
170
-
171
- logger.debug(f"Generating streaming chunk {i+1}: '{sentence[:30]}...'")
172
-
173
- # Infer sentence
174
- with torch.no_grad():
175
- audio_chunk = self.tts_model.infer(sentence, ref_s, reference_text)
176
-
177
- # Convert and yield
178
- yield self._convert_to_streamable_format(audio_chunk, audio_format)
179
-
180
- logger.info("Streaming synthesis complete.")
181
 
182
  # --- Asynchronous Offloading ---
183
 
 
134
  audio_buffer.seek(0)
135
  return audio_buffer.read()
136
 
137
+
 
 
 
 
 
138
 
139
  def generate_speech_blocking(self, text: str, ref_audio_path: str, reference_text: str) -> np.ndarray:
140
  """Blocking synthesis for standard endpoint."""
 
147
  audio = self.tts_model.infer(text, ref_s, reference_text)
148
  return audio
149
 
150
+ def _split_into_streaming_chunks(self, text: str) -> list[str]:
151
+ """
152
+ Splits text into smaller, more manageable chunks for streaming.
153
+ """
154
+ sentences = []
155
+ current_sentence = ""
156
+ for char in text:
157
+ current_sentence += char
158
+ if char in '.!?;:':
159
+ sentences.append(current_sentence.strip())
160
+ current_sentence = ""
161
+ if current_sentence.strip():
162
+ sentences.append(current_sentence.strip())
163
+ if not sentences:
164
+ if ',' in text:
165
+ sentences = [chunk.strip() for chunk in text.split(',') if chunk.strip()]
166
+ else:
167
+ chunk_size = 100
168
+ sentences = [text[i:i+chunk_size] for i in range(0, len(text), chunk_size)]
169
+ return [s for s in sentences if s]
170
+
171
+ # --- NEW: Parallel Worker (Now a method of the class) ---
172
+ def _synthesize_chunk_blocking(self, sentence: str, ref_s: torch.Tensor, ref_text: str) -> np.ndarray:
173
+ """Worker function to synthesize a single chunk of text. Runs in a thread pool."""
174
+ with torch.no_grad():
175
+ # It now correctly calls the model stored in self.tts_model
176
+ audio_chunk = self.tts_model.infer(sentence, ref_s, ref_text)
177
+ return audio_chunk
178
+
179
+ # --- NEW: Parallel Streaming Generator (Now a method of the class) ---
180
+ async def stream_speech_parallel(self, text: str, ref_audio_path: str, ref_text: str, executor: ThreadPoolExecutor):
181
+ """
182
+ Performs streaming synthesis using a parallel producer-consumer pattern.
183
+ """
184
+ loop = asyncio.get_event_loop()
185
+ # It now correctly calls the model's encode_reference method
186
+ ref_s = await loop.run_in_executor(
187
+ executor, self.tts_model.encode_reference, ref_audio_path
188
+ )
189
 
190
+ # It now correctly calls its own text splitting method
191
+ sentences = self._split_into_streaming_chunks(text)
192
 
193
+ tasks = [
194
+ loop.run_in_executor(
195
+ # It now correctly calls its own worker method
196
+ executor, self._synthesize_chunk_blocking, sentence, ref_s, ref_text
197
+ )
198
+ for sentence in sentences
199
+ ]
200
+
201
+ for task in tasks:
202
+ audio_chunk = await task
203
+ yield audio_chunk
 
 
 
 
204
 
205
  # --- Asynchronous Offloading ---
206