daihui.zhang committed on
Commit
02e7bde
·
1 Parent(s): 359ffc6

update and fix bug newline

Browse files
transcribe/pipelines/pipe_vad.py CHANGED
@@ -17,11 +17,11 @@ class VadPipe(BasePipe):
17
  def init(cls):
18
  if cls.model is None:
19
  cls.model = SileroVADProcessor(
20
- activate_threshold=0.4, # 降低以捕获更多音频
21
  fusion_threshold=0.45, # 提高以更好地融合语音片段
22
  min_speech_duration=0.2, # 略微降低以捕获短音节
23
  max_speech_duration=20, # 保持不变
24
- # min_silence_duration=300, # 增加到300毫秒,允许说话间的自然停顿
25
  sample_rate=cls.sample_rate # 采样率,音频信号的采样频率
26
  )
27
  cls.vac = FixedVADIterator(cls.model.silero_vad, sampling_rate=cls.sample_rate,)
 
17
  def init(cls):
18
  if cls.model is None:
19
  cls.model = SileroVADProcessor(
20
+ activate_threshold=0.45, # 降低以捕获更多音频
21
  fusion_threshold=0.45, # 提高以更好地融合语音片段
22
  min_speech_duration=0.2, # 略微降低以捕获短音节
23
  max_speech_duration=20, # 保持不变
24
+ min_silence_duration=300, # 增加到300毫秒,允许说话间的自然停顿
25
  sample_rate=cls.sample_rate # 采样率,音频信号的采样频率
26
  )
27
  cls.vac = FixedVADIterator(cls.model.silero_vad, sampling_rate=cls.sample_rate,)
transcribe/strategy.py CHANGED
@@ -183,7 +183,7 @@ class TranscriptBuffer:
183
  """更新临时缓冲字符串"""
184
  self._buffer = text
185
 
186
- def commit_line(self) -> None:
187
  """将缓冲字符串提交为短句"""
188
  if self._buffer:
189
  self._sentences.append(self._buffer)
@@ -219,6 +219,10 @@ class TranscriptBuffer:
219
  output = self.split_and_join(
220
  text.replace(
221
  self._separator, ""))
 
 
 
 
222
 
223
  return output
224
 
@@ -249,15 +253,18 @@ class TranscriptBuffer:
249
  return output
250
 
251
 
252
- def update_and_commit(self, stable_string: str, remaining_string:str, is_end_sentence=False):
253
  if self.source_language == "en":
254
- stable_string = self.rebuild(stable_string)
255
- remaining_string = self.rebuild(remaining_string)
 
256
 
257
  logger.debug(f"{self.__dict__}")
258
  if is_end_sentence:
259
- self.update_pending_text(stable_string)
260
- self.commit_line()
 
 
261
  current_text_len = len(self.current_not_commit_text.split(self._separator)) if self._separator else len(self.current_not_commit_text)
262
  # current_text_len = len(self.current_not_commit_text.split(self._separator))
263
  self.update_pending_text(remaining_string)
@@ -266,8 +273,9 @@ class TranscriptBuffer:
266
  self._current_seg_id += 1
267
  return True
268
  else:
269
- self.update_pending_text(stable_string)
270
- self.commit_line()
 
271
  self.update_pending_text(remaining_string)
272
  return False
273
 
@@ -301,7 +309,7 @@ class TranscriptStabilityAnalyzer:
301
  logger.debug(f"Current separator: {self._separator}")
302
 
303
  def merge_chunks(self, chunks: List[TranscriptChunk])->str:
304
- return "".join(r.join() for r in chunks)
305
 
306
 
307
 
@@ -365,12 +373,12 @@ class TranscriptStabilityAnalyzer:
365
 
366
 
367
  def _yield_commit_results(self, stable_chunk, remaining_chunks, is_end_sentence: bool) -> Iterator[TranscriptResult]:
368
- stable_str = stable_chunk.join() if hasattr(stable_chunk, "join") else self.merge_chunks(stable_chunk)
369
- remaining_str = self.merge_chunks(remaining_chunks)
370
  frame_cut_index = stable_chunk[-1].get_buffer_index() if isinstance(stable_chunk, list) else stable_chunk.get_buffer_index()
371
 
372
  prev_seg_id = self._transcript_buffer.get_seg_id()
373
- commit_paragraph = self._transcript_buffer.update_and_commit(stable_str, remaining_str, is_end_sentence)
374
  logger.debug(f"current buffer: {self._transcript_buffer.__dict__}")
375
 
376
  if commit_paragraph:
 
183
  """更新临时缓冲字符串"""
184
  self._buffer = text
185
 
186
+ def commit_line(self,) -> None:
187
  """将缓冲字符串提交为短句"""
188
  if self._buffer:
189
  self._sentences.append(self._buffer)
 
219
  output = self.split_and_join(
220
  text.replace(
221
  self._separator, ""))
222
+
223
+ logger.debug("==== rebuild string ====")
224
+ logger.debug(text)
225
+ logger.debug(output)
226
 
227
  return output
228
 
 
253
  return output
254
 
255
 
256
+ def update_and_commit(self, stable_strings: List[str], remaining_strings:List[str], is_end_sentence=False):
257
  if self.source_language == "en":
258
+ stable_strings = [self.rebuild(i) for i in stable_strings]
259
+ remaining_strings =[self.rebuild(i) for i in remaining_strings]
260
+ remaining_string = "".join(remaining_strings)
261
 
262
  logger.debug(f"{self.__dict__}")
263
  if is_end_sentence:
264
+ for stable_str in stable_strings:
265
+ self.update_pending_text(stable_str)
266
+ self.commit_line()
267
+
268
  current_text_len = len(self.current_not_commit_text.split(self._separator)) if self._separator else len(self.current_not_commit_text)
269
  # current_text_len = len(self.current_not_commit_text.split(self._separator))
270
  self.update_pending_text(remaining_string)
 
273
  self._current_seg_id += 1
274
  return True
275
  else:
276
+ for stable_str in stable_strings:
277
+ self.update_pending_text(stable_str)
278
+ self.commit_line()
279
  self.update_pending_text(remaining_string)
280
  return False
281
 
 
309
  logger.debug(f"Current separator: {self._separator}")
310
 
311
  def merge_chunks(self, chunks: List[TranscriptChunk])->str:
312
+ return list(r.join() for r in chunks)
313
 
314
 
315
 
 
373
 
374
 
375
  def _yield_commit_results(self, stable_chunk, remaining_chunks, is_end_sentence: bool) -> Iterator[TranscriptResult]:
376
+ stable_str_list = [stable_chunk.join()] if hasattr(stable_chunk, "join") else self.merge_chunks(stable_chunk)
377
+ remaining_str_list = self.merge_chunks(remaining_chunks)
378
  frame_cut_index = stable_chunk[-1].get_buffer_index() if isinstance(stable_chunk, list) else stable_chunk.get_buffer_index()
379
 
380
  prev_seg_id = self._transcript_buffer.get_seg_id()
381
+ commit_paragraph = self._transcript_buffer.update_and_commit(stable_str_list, remaining_str_list, is_end_sentence)
382
  logger.debug(f"current buffer: {self._transcript_buffer.__dict__}")
383
 
384
  if commit_paragraph:
transcribe/whisper_llm_serve.py CHANGED
@@ -122,17 +122,17 @@ class WhisperTranscriptionService(ServeClientBase):
122
  frames = self.frames_np.copy()
123
 
124
  # 音频过短时的处理
125
- if len(frames) <= 100:
126
  # 极短音频段,清空并返回None
127
- self._update_audio_buffer(len(frames))
128
  return None
129
- elif len(frames) < self.sample_rate:
130
  # 不足一秒的音频,补充静音
131
  silence_audio = np.zeros((self.sample_rate + 1000,), dtype=np.float32)
132
  silence_audio[-len(frames):] = frames
133
  return silence_audio.copy()
134
 
135
- return frames.copy()
136
 
137
  def _transcribe_audio(self, audio_buffer: np.ndarray) -> List[TranscriptToken]:
138
  """转录音频并返回转录片段"""
 
122
  frames = self.frames_np.copy()
123
 
124
  # 音频过短时的处理
125
+ if len(frames) <= 10:
126
  # 极短音频段,清空并返回None
127
+ # self._update_audio_buffer(len(frames))
128
  return None
129
+ if len(frames) < self.sample_rate:
130
  # 不足一秒的音频,补充静音
131
  silence_audio = np.zeros((self.sample_rate + 1000,), dtype=np.float32)
132
  silence_audio[-len(frames):] = frames
133
  return silence_audio.copy()
134
 
135
+ return frames
136
 
137
  def _transcribe_audio(self, audio_buffer: np.ndarray) -> List[TranscriptToken]:
138
  """转录音频并返回转录片段"""